音频编码概述

pcm是最原始的音频编码格式，这种编码是无损的。同时意味着存储这种数据的文件将会很庞大，因此必须进行压缩。pcm是音频的编码格式，它不是文件的封装格式。
aac既是一种文件的封装格式，又是音频的编码格式。以aac为封装格式的文件，以.aac为后缀。aac封装格式内部的音频数据编码格式一般也为aac。
下面介绍几个音频相关的参数，这几个参数是编码器进行编码所必需的。
我们总共只需要设置以下四个参数：
1.sample_rate
codecContext->sample_rate = frame->sample_rate;
sample_rate指的是采样率。也就是我们一秒钟采集多少次声音样本。
2.frame->channels
codecContext->channels = frame->channels;
frame->channels指的是通道的数目。音频一般有单通道和双通道之分，常见的是双通道，我们的程序里面也设置为双通道，也就是frame->channels=2。
3.frame->format
codecContext->sample_fmt = frame->format;
frame->format指的是样本的格式。一个音频的样本一般用两个字节来描述，分为大小端。我们的程序中使用的是16bit的小端格式。
4.channel_layout
codecContext->channel_layout = AV_CH_LAYOUT_STEREO;
channel_layout 用来设置输出通道布局。

通过FFmpeg命令生成PCM文件

ffmpeg -i input.mp4 -vn -ar 44100 -ac 2 -f s16le s16le.pcm

该命令的作用是把input.mp4文件转换为s16（声卡支持该格式）、小端格式的PCM文件：s16le.pcm

注：

-i 表示设定输入流

-vn 去除视频信息

-ar 音频采样率

-ac 音频通道数

-f 输出格式

播放PCM文件

ffplay -ar 44100 -ac 2 -f s16le -i s16le.pcm

PCM转AAC（命令行的方式）

ffmpeg -ar 44100 -ac 2 -f s16le -i s16le.pcm out.aac

PCM转AAC（编码）

/**
 *
 *本程序实现了音频PCM采样数据编码为压缩码流（MP3，WMA，AAC等）。
 *
 */

#include <stdio.h>

#define __STDC_CONSTANT_MACROS


extern "C"
{
#include "libavformat/avformat.h"
#include "libavformat/avio.h"
#include "libavdevice/avdevice.h"
#include "libavcodec/avcodec.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
#include "libavutil/imgutils.h"
}



/**
 * Drain the packets still buffered inside the encoder of one stream and
 * write them to the output.
 *
 * The encoder is drained with avcodec_send_frame(NULL) followed by
 * avcodec_receive_packet(), i.e. the same send/receive API that main() uses
 * on this codec context. FFmpeg does not allow mixing it with the legacy
 * avcodec_encode_audio2() call on a single context.
 *
 * @param fmt_ctx       output format context that owns the stream
 * @param stream_index  index of the stream whose encoder is drained
 * @return 0 when the encoder has nothing (more) to deliver, otherwise the
 *         negative error code of the failing FFmpeg call
 */
int flush_encoder(AVFormatContext *fmt_ctx, unsigned int stream_index)
{
	int ret;
	AVPacket enc_pkt;
	AVCodecContext *enc_ctx = fmt_ctx->streams[stream_index]->codec;

	/* Encoders without the DELAY capability are not drained here. */
	if (!(enc_ctx->codec->capabilities & AV_CODEC_CAP_DELAY))
		return 0;

	/* A NULL frame puts the encoder into draining mode. */
	ret = avcodec_send_frame(enc_ctx, NULL);
	if (ret == AVERROR_EOF)
		return 0; /* draining was already started earlier */
	if (ret < 0)
		return ret;

	while (1)
	{
		av_init_packet(&enc_pkt);
		enc_pkt.data = NULL;
		enc_pkt.size = 0;

		ret = avcodec_receive_packet(enc_ctx, &enc_pkt);
		if (ret == AVERROR_EOF || ret == AVERROR(EAGAIN))
		{
			/* nothing left inside the encoder */
			ret = 0;
			break;
		}
		if (ret < 0)
			break;

		printf("Flush Encoder: Succeed to encode 1 frame!\tsize:%5d\n", enc_pkt.size);

		/* mux encoded frame */
		enc_pkt.stream_index = stream_index;
		ret = av_write_frame(fmt_ctx, &enc_pkt);
		/* av_write_frame() does not take ownership, so release the packet here */
		av_packet_unref(&enc_pkt);
		if (ret < 0)
			break;
	}
	return ret;
}

/**
 * Encode the raw PCM file "s16le.pcm" into "output.aac".
 *
 * The input is read as interleaved signed 16-bit samples at 44100 Hz with
 * pFrame->channels channels (see the resampler setup below), converted with
 * libswresample to the encoder's sample format, encoded and muxed.
 *
 * Every resource is released through the single `cleanup` label, so error
 * paths no longer leak and the trailer is written whenever the header was.
 *
 * @return 0 on success, -1 on any failure
 */
int main(int argc, char *argv[])
{
	/*
	 * All locals are declared before the first `goto cleanup`: this file is
	 * compiled as C++ (extern "C" above), where a jump must not cross an
	 * initialization.
	 */
	AVFormatContext *pFormatCtx = NULL;
	AVCodecContext *pCodecCtx = NULL;
	AVCodec *pCodec = NULL;
	AVFrame *pFrame = NULL;
	AVStream *stream = NULL;
	SwrContext *swrCtx = NULL;
	uint8_t **convert_data = NULL;
	uint8_t *frameBuf = NULL;
	FILE *inFile = NULL;
	AVPacket pkt;
	int size = 0;
	int i = 0;
	int codecOpened = 0;
	int headerWritten = 0;
	int ret = -1;

	const char *inFilename = "s16le.pcm";
	const char *outFilename = "output.aac";

	(void) argc;
	(void) argv;

	avdevice_register_all();

	/* Open the input first so a missing input does not truncate the output file. */
	inFile = fopen(inFilename, "rb");
	if (!inFile)
	{
		printf("can't open input file\n");
		goto cleanup;
	}

	if (avformat_alloc_output_context2(&pFormatCtx, NULL, NULL, outFilename) < 0 || !pFormatCtx)
	{
		printf("can't allocate output context\n");
		goto cleanup;
	}

	if (avio_open(&pFormatCtx->pb, outFilename, AVIO_FLAG_READ_WRITE) < 0)
	{
		printf("can't open output file\n");
		goto cleanup;
	}

	stream = avformat_new_stream(pFormatCtx, NULL);
	if (!stream)
	{
		printf("can't allocate new stream\n");
		goto cleanup;
	}

	/* Encoder parameters */
	pCodecCtx = stream->codec;
	pCodecCtx->codec_id = pFormatCtx->oformat->audio_codec;
	pCodecCtx->codec_type = AVMEDIA_TYPE_AUDIO;
	pCodecCtx->sample_fmt = AV_SAMPLE_FMT_FLTP;
	pCodecCtx->sample_rate = 44100;
	pCodecCtx->channel_layout = AV_CH_LAYOUT_STEREO;
	pCodecCtx->channels = av_get_channel_layout_nb_channels(pCodecCtx->channel_layout);
	pCodecCtx->bit_rate = 128000;

	/* Find the encoder */
	pCodec = avcodec_find_encoder(pCodecCtx->codec_id);
	if (!pCodec)
	{
		printf("can't find encoder\n");
		goto cleanup;
	}

	/* Open the encoder */
	if (avcodec_open2(pCodecCtx, pCodec, NULL) < 0)
	{
		printf("can't open encoder\n");
		goto cleanup;
	}
	codecOpened = 1;

	/* All buffer sizes below are derived from frame_size, so it must be positive. */
	if (pCodecCtx->frame_size <= 0)
	{
		printf("encoder reports no fixed frame size\n");
		goto cleanup;
	}

	pFrame = av_frame_alloc();
	if (!pFrame)
	{
		printf("can't alloc frame\n");
		goto cleanup;
	}

	pFrame->nb_samples = pCodecCtx->frame_size;
	pFrame->format = pCodecCtx->sample_fmt;
	pFrame->channels = 2; /* also used below as the channel count of the PCM input */

	/* Resampler: S16 / 44100 Hz input -> encoder sample format and rate */
	swrCtx = swr_alloc_set_opts(NULL, av_get_default_channel_layout(pCodecCtx->channels), pCodecCtx->sample_fmt,
			pCodecCtx->sample_rate, av_get_default_channel_layout(pFrame->channels), AV_SAMPLE_FMT_S16, /* sample format of the PCM source file */
			44100, 0, NULL);
	if (!swrCtx || swr_init(swrCtx) < 0)
	{
		printf("can't init resampler\n");
		goto cleanup;
	}

	/* Buffer receiving the converted samples, one plane pointer per output channel */
	convert_data = (uint8_t**) calloc(pCodecCtx->channels, sizeof(*convert_data));
	if (!convert_data
			|| av_samples_alloc(convert_data, NULL, pCodecCtx->channels, pCodecCtx->frame_size, pCodecCtx->sample_fmt, 0) < 0)
	{
		printf("can't alloc conversion buffer\n");
		goto cleanup;
	}

	/* Backing storage for pFrame; it is also where the raw PCM is read into */
	size = av_samples_get_buffer_size(NULL, pCodecCtx->channels, pCodecCtx->frame_size, pCodecCtx->sample_fmt, 1);
	if (size < 0)
	{
		printf("can't compute frame buffer size\n");
		goto cleanup;
	}
	frameBuf = (uint8_t*) av_malloc(size);
	if (!frameBuf
			|| avcodec_fill_audio_frame(pFrame, pCodecCtx->channels, pCodecCtx->sample_fmt, (const uint8_t*) frameBuf, size, 1) < 0)
	{
		printf("can't set up frame buffer\n");
		goto cleanup;
	}

	/* Write the container header */
	if (avformat_write_header(pFormatCtx, NULL) < 0)
	{
		printf("can't write header\n");
		goto cleanup;
	}
	headerWritten = 1;

	av_init_packet(&pkt);
	pkt.data = NULL;
	pkt.size = 0;

	for (i = 0;; i++)
	{
		/* Byte length of one input frame */
		int length = pFrame->nb_samples * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16) * pFrame->channels;

		/*
		 * Read exactly one frame of PCM; a wrong length makes the encoded
		 * audio play too fast or too slow. A short read without a stream
		 * error is the normal end of input (a trailing partial frame is
		 * dropped) and must still reach the flush and trailer code below.
		 */
		size_t nread = fread(frameBuf, 1, length, inFile);
		if (nread < (size_t) length)
		{
			if (ferror(inFile))
			{
				printf("failed to read raw data\n");
				goto cleanup;
			}
			break;
		}

		if (swr_convert(swrCtx, convert_data, pCodecCtx->frame_size, (const uint8_t**) pFrame->data, pFrame->nb_samples) < 0)
		{
			printf("can't convert samples\n");
			goto cleanup;
		}

		/* Byte length of one output plane */
		length = pCodecCtx->frame_size * av_get_bytes_per_sample(pCodecCtx->sample_fmt);
		/* Copy both planes (the AAC output is stereo) */
		memcpy(pFrame->data[0], convert_data[0], length);
		memcpy(pFrame->data[1], convert_data[1], length);

		/* NOTE(review): pts step of 100 per frame is kept from the original; confirm against the encoder time base. */
		pFrame->pts = i * 100;

		if (avcodec_send_frame(pCodecCtx, pFrame) < 0)
		{
			printf("can't send frame for encoding\n");
			break;
		}

		/* Collect every packet the encoder has ready, not just the first one. */
		while (avcodec_receive_packet(pCodecCtx, &pkt) >= 0)
		{
			int writeRet;

			pkt.stream_index = stream->index;
			printf("write %4d frame, size = %d, length = %d\n", i, size, length);
			writeRet = av_write_frame(pFormatCtx, &pkt);
			av_packet_unref(&pkt);
			if (writeRet < 0)
			{
				printf("can't write encoded packet\n");
				goto cleanup;
			}
		}
	}

	/* Flush the encoder */
	if (flush_encoder(pFormatCtx, 0) < 0)
	{
		printf("flushing encoder failed\n");
		goto cleanup;
	}

	ret = 0;

cleanup:
	/* The trailer is written whenever the header was, including on error paths. */
	if (headerWritten && av_write_trailer(pFormatCtx) < 0)
		ret = -1;

	if (codecOpened)
		avcodec_close(pCodecCtx);
	av_frame_free(&pFrame);
	av_free(frameBuf);
	if (convert_data)
	{
		/* av_samples_alloc() makes one allocation, reachable through plane 0 */
		av_freep(&convert_data[0]);
		free(convert_data);
	}
	swr_free(&swrCtx);

	if (pFormatCtx)
	{
		if (pFormatCtx->pb)
			avio_close(pFormatCtx->pb);
		avformat_free_context(pFormatCtx);
	}

	if (inFile)
		fclose(inFile);

	return ret;
}

问题总结

问题1

Specified sample format s16 is invalid or not supported

解决办法：

2016年以后的新版ffmpeg，其内置的AAC编码器只支持AV_SAMPLE_FMT_FLTP这一种采样格式，所以想对PCM进行编码，需要保证送入编码器的PCM是AV_SAMPLE_FMT_FLTP类型的。

修改我们的代码：


pCodecCtx->sample_fmt = AV_SAMPLE_FMT_FLTP;

如果修改为AV_SAMPLE_FMT_FLTP，我们也需要保证我们的PCM文件的格式为AV_SAMPLE_FMT_FLTP（fltp），这样在转码的时候才不至于出错。

怎样通过命令行的方式生成FLTP格式的PCM，目前没有找到。有知道的朋友可以告知一下。

目前我们通过swr_convert函数来转换。

问题2

转码之后播放时间变短或者变长，还有就是声音明显比正常的要快

根本原因都是因为读取一帧输入文件的buffer的长度不对导致的

解决办法：

//输入一帧数据的长度
int length = pFrame->nb_samples * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16) * pFrame->channels;

       //读PCM：特意注意读取的长度，否则可能出现转码之后声音变快或者变慢
       if (fread(frameBuf, 1, length, inFile) <= 0)
       {
           printf("failed to read raw data\n");
           return -1;
       }
       else if (feof(inFile))
       {
           break;
       }

我们现在测试的是双通道，如果是单通道的PCM文件的话，需要修改：

pFrame->channels = 2;

特别注意：读入一帧输入数据时，如果buffer的长度不对，可能导致很多问题。

tiankong19999

发布了61 篇原创文章 · 获赞 124 · 访问量 70万+

他的留言板关注

FFmpeg —— 14.示例程序（八）：音频编码器（PCM编码为AAC）