Audio mp3/aac/wav decoding based on FFMpeg

Compilation environment: Ubuntu16.04 64-bit
Cross-compilation tool: arm-himix200-linux-gcc

Article directory

1. ffmpeg source code download

What I use here is ffmpeg-5.1.2.tar.gz. For the download address, click the download address .

2. Cross compilation

cd /root/
tar zxvf ffmpeg-5.1.2.tar.gz
cd ffmpeg-5.1.2
mkdir output
./configure --cross-prefix=arm-himix200-linux- --enable-cross-compile --target-os=linux --cc=arm-himix200-linux-gcc --arch=arm --prefix=/root/ffmpeg-5.1.2/output --disable-x86asm --disable-debug --disable-doc --disable-zlib  --disable-v4l2-m2m --disable-iconv --disable-network --disable-ffplay --disable-ffprobe --disable-symver --disable-indevs --disable-outdevs --disable-parsers --disable-bsfs --disable-filters --disable-protocols --disable-hwaccels --disable-muxers --disable-demuxers --disable-encoders --disable-decoders --enable-demuxer=aac --enable-demuxer=mp3 --enable-demuxer=wav --enable-decoder=mp3 --enable-decoder=aac --enable-decoder=pcm_alaw --enable-decoder=pcm_bluray --enable-decoder=pcm_dvd --enable-decoder=pcm_f16le --enable-decoder=pcm_f24le --enable-decoder=pcm_f32be --enable-decoder=pcm_f32le --enable-decoder=pcm_f64be --enable-decoder=pcm_f64le --enable-decoder=pcm_lxf --enable-decoder=pcm_mulaw --enable-decoder=pcm_s16be --enable-decoder=pcm_s16be_planar --enable-decoder=pcm_s16le --enable-decoder=pcm_s16le_planar --enable-decoder=pcm_s24be --enable-decoder=pcm_s24daud --enable-decoder=pcm_s24le --enable-decoder=pcm_s24le_planar --enable-decoder=pcm_s32be --enable-decoder=pcm_s32le --enable-decoder=pcm_s32le_planar --enable-decoder=pcm_s64be --enable-decoder=pcm_s64le --enable-decoder=pcm_s8 --enable-decoder=pcm_s8_planar --enable-decoder=pcm_u16be --enable-decoder=pcm_u16le --enable-decoder=pcm_u24be --enable-decoder=pcm_u24le --enable-decoder=pcm_u32be --enable-decoder=pcm_u32le --enable-decoder=pcm_u8 --enable-decoder=pcm_vidc --enable-decoder=pcm_zork --enable-protocol=file --enable-small --enable-muxer=pcm_s16le --extra-cflags="-ffunction-sections -fdata-sections -fsigned-char -Wformat"
make
make install

In this way, the program we want is under /root/ffmpeg-5.1.2/output. ffmpeg in the bin directory can be run on the development board. Under include are the required header files. Under lib are the required static libraries. share/ffmpeg/ examples are some sample codes that you can refer to.
Note that the ./configure configuration command can be tailored according to actual needs. My project only needs to decode mp3, aac and wav, so only the corresponding demuxer and decoder are configured.

CFLAGS += -ffunction-sections -fdata-sections
LDFLAGS += -Wl,--gc-sections

GCC link operation uses section as the smallest processing unit. As long as a symbol in a section is referenced, the section will be added to the executable program. Therefore, GCC can use -ffunction-sections and -fdata-sections when compiling to create each function or symbol as a section, where the name of each section is consistent with the function or data name. During the linking phase, -Wl,--gc-sections instructs the linker to remove unused sections (-wl, indicating that the subsequent parameter -gc-sections is passed to the linker), thus reducing the size of the final executable program. .

3. Static library linking

LIBS += libavdevice.a \
		libavfilter.a \
		libavformat.a \
		libavcodec.a \
		libavutil.a \
		libswresample.a \
		libswscale.a

Be careful not to mess up the order, otherwise it will report that the function cannot be found because there are dependencies.

There is no need to use libavdevice.a (reading devices, cameras or screen recordings, etc.), libavfilter.a (adding special effects, such as watermarks, etc.) and libswscale.a (image stretching, pixel format conversion, etc.).

LIBS += libavformat.a \
		libavcodec.a \
		libavutil.a \
		libswresample.a

4. Header file

#ifndef __AUDIO_DECODER__
#define __AUDIO_DECODER__

#ifdef __cplusplus
extern "C" {
#endif
#include "libavformat/avformat.h"
#include "libavformat/avio.h"
#include "libavcodec/avcodec.h"
#include "libavutil/audio_fifo.h"
#include "libavutil/avassert.h"
#include "libavutil/avstring.h"
#include "libavutil/channel_layout.h"
#include "libavutil/frame.h"
#include "libavutil/mem.h"
#include "libavutil/opt.h"
#include "libswresample/swresample.h"
#ifdef __cplusplus
}
#endif

class CAudioDecoder
{
public:
	CAudioDecoder();
	virtual ~CAudioDecoder(void);

	int DecodeOpen(uint codec);
	int DecodeClose(void);

	int DecodeFrame(uchar *pFrame, int nFrameSize, uchar *pOutBuf, int nBufferSize);
private:
	void Decode(uchar *pOutBuf, int nBufferSize, int &nSize);
	
public:
	static int transcode_pcm(const char* inputFile, const char* outputFile);

private:
	AVCodec *m_Codec;
	AVCodecContext *m_Context;
	AVCodecParserContext *m_Parser;
	AVPacket *m_Packet;
	AVFrame *m_Frame;

	struct SwrContext* m_converCtx;
	AVChannelLayout m_ChannelLayout;
	int m_outBytesPerSample;
	
	uint8_t* m_pData;
	int m_nSize;
};

I don’t know why the header file of ffmpeg does not have extern “C”. What is the difference between C++ and C during linking.

5. Audio file conversion


#define TRANSCODE_SAMPLERATE 8000
#define TRANSCODE_FORMAT AV_SAMPLE_FMT_S16
#define TRANSCODE_CHANNELS 1
#define TRANSCODE_NBSAMPLES 4096

/*
	* inputFile        input file name
	* outputFile        output file name
	* return	decode the length of the data
*/
int CAudioDecoder::transcode_pcm(const char* inputFile, const char* outputFile)
{
	CFile file;
	bool bRet = file.Open(outputFile, CFile::modeCreate | CFile::modeWrite);
	if (!bRet)
		return -1;

	int ret = -1;
	AVCodec* codec = NULL;
	AVCodecContext* codecContext = NULL;
	AVCodecParameters* codecpar = NULL;
	AVFrame* frame = NULL;	
	AVPacket *pPacket = NULL;
	struct SwrContext* converCtx = NULL;
	uint8_t** data = NULL;
	
	int streamindex = 0;
	int outBytesPerSample = 0;
	int allocsize = 0;
	AVChannelLayout out_ch_layout = AV_CHANNEL_LAYOUT_MONO;
	
	AVFormatContext* frameContext = avformat_alloc_context();
	if (frameContext == NULL) {
		goto cleanup;
	}

	if (avformat_open_input(&frameContext, inputFile, NULL, NULL) != 0) {
		goto cleanup;
	}

	if (avformat_find_stream_info(frameContext, NULL) < 0) {
		goto cleanup;
	}

	streamindex = av_find_best_stream(frameContext, AVMEDIA_TYPE_AUDIO, -1, -1, (const AVCodec**)&codec, -1);
	if (!codec) {
		goto cleanup;
	}
	
	codecContext = avcodec_alloc_context3(codec);
	codecpar = frameContext->streams[streamindex]->codecpar;
	avcodec_parameters_to_context(codecContext, codecpar);
	if (avcodec_open2(codecContext, codec, NULL) < 0) {
		goto cleanup;
	}

	outBytesPerSample = TRANSCODE_CHANNELS * av_get_bytes_per_sample(TRANSCODE_FORMAT);
	frame = av_frame_alloc();
	swr_alloc_set_opts2(&converCtx, &out_ch_layout, TRANSCODE_FORMAT, TRANSCODE_SAMPLERATE, &codecContext->ch_layout, codecContext->sample_fmt, codecContext->sample_rate, 0, NULL);
	swr_init(converCtx);
	data = (uint8_t**)av_calloc(1, sizeof(*data));
	allocsize = av_samples_alloc(data, NULL, TRANSCODE_CHANNELS, TRANSCODE_NBSAMPLES, TRANSCODE_FORMAT, 0);

	pPacket = av_packet_alloc();
	while (av_read_frame(frameContext, pPacket) >= 0) {
		if (pPacket->stream_index != streamindex)
			continue;

		if (avcodec_send_packet(codecContext, pPacket) < 0) {
			goto cleanup;
		}

		while (avcodec_receive_frame(codecContext, frame) >= 0) {
			ret = swr_convert(converCtx, data, allocsize, (const uint8_t**)frame->data, frame->nb_samples);
			if (ret > 0)
				file.Write(data[0], ret * outBytesPerSample);
		}
		av_packet_unref(pPacket);
	}

	while ((ret = swr_convert(converCtx, data, allocsize, NULL, 0)) > 0) {
		file.Write(data[0], ret * outBytesPerSample);
	}

cleanup:
	if (frameContext) {
		avformat_close_input(&frameContext);
		avformat_free_context(frameContext);
	}

	if (codecContext)
		avcodec_free_context(&codecContext);

	if (pPacket)
		av_packet_free(&pPacket);

	if (data)
		av_freep(&data[0]);
	
	av_freep(&data);

	if (frame)
		av_frame_free(&frame);
	
	if (converCtx)
		swr_free(&converCtx);

	if (file.IsOpened()) {
		ret = file.GetLength();
		file.Close();
	}
	
	return ret;
}

Note that CFile is the file class in my project and can be replaced by the file IO API.

6. Real-time audio decoding

Taking aac as an example, due to the use of aac's parser, options need to be added when cross-compiling ffmpeg:

--enable-parser=aac

Similarly, if you need to support mp3 decoding, you need to add options when cross-compiling ffmpeg:

--enable-parser=mp3

Other formats are similar.

#include "AudioDecoder.h"

CAudioDecoder::CAudioDecoder()
{
	m_Codec = NULL;
	m_Context = NULL;
	m_Parser = NULL;
	m_Packet = NULL;
	m_Frame = NULL;

	m_converCtx = NULL;
	m_ChannelLayout = AV_CHANNEL_LAYOUT_MONO;
	m_outBytesPerSample = 0;
	
	m_pData = NULL;
	m_nSize = 0;
}

CAudioDecoder::~CAudioDecoder(void)
{
	DecodeClose();
}

int CAudioDecoder::DecodeOpen(uint codec)
{
	int ret = -1;

	switch (codec) {
		case CODEC_AAC:
			m_Codec = (AVCodec *)avcodec_find_decoder(AV_CODEC_ID_AAC);
			break;
		default:
			m_Codec = NULL;
			break;
	}

	if (!m_Codec) {
		fprintf(stderr, "Codec not found\n");
		goto error;
	}
	
	m_Context = avcodec_alloc_context3(m_Codec);
	if (!m_Context) {
		fprintf(stderr, "Could not allocate audio codec context\n");
		goto error;
	}
	
	ret = avcodec_open2(m_Context, m_Codec, NULL);
	if (ret < 0) {
		fprintf(stderr, "Could not open codec\n");
		goto error;
	}

	m_Parser = av_parser_init(m_Codec->id);
	if (!m_Parser) {
		fprintf(stderr, "Parser not found\n");
		goto error;
	}

	m_Packet = av_packet_alloc();
	if (!m_Packet) {
		fprintf(stderr, "Could not allocate audio packet\n");
		goto error;
	}
		
	m_Frame = av_frame_alloc();
	if (!m_Frame) {
		fprintf(stderr, "Could not allocate audio frame\n");
		goto error;
	}

	m_outBytesPerSample = TRANSCODE_CHANNELS * av_get_bytes_per_sample(TRANSCODE_FORMAT);
	m_nSize = TRANSCODE_NBSAMPLES * m_outBytesPerSample;
	m_pData = (uint8_t *)av_malloc(m_nSize);

	return 0;
	
error:
	DecodeClose();
	return ret;
}

int CAudioDecoder::DecodeClose(void)
{
	av_freep(&m_pData);

	if (m_Frame)
	    av_frame_free(&m_Frame);

	if (m_converCtx)
		swr_free(&m_converCtx);

	if (m_Packet)
	    av_packet_free(&m_Packet);

	if (m_Parser)
    	av_parser_close(m_Parser);

	if (m_Context)
    	avcodec_free_context(&m_Context);


	return 0;
}

int CAudioDecoder::DecodeFrame(uchar *pFrame, int nFrameSize, uchar *pOutBuf, int nBufferSize)
{
	if (pFrame == NULL || nFrameSize == 0 || pOutBuf == NULL || nBufferSize == 0) {
		return -1;
	}

	int nSize = 0;
    uint8_t *data = pFrame;
    int data_size = nFrameSize;
	
	while (data_size > 0) {
        int ret = av_parser_parse2(m_Parser, m_Context, &m_Packet->data, &m_Packet->size, data, data_size, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
		if (ret < 0) {
			fprintf(stderr, "Error while parsing\n");
			break;
		}

		if (m_Packet->size > 0) {
			data += ret;
			data_size -= ret;

			Decode(pOutBuf, nBufferSize, nSize);
		} else {
			break;
		}
	}

	return nSize;
}

void CAudioDecoder::Decode(uchar *pOutBuf, int nBufferSize, int &nSize)
{
	int ret = avcodec_send_packet(m_Context, m_Packet);
	if (ret < 0) {
		return ;
	}
	
	swr_alloc_set_opts2(&m_converCtx, &m_ChannelLayout, TRANSCODE_FORMAT, TRANSCODE_SAMPLERATE, &m_Context->ch_layout, m_Context->sample_fmt, m_Context->sample_rate, 0, NULL);
	swr_init(m_converCtx);

	while (avcodec_receive_frame(m_Context, m_Frame) >= 0) {		
		ret = swr_convert(m_converCtx, &m_pData, m_nSize, (const uint8_t**)m_Frame->data, m_Frame->nb_samples);
		if (ret > 0) {
			if (nBufferSize < nSize + ret * m_outBytesPerSample) {
				fprintf(stderr, "nBufferSize=[%d] too small!!!\n", nBufferSize);
				break;
			}
			
			memcpy(pOutBuf + nSize, m_pData, ret * m_outBytesPerSample);
			nSize += ret * m_outBytesPerSample;
		}
	}

	while ((ret = swr_convert(m_converCtx, &m_pData, m_nSize, NULL, 0)) > 0) {
		if (nBufferSize < nSize + ret * m_outBytesPerSample) {
			fprintf(stderr, "nBufferSize=[%d] too small!!!\n", nBufferSize);
			break;
		}
		memcpy(pOutBuf + nSize, m_pData, ret * m_outBytesPerSample);
		nSize += ret * m_outBytesPerSample;
	}

	return ;
}

Note: CODEC_AAC is the aac format macro in my project, replace it with your own aac/mp3, and replace AV_CODEC_ID_AAC with the corresponding enumeration to complete the decoding of other formats.

7. Instructions for use

7.1 File conversion

int nPcmSize = CAudioDecoder::transcode_pcm(inputFile, outputFile);

inputFile: the file name that needs to be decoded
outputFile: the decoded file name
nPcmSize: the decoded file length, <=0 fails, >0 succeeds

7.2 Real-time decoding

Create AAC decoder

CAudioDecoder *decoder = new CAudioDecoder();
decoder->DecodeOpen(CODEC_AAC);

decoding

int OutSize = decoder->DecodeFrame(pFrame, nFrameSize, pOutBuf, nBufferSize);

Destroy decoder

delete decoder;
decoder = NULL;

Decode data, mn frames can be transmitted, m>=0 n>=0
pFrame input, data to be decoded
nFrameSize input, data length to
be decoded pOutBuf input output, decoded data, buffer allocated by the caller
nBufferSize input, passed in The buffer pOutBuf length
return returns the used data length of the buffer after decoding. If the incoming data is less than 1 frame, the local call will return 0 until the next input data reaches 1 frame.

Note: Do not repeatedly call the create and destroy decoder for a continuous set of audio data.