FFmpeg - 音频解码 - 代码天地

这篇博客的主要任务是将封装格式的音频文件（如 mp3）解码成 PCM 数据，然后使用 ffplay 播放。注意：ffplay 一定要配置到环境变量中，否则无法在命令行中直接播放

一、解封装

解封装主要任务就是把mp3 等封装格式的文件，解析到FFmpeg对应的结构体中(AVFormatContext)。
AVFormatContext 是 FFmpeg 中存放封装格式数据的结构体，里面保存了媒体流（音视频流）、媒体文件包含的流的个数等信息
解封装主要有三步 avformat_alloc_context() 、avformat_open_input(&avFormatContext, src_url, NULL, &avDictionary) 、avformat_find_stream_info(avFormatContext, NULL)
avformat_alloc_context ：申请一个空的AVFormatContext 结构体
avformat_open_input ：打开媒体文件，检验这个文件是否是一个可以打开的文件
avformat_find_stream_info ：读取媒体文件中的流信息，然后设置到 AVFormatContext 结构体中

   AVFormatContext *avFormatContext = avformat_alloc_context();

    avformat_network_init();
    AVDictionary *avDictionary;
    av_dict_set(&avDictionary, "timeout", "20000000", 0);

    if (avformat_open_input(&avFormatContext, src_url, NULL, &avDictionary)) {
        return;
    }

    av_dict_free(&avDictionary);

    if (avformat_find_stream_info(avFormatContext, NULL) < 0) {
        return;
    }

二、解码

解码需要理解四个结构体：AVStream、AVPacket、AVFrame 以及 AVCodecContext。其中 AVPacket 存放的是编码（压缩）后的一帧数据，AVFrame 存放的是解码后的一帧数据。解码的过程其实就是从 AVFormatContext 中读出一个 AVPacket，再交给 AVCodecContext 解码成 AVFrame 的过程。

第一步、获取到`AVCodecContext`

要获取编解码器上下文结构体，首先要得到流，因为只有通过流才能拿到它的编解码器 id，进而找到编解码器 AVCodec，最后才能得到编解码器上下文 AVCodecContext

avFormatContext->streams[i] ： streams 是一个二级指针（指针数组），存放的是媒体文件中的流数据，如音频流视频流以及字幕流
avStream->codecpar : AVCodecParameters 编解码器参数里面存放了编解码器相关的信息
avcodec_find_decoder(avCodecParameters->codec_id) : 获取到编解码器通过流的编解码器参数的编解码器id
avcodec_alloc_context3(avCodec) ：通过编解码器获取到编解码器上下文
avcodec_parameters_to_context(avCodecContext, avCodecParameters) ：将编解码器参数设置给编解码器上下文
avcodec_open2(avCodecContext, avCodec, NULL) 将编解码器打开，此时编解码器上下文才可以用

	int audio_stream_index = -1;
    AVStream *avStream = NULL;
    // nb_streams is unsigned, so the index is unsigned too.
    for (unsigned int i = 0; i < avFormatContext->nb_streams; ++i) {
        if (avFormatContext->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
            // Remember the first audio stream; later packets are matched against this index.
            avStream = avFormatContext->streams[i];
            audio_stream_index = (int) i;
            break;
        }
    }

    if (audio_stream_index == -1) {
        return;
    }
    AVCodecParameters *avCodecParameters = avStream->codecpar;
    // NULL when this FFmpeg build has no decoder for the stream's codec id.
    const AVCodec *avCodec = avcodec_find_decoder(avCodecParameters->codec_id);
    if (avCodec == NULL) {
        return;
    }

    AVCodecContext *avCodecContext = avcodec_alloc_context3(avCodec);
    if (avCodecContext == NULL) {
        return;
    }
    // Copy the stream's parameters (sample rate, channels, extradata, ...) into the context.
    if (avcodec_parameters_to_context(avCodecContext, avCodecParameters) < 0) {
        avcodec_free_context(&avCodecContext);
        return;
    }
    // The context can only be used for decoding once it has been opened.
    if (avcodec_open2(avCodecContext, avCodec, NULL) < 0) {
        avcodec_free_context(&avCodecContext);
        return;
    }

第二步、AVPacket 转换成AVFrame

主要就是通过 AVFormatContext 取出一个 AVPacket 数据，然后交给 AVCodecContext 转换成 AVFrame 数据

av_packet_alloc()：申请一个AVPacket结构体空的
av_read_frame(avFormatContext, avPacket) ：从 AVFormatContext 中读出一帧数据给 AVPacket
av_frame_alloc : 申请一个AVFrame结构体
avcodec_send_packet(avCodecContext, avPacket) ：将 avPacket 中的压缩数据送入解码器 avCodecContext。解码器内部有缓冲，送入一个包之后不一定立刻就能得到一帧
avcodec_receive_frame(avCodecContext, avFrame) ：从解码器中取出一帧解码后的数据放入 AVFrame；返回 AVERROR(EAGAIN) 表示需要继续送入新的 AVPacket

   AVPacket *avPacket = av_packet_alloc();

    while (av_read_frame(avFormatContext, avPacket) >= 0) {

        AVFrame *avFrame = av_frame_alloc();
        if (avcodec_send_packet(avCodecContext, avPacket)) {
            break;
        }

        int ret = avcodec_receive_frame(avCodecContext, avFrame);
        if (ret == AVERROR(EAGAIN)) {
            continue;
        } else if (ret < 0) {
            return;
        }

        if (avPacket->stream_index != audio_stream_index) {
            return;
        }
		//至此 一帧数据解码完成
	}

三、重采样

对 PCM 原始数据的采样率、采样格式、通道数进行转换（重采样）

第一步、重采样准备设置

swr_alloc() ：创建一个SwrContext 音频转换上下文结构体
swr_alloc_set_opts(swrContext, out_channel_layout, out_sample_fmt, out_sample_rate,in_channel_layout, in_sample_fmt, in_sample_rate,0, NULL) ：设置输出和输入格式，其中采样率最好采用动态获取，因为每个音频的采样率可能不同
swr_init(swrContext) ：初始化上下文
计算缓冲数据输出

    // 1、初始化一些重采样需要的设置
    //输入的信息
    AVSampleFormat in_sample_fmt = avCodecContext->sample_fmt;
    uint64_t in_channel_layout = avCodecContext->channel_layout;
    int in_sample_rate = avCodecContext->sample_rate;
    LOGD("in_sample_rate = %d",in_sample_rate);

    //输出信息
    int out_channel_layout = AV_CH_LAYOUT_STEREO;
    int out_sample_rate = in_sample_rate;
    AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_S16;

    SwrContext *swrContext = swr_alloc();
    swr_alloc_set_opts(swrContext,
                       out_channel_layout, out_sample_fmt, out_sample_rate,
                       in_channel_layout, in_sample_fmt, in_sample_rate,
                       0, NULL);

    swr_init(swrContext);

    //定义一个缓存
    int channel_layout = av_get_channel_layout_nb_channels(out_channel_layout);
    int sample_fmt = av_get_bytes_per_sample(AV_SAMPLE_FMT_S16);
    LOGD("channel_layout = %d", channel_layout);
    int out_buffer_size = channel_layout * in_sample_rate * sample_fmt;
    LOGD("out_buffer_size = %d", out_buffer_size);
    uint8_t *out_buffers = static_cast<uint8_t *>(av_malloc(out_buffer_size/*2 * out_sample_rate*/));

    FILE *out_pcm = fopen(dst_file_path, "wb");

第二步、开始转换

动态计算输出数量

// Number of output samples to request from the resampler: the value of
// swr_get_delay() plus this frame's sample count, rescaled from the frame's
// sample rate to out_sample_rate and rounded up.
int64_t dst_nb_samples = av_rescale_rnd(swr_get_delay(swrContext,avFrame->sample_rate) + avFrame->nb_samples,
                                                out_sample_rate,avFrame->sample_rate,AV_ROUND_UP);

2、swr_convert() 是真正执行转换的 api，它把 avFrame->data 中的数据转换后写入输出缓冲
3、计算转换后的数据在缓冲中所占的字节数，然后把缓冲中的数据写入文件


        int64_t dst_nb_samples = av_rescale_rnd(swr_get_delay(swrContext,avFrame->sample_rate) + avFrame->nb_samples,
                                                out_sample_rate,avFrame->sample_rate,AV_ROUND_UP);

        // 音频重采样
        // 声卡 要求 音频 输出的格式统一（采用率统一，通道数统一，...）
        // 把PCM原始音频数据  --> 统一处理 --》 建立统一格式

        swr_convert(swrContext,
                // 输出相关的
                    &out_buffers, dst_nb_samples,
                // 输入相关的
                    (const uint8_t **) (avFrame->data), avFrame->nb_samples
        );

        int out_buffer_size = av_samples_get_buffer_size(NULL, 2, avFrame->nb_samples,
                                                         out_sample_fmt, 1);
        fwrite(out_buffers, 1, out_buffer_size, out_pcm);

最后贴上完整的代码

#include <jni.h>
#include <string>

extern "C" {
#include <libavformat/avformat.h>
#include <libswresample/swresample.h>
}

#include <android/log.h>

#define TAG "dsh"
#define LOGD(...)__android_log_print(ANDROID_LOG_DEBUG,TAG,__VA_ARGS__)


/**
 * JNI implementation of MainActivity.stringFromJNI().
 *
 * @param env JNI environment used to create the Java string.
 * @return a new Java string holding the text "Hello from C++".
 */
extern "C" JNIEXPORT jstring JNICALL
Java_com_example_audiodecode_MainActivity_stringFromJNI(
        JNIEnv *env,
        jobject /* this */) {
    const std::string greeting("Hello from C++");
    const char *greeting_utf8 = greeting.c_str();
    return env->NewStringUTF(greeting_utf8);
}
extern "C"
JNIEXPORT void JNICALL
Java_com_example_audiodecode_MainActivity_audioDecode(JNIEnv *env, jobject thiz, jstring input,
                                                      jstring output) {

    const char *src_url = env->GetStringUTFChars(input, NULL);
    const char *dst_file_path = env->GetStringUTFChars(output, NULL);

    AVFormatContext *avFormatContext = avformat_alloc_context();

    avformat_network_init();
    AVDictionary *avDictionary;
    av_dict_set(&avDictionary, "timeout", "20000000", 0);

    if (avformat_open_input(&avFormatContext, src_url, NULL, &avDictionary)) {
        return;
    }

    av_dict_free(&avDictionary);

    if (avformat_find_stream_info(avFormatContext, NULL) < 0) {
        return;
    }

    int audio_stream_index = -1;
    AVStream *avStream;
    for (int i = 0; i < avFormatContext->nb_streams; ++i) {
        avStream = avFormatContext->streams[i];
        if (avStream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
            audio_stream_index = i;
            break;
        }
    }

    if (audio_stream_index == -1) {
        return;
    }
    AVCodecParameters *avCodecParameters = avStream->codecpar;
    AVCodec *avCodec = avcodec_find_decoder(avCodecParameters->codec_id);

    AVCodecContext *avCodecContext = avcodec_alloc_context3(avCodec);
    if (avcodec_parameters_to_context(avCodecContext, avCodecParameters)) {
        return;
    }
    if (avcodec_open2(avCodecContext, avCodec, NULL)) {
        return;
    }

    // todo 定义重载样需要的数据
    // 1、初始化一些重采样需要的设置
    //输入的信息
    AVSampleFormat in_sample_fmt = avCodecContext->sample_fmt;
    uint64_t in_channel_layout = avCodecContext->channel_layout;
    int in_sample_rate = avCodecContext->sample_rate;
    LOGD("in_sample_rate = %d",in_sample_rate);

    //输出信息
    int out_channel_layout = AV_CH_LAYOUT_STEREO;
    int out_sample_rate = in_sample_rate;
    AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_S16;

    SwrContext *swrContext = swr_alloc();
    swr_alloc_set_opts(swrContext,
                       out_channel_layout, out_sample_fmt, out_sample_rate,
                       in_channel_layout, in_sample_fmt, in_sample_rate,
                       0, NULL);

    swr_init(swrContext);

    //定义一个缓存
    int channel_layout = av_get_channel_layout_nb_channels(out_channel_layout);
    int sample_fmt = av_get_bytes_per_sample(AV_SAMPLE_FMT_S16);
    LOGD("channel_layout = %d", channel_layout);
    int out_buffer_size = channel_layout * in_sample_rate * sample_fmt;
    LOGD("out_buffer_size = %d", out_buffer_size);
    uint8_t *out_buffers = static_cast<uint8_t *>(av_malloc(out_buffer_size/*2 * out_sample_rate*/));

    FILE *out_pcm = fopen(dst_file_path, "wb");

    AVPacket *avPacket = av_packet_alloc();

    while (av_read_frame(avFormatContext, avPacket) >= 0) {

        AVFrame *avFrame = av_frame_alloc();
        if (avcodec_send_packet(avCodecContext, avPacket)) {
            break;
        }

        int ret = avcodec_receive_frame(avCodecContext, avFrame);
        if (ret == AVERROR(EAGAIN)) {
            continue;
        } else if (ret < 0) {
            return;
        }

        if (avPacket->stream_index != audio_stream_index) {
            return;
        }

        //已经转码完成


        int64_t dst_nb_samples = av_rescale_rnd(swr_get_delay(swrContext,avFrame->sample_rate) + avFrame->nb_samples,
                                                out_sample_rate,avFrame->sample_rate,AV_ROUND_UP);

        // 音频重采样
        // 声卡 要求 音频 输出的格式统一（采用率统一，通道数统一，...）
        // 把PCM原始音频数据  --> 统一处理 --》 建立统一格式

        swr_convert(swrContext,
                // 输出相关的
                    &out_buffers, dst_nb_samples,
                // 输入相关的
                    (const uint8_t **) (avFrame->data), avFrame->nb_samples
        );

        int out_buffer_size = av_samples_get_buffer_size(NULL, 2, avFrame->nb_samples,
                                                         out_sample_fmt, 1);
        fwrite(out_buffers, 1, out_buffer_size, out_pcm);


        av_frame_free(&avFrame);
    }
    av_packet_free(&avPacket);

    fclose(out_pcm);

    env->ReleaseStringUTFChars(input, src_url);
    env->ReleaseStringUTFChars(output, dst_file_path);
}

上层代码

/**
 * Demo activity: shows the string returned by the native library and decodes
 * "test.mp3" from the app's external cache directory into "test.pcm".
 */
public class MainActivity extends AppCompatActivity {

    // Used to load the 'native-lib' library on application startup.
    static {
        System.loadLibrary("native-lib");
    }

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        // Example of a call to a native method
        TextView tv = findViewById(R.id.sample_text);
        tv.setText(stringFromJNI());

        // getExternalCacheDir() returns null while shared storage is unavailable.
        File externalCacheDir = getExternalCacheDir();
        if (externalCacheDir == null) {
            Log.w("dsh", "external cache directory is not available");
            return;
        }
        Log.d("dsh", externalCacheDir.getAbsolutePath());

        final String inputPath = new File(externalCacheDir, "test.mp3").getAbsolutePath();
        final String outputPath = new File(externalCacheDir, "test.pcm").getAbsolutePath();

        // Decoding a whole file blocks until it is finished, so it is run on a
        // worker thread instead of the UI thread.
        new Thread(new Runnable() {
            @Override
            public void run() {
                audioDecode(inputPath, outputPath);
            }
        }, "audio-decode").start();
    }

    /**
     * A native method that is implemented by the 'native-lib' native library,
     * which is packaged with this application.
     */
    public native String stringFromJNI();

    /**
     * Decodes the audio of {@code input} into a raw PCM file at {@code output}.
     * Implemented by the 'native-lib' native library.
     *
     * @param input  absolute path of the media file to decode
     * @param output absolute path of the PCM file to write
     */
    public native void audioDecode(String input,String output);
}

HopCoder

发布了58 篇原创文章 · 获赞 16 · 访问量 6万+

私信关注