NDK学习笔记：FFmpeg解压MP34提取音频PCM

承接 FFmpeg解压MP4提取视频YUV ，这次我们需要提取的是音频原始数据PCM。代码流程大同小异，主要区别就是AVFrame->PCM数据的转换。废话不说了，直接贴代码。

/**
 * JNI bridge to the native FFmpeg helpers used by the feature tests in this series.
 *
 * The static initializer loads the FFmpeg shared libraries followed by the
 * project's own "zzr-ffmpeg-utils" library, which is expected to provide the
 * native methods declared here.
 */
public class ZzrFFmpeg {

    /**
     * Decodes the audio track of a media file and writes it out as raw PCM.
     *
     * @param input_media_str path of the input media file
     * @param output_pcm_str  path of the PCM file to write
     * @return 0 on success, a negative value when a setup step fails
     */
    public static native int Mp34TOPcm(String input_media_str, String output_pcm_str);

    /**
     * Decodes the video track of an MP4 file and writes it out as raw YUV.
     * NOTE(review): the native implementation is not shown alongside this class;
     * the return convention is presumably the same as Mp34TOPcm - confirm.
     *
     * @param input_mp4_str  path of the input MP4 file
     * @param output_yuv_str path of the YUV file to write
     */
    public static native int Mp4TOYuv(String input_mp4_str, String output_yuv_str);

    static
    {
        // Try loading libraries...
        try {
            System.loadLibrary("avutil");
            System.loadLibrary("swscale");
            System.loadLibrary("swresample");
            System.loadLibrary("avcodec");
            System.loadLibrary("avformat");

            System.loadLibrary("postproc");
            System.loadLibrary("avfilter");
            System.loadLibrary("avdevice");

            System.loadLibrary("zzr-ffmpeg-utils");
            Log.w("ZzrBlogApp", "ZzrFFmpeg System.loadLibrary ...");
        } catch (UnsatisfiedLinkError e) {
            // A missing or unloadable library is reported as UnsatisfiedLinkError,
            // which is an Error and therefore not covered by catch (Exception).
            e.printStackTrace();
        } catch (Exception e) {
            // SecurityException / NullPointerException from loadLibrary.
            e.printStackTrace();
        }
    }
}

功能性测试代码一般都放在ZzrFFmpeg这个类。方法命名 Mp34TOPcm(String input_media_str, String output_pcm_str);

// Size in bytes of the PCM output buffer. The expansion is parenthesized so the
// macro behaves as a single value inside larger expressions (e.g. x / MAX_AUDIO_FARME_SIZE).
#define MAX_AUDIO_FARME_SIZE (48000 * 2)

/**
 * JNI entry point for ZzrFFmpeg.Mp34TOPcm: opens the input media file, locates
 * its first audio stream and opens a matching decoder.
 *
 * Every exit goes through the single cleanup label so that the JNI strings and
 * the FFmpeg contexts are released on failure as well as on success.
 *
 * @param input_media_jstr path of the input media file
 * @param output_pcm_jstr  path of the PCM file to write
 * @return 0 on success;
 *         -1 input path unavailable or the input cannot be opened,
 *         -2 stream info cannot be read,
 *         -3 no decoder for the audio codec,
 *         -4 decoder context allocation failed,
 *         -5 decoder cannot be opened,
 *         -6 the input has no audio stream,
 *         -7 stream parameters cannot be copied into the decoder context.
 */
JNIEXPORT jint JNICALL
Java_org_zzrblog_mp_ZzrFFmpeg_Mp34TOPcm(JNIEnv *env, jclass clazz, jstring input_media_jstr, jstring output_pcm_jstr) {
    // Everything the cleanup path touches is initialized before the first
    // "goto cleanup" so that cleanup never reads an indeterminate value.
    jint result = 0;
    int audio_stream_idx = -1;
    AVFormatContext *pFormatContext = NULL;
    AVCodecContext *pCodecContext = NULL;
    AVCodec *pCodec = NULL;
    const char *input_media_cstr = (*env)->GetStringUTFChars(env, input_media_jstr, 0);
    const char *output_pcm_cstr = (*env)->GetStringUTFChars(env, output_pcm_jstr, 0);
    if (input_media_cstr == NULL || output_pcm_cstr == NULL) {
        // GetStringUTFChars returns NULL when the copy cannot be allocated.
        LOGE("%s","获取文件路径字符串失败");
        result = -1;
        goto cleanup;
    }

    av_log_set_callback(custom_log);
    // Register components
    av_register_all();
    avcodec_register_all();
    avformat_network_init();

    pFormatContext = avformat_alloc_context();
    if(avformat_open_input(&pFormatContext, input_media_cstr,NULL,NULL) != 0){
        LOGE("%s","打开输入视频文件失败");
        result = -1;
        goto cleanup;
    }
    if(avformat_find_stream_info(pFormatContext, NULL) < 0){
        LOGE("%s","获取媒体信息失败");
        result = -2;
        goto cleanup;
    }
    // Pick the first audio stream; nb_streams is unsigned, so the index is too.
    for(unsigned int i=0; i<pFormatContext->nb_streams; i++)
    {
        if(pFormatContext->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
            audio_stream_idx = (int) i;
            break;
        }
    }
    if(audio_stream_idx < 0) {
        // Without this check streams[-1] would be read below.
        LOGE("%s","找不到音频流");
        result = -6;
        goto cleanup;
    }
    pCodec = avcodec_find_decoder(pFormatContext->streams[audio_stream_idx]->codecpar->codec_id);
    if(pCodec == NULL){
        LOGE("%s","无法获取解码器");
        result = -3;
        goto cleanup;
    }
    pCodecContext = avcodec_alloc_context3(pCodec);
    if(pCodecContext == NULL) {
        LOGE("%s","创建解码器对应的上下文失败.");
        result = -4;
        goto cleanup;
    }
    if(avcodec_parameters_to_context(pCodecContext, pFormatContext->streams[audio_stream_idx]->codecpar) < 0) {
        LOGE("%s","复制解码器参数失败");
        result = -7;
        goto cleanup;
    }
    if(avcodec_open2(pCodecContext, pCodec, NULL) < 0) {
        LOGE("%s","解码器无法打开");
        result = -5;
        goto cleanup;
    }


    // ...

cleanup:
    if (pCodecContext != NULL) {
        avcodec_free_context(&pCodecContext);
    }
    if (pFormatContext != NULL) {
        avformat_close_input(&pFormatContext);
    }
    if (input_media_cstr != NULL) {
        (*env)->ReleaseStringUTFChars(env, input_media_jstr, input_media_cstr);
    }
    if (output_pcm_cstr != NULL) {
        (*env)->ReleaseStringUTFChars(env, output_pcm_jstr, output_pcm_cstr);
    }
    return result;
}

前期的AVFormatContext、AVCodec和AVCodecContext准备工作已经教学过了。这里就不再重复论述。

要想从AVFrame->PCM的转换，我们首先要让ffmpeg知道你想要怎样的一组pcm，所以我们需要设置采样参数。借助SwrContext能完成这项工作。在开始之前我们不妨到SwrContext的头文件swresample.h看看使用方法：

/* Excerpt quoted from swresample.h (libswresample), shown here for reference. */
/**
 * The libswresample context. Unlike libavcodec and libavformat, this structure
 * is opaque. This means that if you would like to set options, you must use
 * the @ref avoptions API and cannot directly set values to members of the
 * structure.
 */
typedef struct SwrContext SwrContext;

/**
 * Allocate SwrContext.
 *
 * If you use this function you will need to set the parameters (manually or
 * with swr_alloc_set_opts()) before calling swr_init().
 *
 * @see swr_alloc_set_opts(), swr_init(), swr_free()
 * @return NULL on error, allocated context otherwise
 */
struct SwrContext *swr_alloc(void);

从附带的说明我们可以知道，我们不能直接设置SwrContext的参数，需要swr_alloc_set_opts(), swr_init(), swr_free()这组API。

接下来我们就正常开始AVFrame->PCM的转换工作。

    // ... ...
    
    //开始解码
    AVPacket *packet = av_packet_alloc();
    AVFrame *frame = av_frame_alloc();
    //frame->16bit双声道 采样率44100 PCM采样格式
    SwrContext *swrCtx = swr_alloc();
    //  设置采样参数-------------start
    //输入的采样格式
    enum AVSampleFormat in_sample_fmt = pCodecContext->sample_fmt;
    //输出采样格式16bit PCM
    enum AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_S16;
    //输入采样率
    int in_sample_rate = pCodecContext->sample_rate;
    //输出采样率
    int out_sample_rate = 44100;
    //输入的声道布局
    uint64_t in_ch_layout = pCodecContext->channel_layout;
    //输出的声道布局（立体声）
    uint64_t out_ch_layout = AV_CH_LAYOUT_STEREO;
    //  设置采样参数---------------end
    swr_alloc_set_opts(swrCtx,
                       out_ch_layout,out_sample_fmt,out_sample_rate,
                       in_ch_layout,in_sample_fmt,in_sample_rate,
                       0, NULL);
    swr_init(swrCtx);


    //16bit 44100 PCM 数据 内存空间。
    uint8_t *out_buffer = (uint8_t *)av_malloc(MAX_AUDIO_FARME_SIZE);
    //根据声道个数 获取 匹配的声道布局（2个声道，立体声stereo）
    //av_get_default_channel_layout(codecCtx->channels);
    //根据声道布局 获取 输出的声道个数
    int out_channel_nb = av_get_channel_layout_nb_channels(out_ch_layout);

    FILE *fp_pcm = fopen(output_pcm_cstr,"wb");
    int ret;
    while(av_read_frame(pFormatContext, packet) >= 0)
    {
        if(packet->stream_index == audio_stream_idx)
        {
            ret = avcodec_send_packet(pCodecContext, packet);
            if(ret < 0) {
                LOGE("avcodec_send_packet：%d\n", ret);
                continue;
            }
            while(ret >= 0) {
                ret = avcodec_receive_frame(pCodecContext, frame);
                if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
                    LOGD("avcodec_receive_frame：%d\n", ret);
                    break;
                } else if (ret < 0) {
                    LOGW("avcodec_receive_frame：%d\n", AVERROR(ret));
                    goto end;  //end处进行资源释放等善后处理
                }

                if (ret >= 0)
                {   //AVFrame->Audio 参数要仔细填对
                    swr_convert(swrCtx, &out_buffer, MAX_AUDIO_FARME_SIZE, (const uint8_t **) frame->data, frame->nb_samples);
                    //获取有多少有效的数据在out_buffer的内存上
                    int out_buffer_size = av_samples_get_buffer_size(NULL, out_channel_nb,
                                                                     frame->nb_samples, out_sample_fmt, 1);
                    fwrite(out_buffer, 1, (size_t) out_buffer_size, fp_pcm);
                }
            }
        }
        av_packet_unref(packet);
    }
    LOGD("媒体文件转换PCM结束\n");


end:
    fclose(fp_pcm);
    av_frame_free(&frame);
    av_free(out_buffer);
    swr_free(&swrCtx);

    (*env)->ReleaseStringUTFChars(env, input_media_jstr, input_media_cstr);
    (*env)->ReleaseStringUTFChars(env, output_pcm_jstr, output_pcm_cstr);
    return 0;
}

AVPacket和AVFrame两个对象是解码器需要使用到的，之前已经介绍。接下来我们开始设置PCM的采样参数。

1、swr_alloc(); 新建采样参数的对象SwrContext。
2、根据 swr_alloc_set_opts 的方法接口，需要传入 out_ch_layout、out_sample_fmt、out_sample_rate、in_ch_layout、in_sample_fmt、in_sample_rate 等参数。思路是：in_xxx 一般都能从源（解码器上下文）直接获取，而 out_xxx 则是根据需求合理自定义的值。这里我们需要的是双声道（左右声道 => 立体声布局）、16位采样格式、采样率为44100的PCM数据。
3、swr_alloc_set_opts设置参数，swr_init初始化SwrContext。

设置格式已经完成，接下来就需要准备内存空间来存放解码的数据。这里用 #define MAX_AUDIO_FARME_SIZE 48000 * 2 申请 96000 字节的缓冲区。一帧音频通常只有一千个左右的采样点（例如 AAC 为 1024、MP3 为 1152），转换成双声道16位后也只有几KB，所以这个大小绰绰有余；因为我们输出的是双声道的44100，理论上刚好申请44100字节也是可以的~

    // Map a channel count to its matching default channel layout
    // (two channels -> stereo). Return value is not used in this illustration.
    av_get_default_channel_layout(pCodecContext->channels);
    // Map a channel layout back to the number of output channels it contains.
    av_get_channel_layout_nb_channels(out_ch_layout);

之后介绍一对API：av_get_default_channel_layout / av_get_channel_layout_nb_channels。看注释我们可以知道，av_get_default_channel_layout 是根据声道数（例如 2）获取对应的默认声道布局（AV_CH_LAYOUT_STEREO，在 channel_layout.h 中定义）；av_get_channel_layout_nb_channels 则是通过声道布局获取对应的声道数。

之后我们就进入模板代码，从AVFormatContext循环读取压缩的AVPacket，然后avcodec_send_packet(AVCodecContext, AVPacket); 对应的解码器，然后avcodec_receive_frame(AVCodecContext , AVFrame);获取解码后的AVFrame对象。

然后我们通过swr_convert方法把AVFrame Convert To Audio. 转换后数据就在out_buffer的内存空间上了，但是我们不能直接写入文件，我们还需要通过av_samples_get_buffer_size求得具体有多少的真实数据在out_buffer，然后才能写入文件。

然后我们可以使用ffplay的命令行检验数据的有效性：

ffplay -f rawvideo -video_size 1920x1080 10s_test.yuv

ffplay -f s16le -ar 44100 -ac 2 10s_test.pcm

ffplay可以到这里下载。

NDK学习笔记：FFmpeg解压MP34提取音频PCM（swrContext、swr_alloc_set_opts）

NDK学习笔记：FFmpeg解压MP34提取音频PCM

猜你喜欢