Transcoding AAC to Opus with FFmpeg

AacToOpus.h 

//
// Created by hhy on 2020/11/23.
//

#ifndef FFMPEGTEST_AACTOOPUS_H
#define FFMPEGTEST_AACTOOPUS_H

#include <string>
#include <iostream>
#include <chrono>

#ifdef __cplusplus
extern "C" {
#endif
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/frame.h>
#include <libavutil/mem.h>
#include <libavutil/opt.h>
#include <libavutil/channel_layout.h>
#include <libavutil/samplefmt.h>
#include <libswresample/swresample.h>
#include <libavutil/frame.h>
#include <libavutil/mem.h>
#include <libswscale/swscale.h>

#ifdef __cplusplus
}
#endif

// Decodes compressed audio packets into raw PCM with libavcodec.
// initialize() opens the decoder registered under the name "aac".
//
// The object owns its AVFrame, AVPacket and AVCodecContext and releases them
// in the destructor, so copying is disabled (a copy would free them twice).
class AudioDecoder
{
private:
    // Scratch frame that receives each decoded frame.
    AVFrame* frame_ = nullptr;
    // Scratch packet whose data/size are pointed at the caller's packet.
    AVPacket* packet_ = nullptr;
    // Decoder context; null until initialize() has allocated it.
    AVCodecContext* codec_ctx_ = nullptr;
    // Not used by any method visible in this file.
    int codec_id_ = 0;
public:
    AudioDecoder();
    virtual ~AudioDecoder();
    AudioDecoder(const AudioDecoder&) = delete;
    AudioDecoder& operator=(const AudioDecoder&) = delete;
    // Allocates and opens the decoder. Returns 0 on success, -1 on failure.
    int initialize();
    // Decodes one packet. On success `size` holds the number of PCM bytes
    // written to `buf`; only the first data plane of each frame is copied.
    // Returns 0 on success, -1 on failure.
    virtual int decode(AVPacket *pkt, char *buf, int &size);
    // Returns the decoder context (still owned by this object).
    AVCodecContext* codec_ctx();
};

// Encodes raw PCM into compressed packets with libavcodec.
// initialize() opens the encoder registered under the name "libopus".
//
// The object owns its AVCodecContext and AVFrame and releases them in the
// destructor, so copying is disabled (a copy would free them twice).
class AudioEncoder
{
private:
    int channels_ = 0;
    int sampling_rate_ = 0;
    // Encoder context; null until initialize() has allocated it.
    AVCodecContext* codec_ctx_ = nullptr;
    // Bytes of PCM per encode() call, computed by initialize().
    int want_bytes_ = 0;
    // Reusable input frame; defaults to null so the destructor is safe even
    // when initialize() was never called.
    AVFrame* frame_ = nullptr;
public:
    AudioEncoder(int samplerate, int channels);
    virtual ~AudioEncoder();
    AudioEncoder(const AudioEncoder&) = delete;
    AudioEncoder& operator=(const AudioEncoder&) = delete;
    // Allocates and opens the encoder. Returns 0 on success, -1 on failure.
    int initialize();
    // Number of PCM bytes the encoder expects per encode() call. When > 0 the
    // caller must feed exactly this many bytes. Only meaningful after
    // initialize() has succeeded.
    int want_bytes();
    // Encodes the PCM bytes carried in `frame` (data/size). On success `size`
    // holds the number of encoded bytes written to `buf` (0 when the encoder
    // produced no packet). Returns 0 on success, -1 on failure.
    virtual int encode(AVPacket *frame, char *buf, int &size);
    // Returns the encoder context (still owned by this object).
    AVCodecContext* codec_ctx();
};

// Converts PCM between sample rates, channel layouts and sample formats with
// libswresample.
//
// The object owns the source/destination sample buffers and the SwrContext and
// releases them in the destructor, so copying is disabled (a copy would free
// them twice).
class AudioResample
{
private:
    // Source (input) description and staging buffer.
    int src_rate_;
    int src_ch_layout_;
    int src_nb_channels_;
    enum AVSampleFormat src_sample_fmt_;
    int src_linesize_;
    int src_nb_samples_;
    uint8_t **src_data_;

    // Destination (output) description and conversion buffer.
    int dst_rate_;
    int dst_ch_layout_;
    int dst_nb_channels_;
    enum AVSampleFormat dst_sample_fmt_;
    int dst_linesize_;
    int dst_nb_samples_;
    uint8_t **dst_data_;

    // Sample capacity of the currently allocated destination buffer.
    int max_dst_nb_samples_;
    struct SwrContext *swr_ctx_;
public:
    // src_nb is the number of samples per channel fed to each resample() call.
    AudioResample(int src_rate, int src_layout, enum AVSampleFormat src_fmt,
                     int src_nb, int dst_rate, int dst_layout, enum AVSampleFormat dst_fmt);
    virtual ~AudioResample();
    AudioResample(const AudioResample&) = delete;
    AudioResample& operator=(const AudioResample&) = delete;
    // Creates the resampling context and buffers. Returns 0 on success, -1 on failure.
    int initialize();
    // Resamples the PCM bytes carried in `pcm` (data/size). `size` is the
    // capacity of `buf` on entry and the number of bytes written on return.
    // Returns 0 on success, -1 on failure.
    virtual int resample(AVPacket *pcm, char *buf, int &size);
};

// Transcoding pipeline: AudioDecoder -> AudioResample -> AudioEncoder.
//
// The object owns the three stages and the PCM accumulation buffer and
// releases them in the destructor, so copying is disabled (a copy would free
// them twice). Every member has a default so that a partially initialized
// object is always safe to destroy.
class AacToOpus
{
private:
    AudioDecoder *dec_ = nullptr;
    AudioEncoder *enc_ = nullptr;
    // Created by transcode(), not by initialize().
    AudioResample *resample_ = nullptr;
    int dst_channels_ = 0;
    int dst_samplerate_ = 0;
    // Number of valid bytes currently buffered in data_.
    int size_ = 0;
    // Accumulates resampled PCM until a full encoder frame is available.
    char *data_ = nullptr;
    // src_codec_ / dst_codec_ are not used by any method visible in this file.
    int src_codec_ = 0;
    int dst_codec_ = 0;
    // Bytes of PCM the encoder expects per call (AudioEncoder::want_bytes()).
    int enc_want_bytes_ = 0;
    // Only referenced by commented-out debugging code in the implementation.
    FILE *src_fp = nullptr;
public:
    AacToOpus(int channels, int samplerate);
    virtual ~AacToOpus();
    AacToOpus(const AacToOpus&) = delete;
    AacToOpus& operator=(const AacToOpus&) = delete;
    // Creates and opens the decoder and encoder. Returns 0 on success, -1 on failure.
    int initialize();
    // Transcodes one input packet. Encoded packet i is copied to buf[i] and
    // its length stored in buf_len[i]; n receives the number of packets
    // produced. Returns 0 on success, -1 on failure.
    virtual int transcode(AVPacket *pkt, char **buf, int *buf_len, int &n);
};



#endif //FFMPEGTEST_AACTOOPUS_H

AacToOpus.cpp

//
// Created by hhy on 2020/11/23.
//

#include "AacToOpus.h"
// Byte capacity of the scratch buffer that holds resampled PCM.
constexpr int kFrameBufMax = 40960;
// Byte capacity of the scratch buffers that hold decoded PCM and one encoded packet.
constexpr int kPacketBufMax = 8192;
// Upper bound on the number of Opus packets handled per transcode call.
constexpr int kMaxOpusPackets = 8;
// Upper bound on the size in bytes of a single Opus packet.
constexpr int kMaxOpusPacketSize = 4096;

// Starts with no FFmpeg resources; initialize() allocates them.
// codec_id_ is zeroed as well so that no member is left indeterminate.
AudioDecoder::AudioDecoder()
        : frame_(nullptr),
          packet_(nullptr),
          codec_ctx_(nullptr),
          codec_id_(0)
{
}

// Releases whatever initialize() managed to allocate. Each FFmpeg free
// function takes the address of the handle and writes NULL back to it, so no
// explicit reset is needed afterwards.
AudioDecoder::~AudioDecoder()
{
    if (codec_ctx_ != nullptr) avcodec_free_context(&codec_ctx_);
    if (frame_ != nullptr) av_frame_free(&frame_);
    if (packet_ != nullptr) av_packet_free(&packet_);
}

// Looks up the decoder named "aac", opens it with default options and
// allocates the scratch frame and packet used by decode().
//
// Returns 0 on success. On failure prints the failing step and returns -1;
// anything allocated so far is released by the destructor.
//
// NOTE(review): the decoder is opened without any stream parameters or
// extradata — confirm the incoming packets are self-describing (e.g. ADTS).
int AudioDecoder::initialize()
{
    const char* codec_name = "aac";
    const AVCodec *codec = avcodec_find_decoder_by_name(codec_name);
    if (!codec) {
        // The original message named avcodec_find_encoder, which is not the call that failed.
        printf("avcodec_find_decoder_by_name error!\n");
        return -1;
    }

    codec_ctx_ = avcodec_alloc_context3(codec);
    if (!codec_ctx_) {
        printf("avcodec_alloc_context3 error!\n");
        return -1;
    }

    if (avcodec_open2(codec_ctx_, codec, NULL) < 0) {
        printf("avcodec_open2 error!\n");
        return -1;
    }

    frame_ = av_frame_alloc();
    if (!frame_) {
        printf("av_frame_alloc error!\n");
        return -1;
    }

    packet_ = av_packet_alloc();
    if (!packet_) {
        printf("av_packet_alloc error!\n");
        return -1;
    }

    return 0;
}

// Decodes one compressed packet and appends the PCM of every frame the
// decoder returns to `buf`.
//
// pkt  - input packet; only its data pointer and size are used.
// buf  - output buffer for the PCM bytes.
// size - in: capacity of `buf` in bytes; out: number of bytes written.
//
// Only the first data plane of each frame (frame_->data[0]) is copied, and
// only nb_samples * bytes-per-sample bytes of it, i.e. one channel's worth of
// samples. This matches the previous behaviour, which deliberately kept a
// single channel.
//
// Returns 0 on success (including "decoder needs more input", in which case
// size is 0) and -1 on failure or when `buf` is too small.
int AudioDecoder::decode(AVPacket *pkt, char *buf, int &size)
{
    // decode() before a successful initialize() used to dereference null.
    if (!codec_ctx_ || !frame_ || !packet_ || !pkt || !buf) {
        return -1;
    }

    // Borrow the caller's payload; packet_ does not take ownership of it.
    packet_->data = pkt->data;
    packet_->size = pkt->size;

    int ret = avcodec_send_packet(codec_ctx_, packet_);
    if (ret < 0) {
        return -1;
    }

    const int capacity = size;
    size = 0;

    for (;;) {
        ret = avcodec_receive_frame(codec_ctx_, frame_);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            // Nothing more to read for this packet.
            return 0;
        }
        if (ret < 0) {
            return -1;
        }

        const int bytes_per_sample = av_get_bytes_per_sample(codec_ctx_->sample_fmt);
        if (bytes_per_sample <= 0) {
            return -1;
        }

        // The previous per-sample loop copied nothing when there were no channels.
        if (codec_ctx_->channels <= 0 || frame_->nb_samples <= 0) {
            continue;
        }

        // One contiguous copy replaces the per-sample memcpy loop.
        const int chunk = frame_->nb_samples * bytes_per_sample;
        if (chunk > capacity - size) {
            // The capacity used to be captured but never checked.
            printf("decode buffer too small, need %d more bytes, %d left\n", chunk, capacity - size);
            return -1;
        }
        memcpy(buf + size, frame_->data[0], chunk);
        size += chunk;
    }
}

// Accessor for the decoder context. The pointer stays owned by this object
// and is null until initialize() has allocated it.
AVCodecContext* AudioDecoder::codec_ctx() {
    return codec_ctx_;
}

// Records the target format; no FFmpeg resources are allocated until
// initialize(). frame_ must start out null because the destructor tests it:
// leaving it uninitialized made destroying a never-initialized encoder free a
// garbage pointer.
AudioEncoder::AudioEncoder(int samplerate, int channels)
        : channels_(channels),
          sampling_rate_(samplerate),
          codec_ctx_(nullptr),
          want_bytes_(0),
          frame_(nullptr)
{
}

// Frees the encoder context and the reusable input frame, if they exist.
AudioEncoder::~AudioEncoder()
{
    if (codec_ctx_ != nullptr) avcodec_free_context(&codec_ctx_);
    if (frame_ != nullptr) av_frame_free(&frame_);
}

// Opens the "libopus" encoder for the configured sample rate and channel
// count, and allocates the reusable input frame sized to one encoder frame.
//
// After success want_bytes() reports how many PCM bytes each encode() call
// must supply (channels * frame_size * bytes per sample).
//
// Returns 0 on success. On failure prints the failing step and returns -1;
// anything allocated so far is released by the destructor.
int AudioEncoder::initialize()
{
    frame_ = av_frame_alloc();
    if (!frame_) {
        printf("Could not allocate audio frame\n");
        return -1;
    }

    const char* codec_name = "libopus";
    const AVCodec *codec = avcodec_find_encoder_by_name(codec_name);
    if (!codec) {
        printf("Could not find encoder %s\n", codec_name);
        return -1;
    }

    codec_ctx_ = avcodec_alloc_context3(codec);
    if (!codec_ctx_) {
        printf("Could not allocate encoder context\n");
        return -1;
    }

    codec_ctx_->sample_rate = sampling_rate_;
    codec_ctx_->channels = channels_;
    codec_ctx_->channel_layout = av_get_default_channel_layout(channels_);
    codec_ctx_->bit_rate = 48000;
    // Interleaved float input.
    codec_ctx_->sample_fmt = AV_SAMPLE_FMT_FLT;
    // -2 is FFmpeg's "experimental" compliance level (the `-strict -2` switch),
    // which permits encoders that are flagged experimental.
    codec_ctx_->strict_std_compliance = -2;

    const int open_ret = avcodec_open2(codec_ctx_, codec, NULL);
    if (open_ret < 0) {
        // Include the FFmpeg error code so the failure can be diagnosed.
        printf("Could not open codec, %d\n", open_ret);
        return -1;
    }

    // Bytes of PCM that make up exactly one encoder frame.
    int n_bytes_per_sample = av_get_bytes_per_sample(codec_ctx_->sample_fmt);
    want_bytes_ = codec_ctx_->channels * codec_ctx_->frame_size * n_bytes_per_sample;
    printf("want_bytes_:%d\n", want_bytes_);

    // Describe the frame, then let FFmpeg allocate its sample buffer.
    frame_->format = codec_ctx_->sample_fmt;
    frame_->nb_samples = codec_ctx_->frame_size;
    frame_->channel_layout = codec_ctx_->channel_layout;

    const int buf_ret = av_frame_get_buffer(frame_, 0);
    if (buf_ret < 0) {
        printf("Could not get audio frame buffer, %d\n", buf_ret);
        return -1;
    }

    return 0;
}

// Number of PCM bytes encode() expects per call; 0 until initialize() has
// computed it.
int AudioEncoder::want_bytes() {
    return want_bytes_;
}

// Encodes one block of PCM.
//
// frame - carrier for the PCM input: `data` points at the samples and `size`
//         is their length in bytes. When want_bytes() > 0 the size must equal
//         it exactly.
// buf   - receives the encoded packet. Its capacity is not passed in, so the
//         caller must provide room for a whole encoded packet.
// size  - out only: number of encoded bytes written to `buf`, 0 when the
//         encoder produced no packet for this input.
//
// If the encoder returns several packets for one input, only the last one is
// kept in `buf` (limitation carried over from the original API).
//
// Returns 0 on success, -1 on failure.
int AudioEncoder::encode(AVPacket *frame, char *buf, int &size)
{
    // encode() before a successful initialize() used to dereference null.
    if (!codec_ctx_ || !frame_ || !frame || !buf) {
        return -1;
    }

    if (want_bytes_ > 0 && frame->size != want_bytes_) {
        printf("invalid frame size %d, should be %d\n", frame->size, want_bytes_);
        return -1;
    }

    // The encoder may still reference the buffer handed over by the previous
    // avcodec_send_frame(); make sure we own a writable buffer before refilling it.
    int r0 = av_frame_make_writable(frame_);
    if (r0 < 0) {
        printf("Could not make the audio frame writable, %d\n", r0);
        return -1;
    }

    // Never copy more than the frame's sample buffer can hold.
    if (frame->size < 0 || frame->size > frame_->linesize[0]) {
        printf("invalid frame size %d, frame buffer holds %d\n", frame->size, frame_->linesize[0]);
        return -1;
    }

    // TODO: Directly use frame?
    memcpy(frame_->data[0], frame->data, frame->size);

    /* send the frame for encoding */
    r0 = avcodec_send_frame(codec_ctx_, frame_);
    if (r0 < 0) {
        printf("Error sending the frame to the encoder, %d\n", r0);
        return -1;
    }

    AVPacket pkt;
    av_init_packet(&pkt);
    pkt.data = NULL;
    pkt.size = 0;

    /* read all the available output packets (in general there may be any
     * number of them) */
    size = 0;
    while (r0 >= 0) {
        r0 = avcodec_receive_packet(codec_ctx_, &pkt);
        if (r0 == AVERROR(EAGAIN) || r0 == AVERROR_EOF) {
            // The encoder has nothing more to hand out for now.
            break;
        } else if (r0 < 0) {
            printf("Failed during encoding %d\n", r0);
            return -1;
        }

        // TODO: FIXME: Refine api, got more than one packets.
        memcpy(buf, pkt.data, pkt.size);
        size = pkt.size;
        av_packet_unref(&pkt);
    }

    return 0;
}

// Accessor for the encoder context. The pointer stays owned by this object
// and is null until initialize() has allocated it.
AVCodecContext* AudioEncoder::codec_ctx() {
    return codec_ctx_;
}

// Stores the source and destination formats. Channel counts, line sizes,
// buffers and the resampling context all start out empty and are filled in by
// initialize().
AudioResample::AudioResample(int src_rate, int src_layout, enum AVSampleFormat src_fmt,
                             int src_nb, int dst_rate, int dst_layout, AVSampleFormat dst_fmt)
        : src_rate_(src_rate),
          src_ch_layout_(src_layout),
          src_nb_channels_(0),
          src_sample_fmt_(src_fmt),
          src_linesize_(0),
          src_nb_samples_(src_nb),
          src_data_(nullptr),
          dst_rate_(dst_rate),
          dst_ch_layout_(dst_layout),
          dst_nb_channels_(0),
          dst_sample_fmt_(dst_fmt),
          dst_linesize_(0),
          dst_nb_samples_(0),
          dst_data_(nullptr),
          max_dst_nb_samples_(0),
          swr_ctx_(nullptr)
{
}

// Releases the sample buffers and the resampling context. For each buffer the
// sample storage (element 0) is freed first, then the pointer array itself;
// av_freep() and swr_free() write NULL back through the pointer they receive.
AudioResample::~AudioResample()
{
    if (src_data_ != nullptr) {
        av_freep(&src_data_[0]);
        av_freep(&src_data_);
    }

    if (dst_data_ != nullptr) {
        av_freep(&dst_data_[0]);
        av_freep(&dst_data_);
    }

    if (swr_ctx_ != nullptr) swr_free(&swr_ctx_);
}

// Builds the libswresample context from the stored source/destination formats
// and allocates one source buffer (src_nb_samples_ samples) plus a destination
// buffer sized for the rate-converted sample count, rounded up.
//
// Returns 0 on success; prints a message and returns -1 on failure.
int AudioResample::initialize()
{
    swr_ctx_ = swr_alloc();
    if (swr_ctx_ == nullptr) {
        printf("Failed swr_ctx_ is nil\n");
        return -1;
    }

    // Input side of the conversion.
    av_opt_set_int(swr_ctx_, "in_channel_layout", src_ch_layout_, 0);
    av_opt_set_int(swr_ctx_, "in_sample_rate", src_rate_, 0);
    av_opt_set_sample_fmt(swr_ctx_, "in_sample_fmt", src_sample_fmt_, 0);

    // Output side of the conversion.
    av_opt_set_int(swr_ctx_, "out_channel_layout", dst_ch_layout_, 0);
    av_opt_set_int(swr_ctx_, "out_sample_rate", dst_rate_, 0);
    av_opt_set_sample_fmt(swr_ctx_, "out_sample_fmt", dst_sample_fmt_, 0);

    if (swr_init(swr_ctx_) < 0) {
        printf("Failed to initialize the resampling context\n");
        return -1;
    }

    // Source staging buffer.
    src_nb_channels_ = av_get_channel_layout_nb_channels(src_ch_layout_);
    const int src_status = av_samples_alloc_array_and_samples(
            &src_data_, &src_linesize_, src_nb_channels_,
            src_nb_samples_, src_sample_fmt_, 0);
    if (src_status < 0) {
        printf("Could not allocate source samples\n");
        return -1;
    }

    // Destination buffer; remember its capacity so resample() can grow it later.
    dst_nb_samples_ = av_rescale_rnd(src_nb_samples_, dst_rate_, src_rate_, AV_ROUND_UP);
    max_dst_nb_samples_ = dst_nb_samples_;

    dst_nb_channels_ = av_get_channel_layout_nb_channels(dst_ch_layout_);
    const int dst_status = av_samples_alloc_array_and_samples(
            &dst_data_, &dst_linesize_, dst_nb_channels_,
            dst_nb_samples_, dst_sample_fmt_, 0);
    if (dst_status < 0) {
        printf("Could not allocate destination samples\n");
        return -1;
    }

    return 0;
}

// Resamples one block of PCM.
//
// pcm  - carrier for the input: `data` points at the samples, `size` is their
//        length in bytes. The bytes are copied to the start of the source
//        buffer; the conversion always processes src_nb_samples_ samples.
// buf  - receives the converted PCM.
// size - in: capacity of `buf` in bytes; out: number of bytes written
//        (0 when the converter produced no samples).
//
// Returns 0 on success, -1 on failure, including when `buf` is too small for
// the converted block (previously the block was dropped silently).
int AudioResample::resample(AVPacket *pcm, char *buf, int &size)
{
    // resample() after a failed initialize() used to dereference null.
    if (!swr_ctx_ || !src_data_ || !dst_data_ || !pcm || !buf) {
        printf("Failed resampler not initialized\n");
        return -1;
    }

    // The source buffer is one allocation of src_linesize_ bytes per plane.
    // For planar float the input may span every channel's plane; the previous
    // fixed factor of 2 overran the buffer for mono sources.
    int planes = 1;
    if (src_sample_fmt_ == AV_SAMPLE_FMT_FLTP) {
        planes = src_nb_channels_;
    }
    if (pcm->size < 0 || src_linesize_ * planes < pcm->size) {
        printf("Failed size not ok\n");
        return -1;
    }
    memcpy(src_data_[0], pcm->data, pcm->size);

    // Account for samples still buffered inside the resampler.
    dst_nb_samples_ = av_rescale_rnd(swr_get_delay(swr_ctx_, src_rate_) +
                                     src_nb_samples_, dst_rate_, src_rate_, AV_ROUND_UP);
    if (dst_nb_samples_ > max_dst_nb_samples_) {
        av_freep(&dst_data_[0]);
        // The old buffer is gone: forget its capacity so that a failed
        // allocation is retried on the next call instead of converting into null.
        max_dst_nb_samples_ = 0;
        const int alloc_ret = av_samples_alloc(dst_data_, &dst_linesize_, dst_nb_channels_,
                                               dst_nb_samples_, dst_sample_fmt_, 1);
        if (alloc_ret < 0) {
            printf("Failed alloc error\n");
            return -1;
        }
        max_dst_nb_samples_ = dst_nb_samples_;
    }

    const int converted = swr_convert(swr_ctx_, dst_data_, dst_nb_samples_,
                                      (const uint8_t **)src_data_, src_nb_samples_);
    if (converted < 0) {
        printf("Failed while converting\"\n");
        return -1;
    }

    const int capacity = size;
    size = 0;

    // Zero samples is a valid result (the resampler is still buffering); the
    // size query below would otherwise reject it as an error.
    if (converted == 0) {
        return 0;
    }

    const int dst_bufsize = av_samples_get_buffer_size(&dst_linesize_, dst_nb_channels_,
                                                       converted, dst_sample_fmt_, 1);
    if (dst_bufsize < 0) {
        printf("Failed Could not get sample buffer size\"\n");
        return -1;
    }

    if (dst_bufsize > capacity) {
        printf("Failed output buffer too small, need %d, have %d\n", dst_bufsize, capacity);
        return -1;
    }

    memcpy(buf, dst_data_[0], dst_bufsize);
    size = dst_bufsize;
    return 0;
}


// Records the target channel count and sample rate. The pipeline stages are
// created later by initialize() and transcode().
//
// Every member is given a defined value here: enc_want_bytes_, src_fp,
// src_codec_ and dst_codec_ were previously left indeterminate.
AacToOpus::AacToOpus(int channels, int samplerate)
        : dec_(nullptr),
          enc_(nullptr),
          resample_(nullptr),
          dst_channels_(channels),
          dst_samplerate_(samplerate),
          size_(0),
          data_(nullptr),
          src_codec_(0),
          dst_codec_(0),
          enc_want_bytes_(0),
          src_fp(nullptr)
{
}

// Destroys the pipeline stages and the PCM accumulation buffer.
// Deleting a null pointer is a no-op, so no guards are needed.
AacToOpus::~AacToOpus()
{
    delete dec_;
    delete enc_;
    delete resample_;
    // data_ is allocated with new char[], so it must be released with
    // delete[]; plain delete on it is undefined behaviour.
    delete[] data_;
}

// Creates and opens the decoder and the encoder, then allocates the buffer
// that accumulates PCM until one full encoder frame (want_bytes()) is
// available. The resampler is not created here.
//
// Returns 0 on success, -1 as soon as either stage fails to initialize.
int AacToOpus::initialize()
{
    dec_ = new AudioDecoder();
    if (dec_->initialize() != 0) {
        return -1;
    }

    enc_ = new AudioEncoder(dst_samplerate_, dst_channels_);
    if (enc_->initialize() != 0) {
        return -1;
    }

    enc_want_bytes_ = enc_->want_bytes();
    if (enc_want_bytes_ > 0) {
        data_ = new char[enc_want_bytes_];
    }

    return 0;
}

// Transcodes one input packet: decode -> resample -> encode.
//
// pkt     - compressed input packet.
// buf     - array of output buffers; encoded packet i is copied to buf[i].
//           The caller must provide kMaxOpusPackets buffers of at least
//           kMaxOpusPacketSize bytes each.
// buf_len - receives the length of each encoded packet.
// n       - out: number of packets produced (always reset, also on error).
//
// Resampled PCM is accumulated across calls until a whole encoder frame
// (enc_want_bytes_) is available, so one call may yield zero, one or several
// packets.
//
// Returns 0 on success, -1 on failure.
//
// The scratch buffers are function-local statics shared by every call and
// every instance, so this function is not reentrant.
int AacToOpus::transcode(AVPacket *pkt, char **buf, int *buf_len, int &n)
{
    n = 0;

    if (!dec_ || !enc_ || !pkt || !buf || !buf_len) {
        return -1;
    }

    static char decode_buffer[kPacketBufMax];
    static char resample_buffer[kFrameBufMax];
    static char encode_buffer[kPacketBufMax];

    int decode_len = kPacketBufMax;
    if (dec_->decode(pkt, decode_buffer, decode_len) != 0) {
        return -1;
    }
    printf("decode len:%d\n", decode_len);

    // Nothing decoded yet: the decoder context may not describe the stream,
    // and resampling would only reprocess stale samples.
    if (decode_len <= 0) {
        return 0;
    }

    if (!resample_) {
        AVCodecContext *dec_ctx = dec_->codec_ctx();
        int src_layout = static_cast<int>(dec_ctx->channel_layout);
        if (src_layout == 0) {
            // The decoder reported no layout; derive one from its channel count.
            src_layout = static_cast<int>(av_get_default_channel_layout(dec_ctx->channels));
        }
        const int dst_layout = static_cast<int>(av_get_default_channel_layout(dst_channels_));

        AudioResample *resampler = new AudioResample(dec_ctx->sample_rate, src_layout,
                                                     dec_ctx->sample_fmt, dec_ctx->frame_size,
                                                     dst_samplerate_, dst_layout,
                                                     enc_->codec_ctx()->sample_fmt);
        if (resampler->initialize() != 0) {
            // Do not keep a half-built resampler around for the next call.
            delete resampler;
            return -1;
        }
        resample_ = resampler;
    }

    // AVPacket is only used as a (pointer, size) carrier; it owns nothing.
    AVPacket pcm;
    av_init_packet(&pcm);
    pcm.data = reinterpret_cast<uint8_t *>(decode_buffer);
    pcm.size = decode_len;

    int resample_len = kFrameBufMax;
    if (resample_->resample(&pcm, resample_buffer, resample_len) != 0) {
        return -1;
    }

    // Copies the packet in encode_buffer to the next output slot, refusing to
    // overrun the caller's buffers.
    auto emit = [&](int len) -> bool {
        if (len <= 0) {
            return true;
        }
        if (n >= kMaxOpusPackets || len > kMaxOpusPacketSize) {
            printf("too many or too large opus packets, n=%d len=%d\n", n, len);
            return false;
        }
        memcpy(buf[n], encode_buffer, len);
        buf_len[n] = len;
        ++n;
        return true;
    };

    // The encoder accepts any frame size: encode the resampled block directly.
    // (This path used to encode data_/size_ instead of the resampled data.)
    if (enc_want_bytes_ <= 0) {
        pcm.data = reinterpret_cast<uint8_t *>(resample_buffer);
        pcm.size = resample_len;

        int encode_len = 0;
        if (enc_->encode(&pcm, encode_buffer, encode_len) != 0) {
            return -1;
        }
        return emit(encode_len) ? 0 : -1;
    }

    if (!data_) {
        return -1;
    }

    // Top up data_ to one full encoder frame at a time and encode each frame.
    const char *pending = resample_buffer;
    int pending_len = resample_len;
    while (size_ + pending_len >= enc_want_bytes_) {
        const int take = enc_want_bytes_ - size_;
        memcpy(data_ + size_, pending, take);
        pending += take;
        pending_len -= take;
        // The frame is consumed by this encode call whether or not it succeeds.
        size_ = 0;

        pcm.data = reinterpret_cast<uint8_t *>(data_);
        pcm.size = enc_want_bytes_;

        int encode_len = 0;
        if (enc_->encode(&pcm, encode_buffer, encode_len) != 0) {
            return -1;
        }
        if (!emit(encode_len)) {
            return -1;
        }
    }

    // Keep the remainder (less than one frame) for the next call.
    memcpy(data_ + size_, pending, pending_len);
    size_ += pending_len;

    return 0;
}

 main.cpp

#include <iostream>
#include <memory>
#include <list>
#include "AacToOpus.h"

// Byte capacity for a buffer of resampled PCM.
constexpr int kFrameBufMax = 40960;
// Byte capacity for a buffer holding one packet.
constexpr int kPacketBufMax = 8192;
// Number of Opus output slots handed to the transcoder.
constexpr int kMaxOpusPackets = 8;
// Size in bytes of each Opus output slot.
constexpr int kMaxOpusPacketSize = 4096;

int main(int argc, char* argv[]) {
    std::cout << "Hello, World!" << std::endl;

    const std::string input_ = "输入rtsp url";
    AVFormatContext *pFormatCtx = avformat_alloc_context();
    int audioIndex = -1;
    AVPacket *packet;

    static char* opus_payloads[kMaxOpusPackets];

    AVDictionary *options = nullptr;
    av_dict_set(&options, "rtsp_transport", "tcp", 0);
    std::cout << "url:" << input_.c_str() << std::endl;
    if (avformat_open_input(&pFormatCtx, input_.c_str(), NULL, &options) != 0) {
        printf("Couldn't open input stream.\n");
        if (options) av_dict_free(&options);
        avformat_close_input(&pFormatCtx);
        return -1;
    }

    std::cout << "avformat_find_stream_info start" << std::endl;

    if (avformat_find_stream_info(pFormatCtx, NULL) < 0) {
        printf("Couldn't find stream information.\n");
        if (options) av_dict_free(&options);
        avformat_close_input(&pFormatCtx);
        return -1;
    }

    av_dump_format(pFormatCtx, NULL, input_.c_str(), 0);

    for (int i = 0; i < pFormatCtx->nb_streams; i++) {
        if (pFormatCtx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
            if (audioIndex != -1) {
                continue;
            }
            audioIndex = i;

            uint64_t channel = pFormatCtx->streams[audioIndex]->codec->channels;
            uint64_t sample_rate = pFormatCtx->streams[audioIndex]->codec->sample_rate;
            enum AVMediaType codec_type = pFormatCtx->streams[audioIndex]->codec->codec_type;
            int64_t bit_rate = pFormatCtx->streams[audioIndex]->codec->bit_rate;
            int64_t channel_layout = pFormatCtx->streams[audioIndex]->codec->channel_layout;
            enum AVSampleFormat sample_fmt = pFormatCtx->streams[audioIndex]->codec->sample_fmt;
            //channel:2,sample_rate:48000,codec_type:1,bit_rate:0,channel_layout:3
            std::cout <<"channel:" << channel<<",sample_rate:" << sample_rate<<",codec_type:" << codec_type<<",bit_rate:" << bit_rate<<",channel_layout:" << channel_layout <<std::endl;

        }
    }

    packet = (AVPacket *) av_malloc(sizeof(AVPacket));
    AVFrame *decoded_frame = NULL;

    std::shared_ptr<AacToOpus> opus_ptr;
    opus_ptr.reset(new AacToOpus(2, 48000));

    if (opus_ptr->initialize() != 0) {
        std::cout << "opus init error" << std::endl;
    }


    for (;;) {
        auto time1 = std::chrono::steady_clock::now();
        if (av_read_frame(pFormatCtx, packet) >= 0) {
            if (packet->stream_index == audioIndex) {

                if (packet->size) {
                    if (opus_ptr) {
                        static char* opus_payloads[kMaxOpusPackets];
                        static char opus_packets_cache[kMaxOpusPackets][kMaxOpusPacketSize];
                        opus_payloads[0] = &opus_packets_cache[0][0];
                        for (int i = 1; i < kMaxOpusPackets; i++) {
                            opus_payloads[i] = opus_packets_cache[i];
                        }

                        int nn_opus_packets = 0;
                        int opus_sizes[kMaxOpusPackets];
                        if (opus_ptr->transcode(packet, opus_payloads, opus_sizes, nn_opus_packets) != 0) {
                            std::cout << "opus transcode error" << std::endl;
                        }

                        for (int i = 0; i < nn_opus_packets; i++) {
                            timestamp += 960;
                            std::cout << "opus size:"<< opus_sizes[i] << std::endl;
                            //send data 
                        }
                    }
                    //std::cout << "recv audio dts:" << packet->dts << std::endl;
                }
            }

            av_free_packet(packet);

        } else {
            break;
        }
    }


    if (packet) av_free_packet(packet);
    if (options) av_dict_free(&options);
    avformat_close_input(&pFormatCtx);

    av_frame_free(&decoded_frame);


    return 0;
}

note:

1. The code is mainly based on the AAC-to-Opus transcoding code in SRS.

2. The AAC input is dual-channel (stereo), but only one channel's data should be taken from the decoder; otherwise the sound has problems after encoding.

3. The main.cpp code needs to be modified before use (for example, the RTSP URL is only a placeholder).

4. You may first need to rebuild FFmpeg with AAC and Opus support, e.g. by passing `--enable-encoder=opus --enable-encoder=libopus --enable-libopus` (plus any other options you need) to `./configure`.

Guess you like

Origin blog.csdn.net/hyl999/article/details/109994211