libopus implementiert die PCM-Kodierung für opus

opus ist ein Audioformat, das häufig für Sprachanrufe und Videokonferenzen verwendet wird. Kürzlich habe ich eine Kodierung von PCM zu Opus gemacht und bin auf viele Fallstricke gestoßen, also möchte ich es hier aufzeichnen.

Inhaltsverzeichnis

1. Grundkenntnisse

2. Prozess verwenden

2.1 Erstellen Sie einen Encoder

2.2 Encoder-Konfiguration

2.3 Kodierung

2.4 Vollständiger Code

3. Ergebnisüberprüfung

4. Referenzmaterialien

1. Grundkenntnisse

Opus unterstützt Frame-Längen von 2,5, 5, 10, 20, 40, 60 ms usw. Für 20 ms PCM-Audio mit 48000 Hz Abtastrate, 16 Bit und zwei Kanälen gilt: Die Anzahl der Samples pro ms beträgt 48000/1000 = 48, und jedes Sample belegt 16 Bit / 8 = 2 Byte. Die erforderliche Anzahl von PCM-Bytes ist also

   pcm size = 48 Samples/ms × 20 ms × 2 Byte × 2 Kanäle = 3840 Byte

Für 2-Kanal-PCM-Daten, die mit 16 Bit abgetastet werden, ist das Speicherlayout in der folgenden Abbildung dargestellt

LLLL LLLL LLLL LLLL RRRR RRRR RRRR RRRR

Die Opus-Kodierungsfunktion ist opus_encode; ihr Eingabe-Array ist ein opus_int16-Array (2 Bytes pro Element). Liegen die PCM-Daten als unsigned-char-Array vor, müssen sie in ein opus_int16-Array konvertiert werden, bevor sie an den Encoder übergeben werden können.

2. Prozess verwenden

2.1 Erstellen Sie einen Encoder

// Prototype of the libopus function that creates an encoder, quoted for
// reference. Each parameter is explained in the text that follows.
OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusEncoder *opus_encoder_create(
    opus_int32 Fs,      // sampling rate
    int channels,       // number of channels
    int application,    // coding mode (one of the OPUS_APPLICATION_* values)
    int *error          // receives the result code
);

Fs: Abtastrate, eine von 8000, 12000, 16000, 24000, 48000

channels: Anzahl der Kanäle

application: Kodierungsmodus, es gibt drei Arten:

OPUS_APPLICATION_VOIP: Sprachsignale verarbeiten, geeignet für VoIP-Geschäftsszenarien

OPUS_APPLICATION_AUDIO: Dieser Modus eignet sich für Nicht-Sprachinhalte wie Musiktypen

OPUS_APPLICATION_RESTRICTED_LOWDELAY: Modus mit niedriger Latenz

error: Ausgabeparameter, der den Fehlercode der Encoder-Erstellung aufnimmt (OPUS_OK bei Erfolg)

2.2 Encoder-Konfiguration

opus_encoder_ctl(OpusEncoder *st, int request, ...)

st: Struktur erstellt von opus_encoder_create

request: per Makro definierter Konfigurationsparameter

typische Konfiguration

    // Typical configuration, applied to the handle returned by opus_encoder_create.
    opus_encoder_ctl(encoder, OPUS_SET_VBR(0));// 0: CBR, 1: VBR
    // NOTE(review): presumably without effect while VBR is disabled above — confirm against the Opus docs.
    opus_encoder_ctl(encoder, OPUS_SET_VBR_CONSTRAINT(true));
    opus_encoder_ctl(encoder, OPUS_SET_BITRATE(96000));
    opus_encoder_ctl(encoder, OPUS_SET_COMPLEXITY(8));// 8; valid range 0~10
    opus_encoder_ctl(encoder, OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE));
    opus_encoder_ctl(encoder, OPUS_SET_LSB_DEPTH(16));// 16 bits (2 bytes) per sample
    opus_encoder_ctl(encoder, OPUS_SET_DTX(0));
    opus_encoder_ctl(encoder, OPUS_SET_INBAND_FEC(0));

2.3 Kodierung

// Prototype of the libopus encode function, quoted for reference. Each
// parameter is explained in the text that follows.
OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_encode(
    OpusEncoder *st,            // encoder instance
    const opus_int16 *pcm,      // input PCM samples (interleaved for two channels)
    int frame_size,             // samples per channel, not the size of the pcm array
    unsigned char *data,        // output buffer receiving the encoded data
    opus_int32 max_data_bytes   // size of the output buffer
)

st: Opus-Encoder-Instanz

pcm: Die Eingabe-PCM-Daten. Bei zwei Kanälen sind die Daten verschachtelt (interleaved), und die Größe ist frame_size x Kanäle x sizeof(opus_int16).

frame_size: Die Anzahl der Samples des Eingangsaudiosignals pro Kanal – nicht die Größe des pcm-Arrays. Wird beispielsweise mit 48000 Hz kodiert und beträgt die Framelänge 20 ms, sollte frame_size 48 × 20 = 960 sein; PCM-Puffergröße = frame_size x Kanäle x sizeof(opus_int16).

data: Ausgangspuffer, codierte Daten empfangen

max_data_bytes: Ausgabepuffergröße

Rückgabewert: die tatsächliche Größe der codierten Ausgabedaten

2.4 Vollständiger Code

base_type.h


#ifndef __BASE_TYPE_H__
#define __BASE_TYPE_H__
/* One unit of stream data passed out of the encoder (see OpusEncoderImpl::PopFrame). */
typedef struct StreamInfo
{
    unsigned char *data; /* payload buffer; the encoder thread allocates it with malloc() */
    int len;             /* number of valid bytes in data */
    int dts;             /* the encoder stores 20 here for every frame; presumably milliseconds — confirm */
}StreamInfo;

#endif

OpusEncoderImpl.h


// Include-once protection. The former #ifndef/#define pair had no matching
// #endif anywhere in this header (and used a reserved double-underscore name),
// so every translation unit including it failed with an unterminated
// conditional.
#pragma once
#include <atomic>
#include <cstdlib>
#include <memory>
#include <mutex>
#include <queue>
#include <thread>
#include <vector>

#include "include/opus/opus.h"
#include "base_type.h"


/**
 * Threaded PCM-to-Opus encoder.
 *
 * Raw PCM bytes are queued with Feed(); a worker thread started from the
 * constructor (through EncodeRun()) encodes them, and the resulting packets
 * are fetched with PopFrame(). Stop() joins the worker and destroys the
 * libopus encoder.
 */
class OpusEncoderImpl
{
private:
   OpusEncoder *encoder = nullptr;        // libopus encoder handle
   const int channel_num;                 // channel count passed to the constructor
   int sample_rate;                       // sampling rate passed to the constructor
   std::queue<StreamInfo> info_queue;     // encoded packets, accessed under access_mutex
   std::queue <unsigned char> pcm_queue;  // pending PCM bytes, accessed under mutex
   std::mutex mutex;
   // Written by Stop() while the worker thread polls it, so it must be atomic;
   // a plain bool here is a data race.
   std::atomic<bool> isRuning{true};
   std::mutex access_mutex;
   std::unique_ptr<std::thread> m_thread; // worker created by EncodeRun()
public:
    /** Creates the encoder for the given sampling rate / channel count and starts the worker. */
    OpusEncoderImpl(int sampleRate, int channel);
    /** Appends len bytes of PCM data to the input queue. */
    void Feed(unsigned char*data, int len);
    /** Moves the oldest encoded packet into info; returns false when none is available. */
    bool PopFrame(StreamInfo &info);
    /** Launches the worker thread that performs the encoding. */
    void EncodeRun();
    /** Stops the worker thread and destroys the encoder. */
    void Stop();
    ~OpusEncoderImpl();
};

OpusEncoderImpl.cpp

#include "OpusEncoderImpl.h"
#include "OpusDecoderImpl.h"
#include <unistd.h>
#include <stdlib.h>
// Size in bytes of the encoder output buffer. Parenthesized so the macro
// expands correctly inside larger expressions (e.g. x / MAX_PACKET_SIZE).
// Presumably room for three maximum-size (1276-byte) Opus frames — confirm.
#define MAX_PACKET_SIZE (3*1276)

/*
* Creates a VoIP-mode Opus encoder, applies the fixed configuration below and
* starts the encoding thread.
*
* sampleRate: sampling rate in Hz
* channel:    number of channels
*
* If the encoder cannot be created the failure is logged, no configuration is
* applied and no worker thread is started (previously the null handle was
* passed on to opus_encoder_ctl and to the worker).
*/

OpusEncoderImpl::OpusEncoderImpl(int sampleRate, int channel):encoder(nullptr),channel_num(channel),sample_rate(sampleRate)
 {
    int err = OPUS_OK;

    encoder = opus_encoder_create(sampleRate, channel_num, OPUS_APPLICATION_VOIP, &err);

    if(err != OPUS_OK || encoder == NULL) {
        printf("打开opus 编码器失败\n");
        if(encoder != NULL) {
            opus_encoder_destroy(encoder);
            encoder = nullptr;
        }
        isRuning = false;
        return;
    }

    opus_encoder_ctl(encoder, OPUS_SET_VBR(0));// 0: CBR, 1: VBR
    opus_encoder_ctl(encoder, OPUS_SET_VBR_CONSTRAINT(1));
    opus_encoder_ctl(encoder, OPUS_SET_BITRATE(96000));
    opus_encoder_ctl(encoder, OPUS_SET_COMPLEXITY(8));// valid range 0~10
    opus_encoder_ctl(encoder, OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE));
    opus_encoder_ctl(encoder, OPUS_SET_LSB_DEPTH(16));// 16 bits (2 bytes) per sample
    opus_encoder_ctl(encoder, OPUS_SET_DTX(0));
    opus_encoder_ctl(encoder, OPUS_SET_INBAND_FEC(0));

    EncodeRun();
}
// Appends len bytes of raw PCM data to the input queue consumed by the
// encoding thread. A null pointer or a non-positive length is ignored.
//
// Original author's note: each PCM frame is 23 ms (presumably the size of the
// chunks handed in by the caller — confirm).
void OpusEncoderImpl::Feed(unsigned char *data, int len) {
    if (data == nullptr || len <= 0) {
        return;
    }

    // lock_guard releases the mutex even if growing the queue throws.
    std::lock_guard<std::mutex> lock(mutex);
    for (int i = 0; i < len; ++i) {
        pcm_queue.push(data[i]);
    }
}

// Removes the oldest encoded packet from the output queue and copies it into
// info. Returns true when a packet was delivered, false when the queue is
// empty.
//
// info.data points to a buffer allocated with malloc() by the encoding thread;
// once popped, nothing in this class releases it, so the caller has to free()
// it.
bool OpusEncoderImpl::PopFrame(StreamInfo &info) {
    // The emptiness check happens under the lock as well: the encoding thread
    // pushes concurrently, so reading the queue unlocked is a data race.
    std::lock_guard<std::mutex> lock(access_mutex);
    if (info_queue.empty()) {
        return false;
    }

    info = info_queue.front();
    info_queue.pop();
    return true;
}


//48000 采样率，48个样本/ms * 20ms * 2 channel = 1920
void OpusEncoderImpl::EncodeRun() {
    m_thread = std::make_unique<std::thread>([this](){
        const int frame_size = 48*20;//960
        const int input_len = sizeof(opus_int16) * frame_size * 2;

        FILE *opus_file = fopen("/data/bin/out.customopus", "wb+");
        FILE *pcm_file = fopen("/data/bin/out.pcm", "wb+");
        OpusDecoderImpl decoder(48000, channel_num);

        opus_int16 input_data[frame_size * 2] = {0};//frame_size*channels*sizeof(opus_int16)
        unsigned char input_buffer[input_len] = {0};//每一帧的数据量
        unsigned char out_data[MAX_PACKET_SIZE] = {0};
        
        while (isRuning) {   
            if(pcm_queue.size() >= input_len) {
                mutex.lock();              
                for(int i = 0;i < input_len;i++) 
                {
                    input_buffer[i] = pcm_queue.front();
                    pcm_queue.pop();
                }

                // for (size_t i = 0; i < frame_size * channel_num; i++)
                // {
                //     input_data[i] = input_buffer[2*i + 1] << 8 | input_buffer[2*i];
                // }

                mutex.unlock();
                memcpy(input_data, input_buffer, input_len);
                // fwrite(input_buffer, 1, input_len, pcm_file);
                // fflush(pcm_file);          
                auto ret = opus_encode(encoder, input_data, frame_size, out_data, MAX_PACKET_SIZE);
                if(ret < 0) {
                    printf("opus编码失败, %d\n", ret);
                    break;
                }

                //写入文件
                // uint32_t len = static_cast<int>(ret);
                // fwrite(&len, 1, sizeof(uint32_t), opus_file);
                //fwrite(out_data, 1, ret, opus_file);              
                //fflush(opus_file);             
                unsigned char* opus_buffer = (unsigned char*)malloc(ret);
                memcpy(opus_buffer, out_data, ret);
                //decoder.Decode(opus_buffer, ret);

                StreamInfo info;
                info.data = opus_buffer;
                info.len = ret;
                info.dts = 20;
                access_mutex.lock();             
                info_queue.push(info);
                access_mutex.unlock();                      
                
            }else {
                usleep(1000);
            }  
        } 
    });

}

void OpusEncoderImpl::Stop() {
    isRuning = false;
    m_thread->join();

    while (pcm_queue.size() > 0)
    {
        pcm_queue.pop();
    }

    opus_encoder_destroy(encoder);
    
}

// Shuts the encoder down if the owner did not call Stop(), then releases the
// encoded packets nobody fetched.
OpusEncoderImpl::~OpusEncoderImpl() {
    // Destroying a std::thread that is still joinable calls std::terminate, so
    // a running worker has to be stopped and joined first. After Stop() the
    // thread is no longer joinable and this is skipped.
    if (m_thread && m_thread->joinable()) {
        Stop();
    }

    // Packets left in the output queue were allocated with malloc() by the
    // worker and would otherwise leak.
    while (!info_queue.empty()) {
        free(info_queue.front().data);
        info_queue.pop();
    }
}

3. Ergebnisüberprüfung

Zur Überprüfung gibt es zwei Möglichkeiten: die kodierten Daten in einen Ogg-Container zu verpacken oder sie wieder zu PCM zu dekodieren und in Audacity anzusehen. Hier wird Letzteres verwendet.

OpusDecoderImpl.h


#ifndef __OPUSDECODERIMPL_H
#define __OPUSDECODERIMPL_H
#include <stdio.h>
#include "include/opus/opus.h"
#include <vector>
#include <mutex>
#include "base_type.h"
#include <queue>
#include <thread>

/**
 * Opus decoder used to verify the encoder output: Decode() decodes one packet
 * and appends the resulting 16-bit PCM to a file opened by the constructor.
 */
class OpusDecoderImpl
{
private:
    // Every member has a defined initial value so that an instance whose
    // constructor bailed out early never exposes indeterminate handles.
    OpusDecoder *decoder = nullptr; // libopus decoder handle
    int sample_rate = 0;
    int channel_num = 0;
    FILE *pcm_file = nullptr;       // output file for the decoded PCM
public:
    /** Decodes one packet of len bytes and writes the PCM to the output file. */
    bool Decode(unsigned char* in_data, int len);
    OpusDecoderImpl(int sampleRate, int channel);
    ~OpusDecoderImpl();

    // Copying would duplicate the raw decoder/file handles, so it is disabled.
    OpusDecoderImpl(const OpusDecoderImpl&) = delete;
    OpusDecoderImpl& operator=(const OpusDecoderImpl&) = delete;
};

#endif

OpusDecoderImpl.cpp

#include "OpusDecoderImpl.h"
// Capacity of the decode buffer in samples per channel (6 * 960 = 5760, which
// is 120 ms at 48 kHz). Parenthesized so the macro expands correctly inside
// larger expressions.
#define MAX_FRAME_SIZE (6*960)
// Maximum channel count the decode buffers are sized for.
#define CHANNELS 2

// Creates the libopus decoder for the given sampling rate / channel count and
// opens the PCM output file. On failure the problem is logged and the
// corresponding handle stays null.
OpusDecoderImpl::OpusDecoderImpl(int sampleRate, int channel)
    : decoder(nullptr),
      sample_rate(sampleRate), // was "sample_rate = sample_rate", a self-assignment
      channel_num(channel),
      pcm_file(nullptr)
{
    int err = OPUS_OK;
    decoder = opus_decoder_create(sampleRate, channel, &err);
    if(err < 0 || decoder == NULL)
    {
        printf("创建解码器失败\n");
        if(decoder != NULL)
        {
            opus_decoder_destroy(decoder);
            decoder = nullptr;
        }
        return;
    }

    // Only issued once the handle is known to be valid.
    // NOTE(review): OPUS_SET_LSB_DEPTH is documented as an encoder CTL; the
    // decoder probably rejects it — confirm and drop the call if so.
    opus_decoder_ctl(decoder, OPUS_SET_LSB_DEPTH(16));

    pcm_file = fopen("/data/bin/decode.pcm", "wb+");
    if(pcm_file == NULL)
    {
        printf("failed to open /data/bin/decode.pcm\n");
    }
}

bool OpusDecoderImpl::Decode(unsigned char* in_data, int len)
{
    unsigned char pcm_bytes[MAX_FRAME_SIZE * CHANNELS * 2];
    opus_int16 out[MAX_FRAME_SIZE * CHANNELS];
    auto frame_size = opus_decode(decoder, in_data, len, out, MAX_FRAME_SIZE, 0);

    if (frame_size < 0)
    {
       printf("解码失败\n");
       return false;
    }

    for (auto i = 0; i < channel_num * frame_size; i++)
    {
        pcm_bytes[2 * i] = out[i] & 0xFF;
        pcm_bytes[2 * i + 1] = (out[i] >> 8) & 0xFF;
    }

    fwrite(pcm_bytes, sizeof(short), frame_size * channel_num, pcm_file);
    fflush(pcm_file);
    return true;
}

// Closes the PCM output file and destroys the libopus decoder; both were
// previously leaked.
OpusDecoderImpl::~OpusDecoderImpl()
{
    if (pcm_file != NULL)
    {
        fclose(pcm_file);
        pcm_file = NULL;
    }

    if (decoder != NULL)
    {
        opus_decoder_destroy(decoder);
        decoder = NULL;
    }
}

Beispiel für den Funktionsaufruf (PS: Der Code kann Syntaxfehler enthalten, bitte selbst beheben)

// Usage example: feed PCM into the encoder, fetch the encoded packets, stop.
int main(int argc, char** argv)
{
    (void)argc;
    (void)argv;

    // Placeholder input: one 20 ms frame of 48 kHz / 16-bit / stereo silence
    // (48 samples/ms * 20 ms * 2 bytes * 2 channels = 3840 bytes). Replace it
    // with real PCM data.
    unsigned char pcm_data[3840] = {0};
    const int pcm_len = static_cast<int>(sizeof(pcm_data));

    OpusEncoderImpl opusEncoder(48000, 2);

    for (size_t i = 0; i < 100; i++)
    {
        opusEncoder.Feed(pcm_data, pcm_len);// hand PCM data to the encoder
    }

    // Fetch the encoded opus packets. This normally runs in a separate thread
    // and is done inline here only for convenience; the loop ends as soon as
    // the queue is empty, which may be before the worker has encoded everything.
    StreamInfo info;
    while (opusEncoder.PopFrame(info))
    {
        // ... consume info.data / info.len here ...
        free(info.data); // the packet buffer was allocated with malloc()
    }

    opusEncoder.Stop();

    return 0;
}

4. Referenzmaterialien

Einführung in die Audio- und OPUS-Open-Source-Bibliothek von blog_WuYuJuns Blog-CSDN blog_opus-Bibliothek

Audio- und Video-Codec – Opus-Codec-Reihe 1_Fenngtun's Blog-CSDN Blog

Opus-Codierung von Qt