WebRTC 音频QoS研究(4)：NetEQ几种数据缓冲区的代码实现

WebRTC音频引擎NetEQ在处理音频数据时，主要用到了4块内存区域用于暂存处理过程中间的临时数据，分别是抖动缓冲区，解码缓冲区，DSP算法缓冲区和语音缓冲区。

抖动缓冲区用于暂存从网络到达的、尚未解码的音频数据包。这些数据包会通过解码器解码成为PCM原始音频数据，存放这些PCM数据的位置就是解码缓冲区。在需要进行音频信号处理的情况下，NetEQ会对解码缓冲区中的数据进行拉伸、压缩或者平滑处理，并将处理结果放到算法缓冲区中。最后，算法缓冲区中的数据会被塞到语音缓冲区中，声卡每隔10ms会从语音缓冲区中提取长度为10ms的语音数据拿去播放。接下来分别介绍这几种缓冲区的实现源码。

抖动缓冲区

// Alias for a linked list of Packet objects.
using PacketList = std::list<Packet>;

// Holds the packets that have arrived but have not been decoded yet.
class PacketBuffer {
public:
    // Status codes returned by the buffer operations declared below.
    enum BufferReturnCodes {
        kOK = 0,
        kFlushed,
        kNotFound,
        kBufferEmpty,
        kInvalidPacket,
        kInvalidPointer
    };

    // Builds a buffer with room for at most |max_number_of_packets| packets.
    PacketBuffer(size_t max_number_of_packets, const TickTimer* tick_timer);

    // All packets still held are deleted before the buffer itself goes away.
    virtual ~PacketBuffer();

    // Empties the buffer, deleting every packet it holds.
    virtual void Flush();

    // True when the buffer holds no packets.
    virtual bool Empty() const;

    // Adds |packet| to the buffer; ownership of the packet object moves to the
    // buffer.
    // Result: PacketBuffer::kOK when the insert succeeded, or
    // PacketBuffer::kFlushed when the buffer had to be flushed because it was
    // overfilled.
    virtual int InsertPacket(Packet&& packet, StatisticsCalculator* stats);

    // Adds every packet in |packet_list| to the buffer; ownership of the packet
    // objects moves to the buffer.
    // Result: PacketBuffer::kOK when all packets went in. When overfilling
    // forced a flush, only part of the list is inserted and
    // PacketBuffer::kFlushed is returned.
    // The final three parameters exist only for legacy compatibility.
    // TODO(hlundin): Redesign to not use current_*_payload_type and
    // decoder_database.
    virtual int InsertPacketList(
        PacketList* packet_list,
        const DecoderDatabase& decoder_database,
        rtc::Optional<uint8_t>* current_rtp_payload_type,
        rtc::Optional<uint8_t>* current_cng_rtp_payload_type,
        StatisticsCalculator* stats);

    // Stores the timestamp of the first packet in the buffer into the output
    // variable |next_timestamp|.
    // Result: PacketBuffer::kBufferEmpty for an empty buffer,
    // PacketBuffer::kOK otherwise.
    virtual int NextTimestamp(uint32_t* next_timestamp) const;

    // Finds the first packet whose timestamp is not lower than the limit
    // |timestamp| and stores that packet's timestamp into the output variable
    // |next_timestamp|.
    // Result: PacketBuffer::kBufferEmpty for an empty buffer,
    // PacketBuffer::kOK otherwise.
    virtual int NextHigherTimestamp(uint32_t timestamp,
                                    uint32_t* next_timestamp) const;

    // Gives read-only access to the first packet in the buffer without removing
    // it. A null pointer is returned for an empty buffer.
    virtual const Packet* PeekNextPacket() const;

    // Removes the first packet from the buffer and hands it to the caller.
    // An empty optional is returned for an empty buffer.
    virtual rtc::Optional<Packet> GetNextPacket();

    // Drops (deletes) the first packet in the buffer.
    // Result: PacketBuffer::kBufferEmpty for an empty buffer,
    // PacketBuffer::kOK otherwise.
    virtual int DiscardNextPacket(StatisticsCalculator* stats);

    // Drops every packet that is strictly older than |timestamp_limit| while
    // still newer than |timestamp_limit| - |horizon_samples|. A
    // |horizon_samples| of zero means the horizon is half the timestamp range:
    // a packet more than 2^31 timestamps into the future relative to
    // |timestamp_limit| (wrap-around included) counts as old.
    virtual void DiscardOldPackets(uint32_t timestamp_limit,
                                   uint32_t horizon_samples,
                                   StatisticsCalculator* stats);

    // Drops every packet that is strictly older than |timestamp_limit|.
    virtual void DiscardAllOldPackets(uint32_t timestamp_limit,
                                      StatisticsCalculator* stats);

    // Drops every packet carrying the given |payload_type|.
    virtual void DiscardPacketsWithPayloadType(uint8_t payload_type,
                                               StatisticsCalculator* stats);

    // Packet count of the buffer; duplicates and redundant packets are
    // included.
    virtual size_t NumPacketsInBuffer() const;

    // Sample count of the buffer; samples carried in duplicate and redundant
    // packets are included.
    virtual size_t NumSamplesInBuffer(size_t last_decoded_length) const;

    // Reports buffer statistics through the two output pointers.
    // NOTE(review): presumably the current and the maximum packet count --
    // confirm against the implementation, which is not shown here.
    virtual void BufferStat(int* num_packets, int* max_num_packets) const;

    // Static helper. Yields true when |timestamp| is older than
    // |timestamp_limit| yet fewer than |horizon_samples| behind it. Example:
    // for timestamp_limit = 100 and horizon_samples = 10, any timestamp in the
    // range (90, 100) is obsolete and gives true.
    // |horizon_samples| == 0 behaves like 2^31, i.e. half of the 32-bit
    // timestamp range.
    static bool IsObsoleteTimestamp(uint32_t timestamp,
                                    uint32_t timestamp_limit,
                                    uint32_t horizon_samples) {
        // Not older than the limit at all: cannot be obsolete.
        if (!IsNewerTimestamp(timestamp_limit, timestamp)) {
            return false;
        }
        // A zero horizon disables the lower bound.
        if (horizon_samples == 0) {
            return true;
        }
        // Otherwise the timestamp must still lie inside the horizon window.
        return IsNewerTimestamp(timestamp, timestamp_limit - horizon_samples);
    }

private:
    size_t max_number_of_packets_;  // Capacity limit, counted in packets.
    PacketList buffer_;             // The packets currently held.
    const TickTimer* tick_timer_;   // Timer pointer passed to the constructor.
    RTC_DISALLOW_COPY_AND_ASSIGN(PacketBuffer);
};

// Member of NetEq: a read-only (const) reference to the jitter buffer, i.e. the
// PacketBuffer class declared above.
const PacketBuffer& packet_buffer_;

从抖动缓冲区的定义来看，抖动缓冲区类包含三个成员：一个整型数max_number_of_packets_，表示抖动缓冲区所能容纳的最多网络包数量；一个std::list<Packet>类型的成员buffer_，用于存放数据包；此外还有一个计时器指针tick_timer_（类型为const TickTimer*）。Packet类是对单个网络数据包的封装。

解码缓冲区

// Decode buffer: an owned array of signed 16-bit samples.
std::unique_ptr<int16_t[]> decoded_buffer_;

// Maximum frame size in samples: 120 ms @ 48 kHz (0.120 s * 48000 Hz = 5760).
static const size_t kMaxFrameSize = 5760;  

// Constructor-initializer excerpt: the decode buffer length is set to
// kMaxFrameSize.
size_t decoded_buffer_length_(kMaxFrameSize),

// Constructor-initializer excerpt: allocates the decode buffer with new[]
// (not malloc); it holds decoded_buffer_length_ elements.
decoded_buffer_(new int16_t[decoded_buffer_length_]),

解码缓冲区的定义就是一个带符号16位整型数组，固定长度5760，也就是48 kHz采样率下120 ms的采样点数（48000 × 0.12 = 5760）。

算法缓冲区

// Multi-channel audio container. Each channel is an AudioVector object (see
// operator[] and |channels_| below); the methods operate on all channels.
class AudioMultiVector {
public:
    // Creates an empty AudioMultiVector with |N| audio channels. |N| must be
    // larger than 0.
    explicit AudioMultiVector(size_t N);

    // Creates an AudioMultiVector with |N| audio channels, each channel having
    // an initial size. |N| must be larger than 0.
    AudioMultiVector(size_t N, size_t initial_size);

    virtual ~AudioMultiVector();

    // Deletes all values and make the vector empty.
    virtual void Clear();

    // Clears the vector and inserts |length| zeros into each channel.
    virtual void Zeros(size_t length);

    // Copies all values from this vector to |copy_to|. Any contents in |copy_to|
    // are deleted. After the operation is done, |copy_to| will be an exact
    // replica of this object. The source and the destination must have the same
    // number of channels.
    virtual void CopyTo(AudioMultiVector* copy_to) const;

    // Appends the contents of array |append_this| to the end of this
    // object. The array is assumed to be channel-interleaved. |length| must be
    // an even multiple of this object's number of channels.
    // The length of this object is increased with the |length| divided by the
    // number of channels.
    virtual void PushBackInterleaved(const int16_t* append_this, size_t length);

    // Appends the contents of AudioMultiVector |append_this| to this object. The
    // length of this object is increased with the length of |append_this|.
    virtual void PushBack(const AudioMultiVector& append_this);

    // Appends the contents of AudioMultiVector |append_this| to this object,
    // taken from |index| up until the end of |append_this|. The length of this
    // object is increased.
    virtual void PushBackFromIndex(const AudioMultiVector& append_this,
                                   size_t index);

    // Removes |length| elements from the beginning of this object, from each
    // channel.
    virtual void PopFront(size_t length);

    // Removes |length| elements from the end of this object, from each
    // channel.
    virtual void PopBack(size_t length);

    // Reads |length| samples from each channel and writes them interleaved to
    // |destination|. The total number of elements written to |destination| is
    // returned, i.e., |length| * number of channels. If the AudioMultiVector
    // contains less than |length| samples per channel, this is reflected in the
    // return value.
    virtual size_t ReadInterleaved(size_t length, int16_t* destination) const;

    // Like ReadInterleaved() above, but reads from |start_index| instead of from
    // the beginning.
    virtual size_t ReadInterleavedFromIndex(size_t start_index,
                                            size_t length,
                                            int16_t* destination) const;

    // Like ReadInterleaved() above, but reads from the end instead of from
    // the beginning.
    virtual size_t ReadInterleavedFromEnd(size_t length,
                                        int16_t* destination) const;

    // Overwrites each channel in this AudioMultiVector with values taken from
    // |insert_this|. The values are taken from the beginning of |insert_this| and
    // are inserted starting at |position|. |length| values are written into each
    // channel. If |length| and |position| are selected such that the new data
    // extends beyond the end of the current AudioVector, the vector is extended
    // to accommodate the new data. |length| is limited to the length of
    // |insert_this|.
    virtual void OverwriteAt(const AudioMultiVector& insert_this,
                             size_t length,
                             size_t position);

    // Appends |append_this| to the end of the current vector. Lets the two
    // vectors overlap by |fade_length| samples (per channel), and cross-fade
    // linearly in this region.
    virtual void CrossFade(const AudioMultiVector& append_this,
                           size_t fade_length);

    // Returns the number of channels.
    virtual size_t Channels() const;

    // Returns the number of elements per channel in this AudioMultiVector.
    virtual size_t Size() const;

    // Verify that each channel can hold at least |required_size| elements. If
    // not, extend accordingly.
    virtual void AssertSize(size_t required_size);

    // NOTE(review): undocumented in the original; presumably returns true when
    // the vector holds no samples -- confirm against the implementation.
    virtual bool Empty() const;

    // Copies the data between two channels in the AudioMultiVector. The method
    // does not add any new channel. Thus, |from_channel| and |to_channel| must
    // both be valid channel numbers.
    virtual void CopyChannel(size_t from_channel, size_t to_channel);

    // Accesses and modifies a channel (i.e., an AudioVector object) of this
    // AudioMultiVector.
    const AudioVector& operator[](size_t index) const;
    AudioVector& operator[](size_t index);

 protected:
    // One AudioVector pointer per channel. Protected so that subclasses can
    // reach the channels directly.
    // NOTE(review): raw pointers; ownership is presumably handled by the
    // constructor/destructor, which are not shown here -- confirm.
    std::vector<AudioVector*> channels_;
    // Number of channels.
    size_t num_channels_;

 private:
    RTC_DISALLOW_COPY_AND_ASSIGN(AudioMultiVector);
};

// Algorithm (DSP) buffer member: an owning pointer to an AudioMultiVector.
std::unique_ptr<AudioMultiVector> algorithm_buffer_;

算法缓冲区是一个AudioMultiVector类对象，包含一个vector<AudioVector*>数组以及一个通道数变量（为什么要有这个变量呢？我也有点懵）。AudioMultiVector的构造函数会根据通道数创建相应数量的AudioVector，AudioVector的数量跟通道数是一致的，每一个AudioVector实体存储一个声道的数据。构造AudioMultiVector对象时，既可以指定每个AudioVector的初始大小，也可以用默认大小来创建AudioVector。AudioVector的定义如下：

// Single-channel container of 16-bit samples. Elements are addressed through
// WrapIndex(), which wraps logical indices around the end of |array_|.
class AudioVector {
public:
    // Builds an AudioVector that holds no samples.
    AudioVector();

    // Builds an AudioVector whose initial size is |initial_size|.
    explicit AudioVector(size_t initial_size);

    virtual ~AudioVector();

    // Removes every value, leaving the vector empty.
    virtual void Clear();

    // Replicates this vector into |copy_to|. Whatever |copy_to| held before is
    // deleted first; afterwards |copy_to| is an exact replica of this object.
    virtual void CopyTo(AudioVector* copy_to) const;

    // Copies |length| values, starting at |position| in this vector, into the
    // array |copy_to|.
    virtual void CopyTo(size_t length, size_t position, int16_t* copy_to) const;

    // Puts the contents of AudioVector |prepend_this| in front of this
    // object's data. This object grows by the length of |prepend_this|.
    virtual void PushFront(const AudioVector& prepend_this);

    // As above, but the source is the array |prepend_this| holding |length|
    // elements.
    virtual void PushFront(const int16_t* prepend_this, size_t length);

    // Counterpart of PushFront that appends at the end of this object.
    virtual void PushBack(const AudioVector& append_this);

    // Appends part of |append_this| at the end of this object: the segment
    // begins at |position| and is |length| samples long.
    virtual void PushBack(const AudioVector& append_this,
                          size_t length,
                          size_t position);

    // Counterpart of PushFront that appends at the end of this object, taking
    // the data from an array of |length| elements.
    virtual void PushBack(const int16_t* append_this, size_t length);

    // Drops |length| elements from the front of this object.
    virtual void PopFront(size_t length);

    // Drops |length| elements from the back of this object.
    virtual void PopBack(size_t length);

    // Grows this object by |extra_length| elements at the end; the added
    // elements are zero-initialized.
    virtual void Extend(size_t extra_length);

    // Takes |length| elements from the array |insert_this| and inserts them at
    // |position|, growing the AudioVector by |length|.
    // |position| = 0 prepends the new values to the vector.
    // |position| = Size() appends the new values to the vector.
    virtual void InsertAt(const int16_t* insert_this, size_t length,
                          size_t position);

    // Same as InsertAt, except that |length| zeros are inserted at |position|.
    virtual void InsertZerosAt(size_t length, size_t position);

    // Replaces |length| elements of this AudioVector, beginning at |position|,
    // with the first values of the AudioVector |insert_this|. |position| means
    // the same as for InsertAt(). When |length| and |position| make the new
    // data run past the current end, the vector is extended to fit it.
    virtual void OverwriteAt(const AudioVector& insert_this,
                             size_t length,
                             size_t position);

    // Replaces |length| elements of this AudioVector, beginning at |position|,
    // with values from the array |insert_this|. |position| means the same as
    // for InsertAt(). When |length| and |position| make the new data run past
    // the current end, the vector is extended to fit it.
    virtual void OverwriteAt(const int16_t* insert_this,
                             size_t length,
                             size_t position);

    // Appends |append_this| after the current contents, with the two vectors
    // overlapping by |fade_length| samples; a linear cross-fade is applied in
    // the overlap.
    virtual void CrossFade(const AudioVector& append_this, size_t fade_length);

    // Element count of this AudioVector.
    virtual size_t Size() const;

    // True when this AudioVector is empty.
    virtual bool Empty() const;

    // Read-only element access; |index| is mapped into |array_| by WrapIndex().
    inline const int16_t& operator[](size_t index) const {
        const size_t physical = WrapIndex(index, begin_index_, capacity_);
        return array_[physical];
    }

    // Mutable element access; |index| is mapped into |array_| by WrapIndex().
    inline int16_t& operator[](size_t index) {
        const size_t physical = WrapIndex(index, begin_index_, capacity_);
        return array_[physical];
    }

private:
    // NOTE(review): presumably the size used by the no-argument constructor --
    // confirm against the implementation, which is not shown here.
    static const size_t kDefaultInitialSize = 10;

    // Helper for the [] operators: maps |index| (counted from |begin_index|) to
    // a position inside an array of |capacity| elements, wrapping around the
    // end without resorting to the modulo operation (%).
    static inline size_t WrapIndex(size_t index,
                                   size_t begin_index,
                                   size_t capacity) {
        RTC_DCHECK_LT(index, capacity);
        RTC_DCHECK_LT(begin_index, capacity);
        const size_t unwrapped = begin_index + index;
        RTC_DCHECK_GE(unwrapped, index);  // Guards against overflow.
        // Both inputs are below |capacity|, so one subtraction is enough.
        const size_t wrapped =
            unwrapped >= capacity ? unwrapped - capacity : unwrapped;
        RTC_DCHECK_LT(wrapped, capacity);
        return wrapped;
    }

    // NOTE(review): undocumented in the original; presumably makes room for at
    // least |n| samples -- confirm against the implementation.
    void Reserve(size_t n);

    // NOTE(review): the four helpers below are undocumented in the original.
    // Judging by their names they back InsertAt() and InsertZerosAt() by
    // pushing at the back or at the front -- confirm against the
    // implementation.
    void InsertByPushBack(const int16_t* insert_this, size_t length,
                          size_t position);

    void InsertByPushFront(const int16_t* insert_this, size_t length,
                           size_t position);

    void InsertZerosByPushBack(size_t length, size_t position);

    void InsertZerosByPushFront(size_t length, size_t position);

    // Sample storage.
    std::unique_ptr<int16_t[]> array_;

    // How many samples |array_| has been allocated for.
    size_t capacity_;

    // Index of the first sample in |array_|. The exception is
    // |begin_index_ == end_index_|, which marks an empty buffer.
    size_t begin_index_;

    // Index one past the last sample in |array_|.
    size_t end_index_;

    RTC_DISALLOW_COPY_AND_ASSIGN(AudioVector);
};

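AudioVector中设置了一个默认大小的数字kDefaultInitialSize，也可以通过有参数的构造函数自己设定大小。成员array_表示数据，capacity_表示容量。AudioVector实际上就是程序封装的一个类似于标准库vector的类，只不过底层是一个环形数组（下标通过WrapIndex回绕）；从Extend()、Reserve()、OverwriteAt()等接口来看，它的大小并不是固定的，在需要时可以扩展。begin_index_和end_index_的作用类似于vector的begin()和end()函数，只不过保存的是下标而不是迭代器，分别指向第一个元素和最后一个元素的下一位置。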

语音缓冲区

// Speech (sync) buffer member: an owning pointer to a SyncBuffer, annotated as
// guarded by crit_sect_.
std::unique_ptr<SyncBuffer> sync_buffer_ RTC_GUARDED_BY(crit_sect_);

// AudioMultiVector subclass that additionally tracks a read position
// (|next_index_|), the timestamp of the last sample (|end_timestamp_|) and a
// DTMF index (|dtmf_index_|).
class SyncBuffer : public AudioMultiVector {
public:
    // Forwards |channels| and |length| to the AudioMultiVector constructor,
    // starts |next_index_| at |length|, and zeroes the end timestamp and the
    // DTMF index.
    SyncBuffer(size_t channels, size_t length)
        : AudioMultiVector(channels, length),
          next_index_(length),
          end_timestamp_(0),
          dtmf_index_(0) {}

    // Number of samples in the buffer that have not been played out yet.
    size_t FutureLength() const;

    // Appends the contents of |append_this| at the back of the SyncBuffer and
    // drops the same number of samples from the front, so the buffer size
    // stays constant. |next_index_| is adjusted to follow the start of the
    // "future" data.
    void PushBack(const AudioMultiVector& append_this) override;

    // Puts |length| zeros at the front of each channel and drops the same
    // number of samples from the end, so the buffer size stays constant.
    // |next_index_| is adjusted to follow the start of the "future" data.
    // Beware: future samples still waiting to be played may be deleted by this
    // operation.
    void PushFrontZeros(size_t length);

    // Inserts |length| zeros into each channel at index |position|. Because the
    // SyncBuffer size stays constant, the last |length| elements of each
    // channel are purged.
    virtual void InsertZerosAtIndex(size_t length, size_t position);

    // Overwrites each channel of this SyncBuffer with values from
    // |insert_this|, read from the start of |insert_this| and written from
    // |position| onwards; |length| values go into each channel. The SyncBuffer
    // size stays constant: when |length| and |position| would make the new data
    // run past the current end, the buffer is not extended.
    // |next_index_| is left untouched.
    virtual void ReplaceAtIndex(const AudioMultiVector& insert_this,
                                size_t length,
                                size_t position);

    // Variant of the method above that writes all of |insert_this| (under the
    // same constraint: the SyncBuffer is never extended).
    virtual void ReplaceAtIndex(const AudioMultiVector& insert_this,
                                size_t position);

    // Reads |requested_len| samples from each channel and stores them
    // interleaved in |output|. |next_index_| moves on to the sample to be read
    // next time. The AudioFrame |output| is reset first; then its |data_|,
    // |num_channels_| and |samples_per_channel_| fields are updated.
    void GetNextAudioInterleaved(size_t requested_len, AudioFrame* output);

    // Increases |end_timestamp_| by |increment|.
    void IncreaseEndTimestamp(uint32_t increment);

    // Flushes the buffer. Afterwards it holds only zeros and |next_index_|
    // points to the end, exactly as when the buffer was first created.
    void Flush();

    // Read-only access to channel |n|.
    const AudioVector& Channel(size_t n) const {
        const AudioVector* channel = channels_[n];
        return *channel;
    }

    // Mutable access to channel |n|.
    AudioVector& Channel(size_t n) {
        AudioVector* channel = channels_[n];
        return *channel;
    }

    // Getters and setters.
    size_t next_index() const {
        return next_index_;
    }
    void set_next_index(size_t value);

    uint32_t end_timestamp() const {
        return end_timestamp_;
    }
    void set_end_timestamp(uint32_t value) {
        end_timestamp_ = value;
    }

    size_t dtmf_index() const {
        return dtmf_index_;
    }
    void set_dtmf_index(size_t value);

private:
    // Index of the sample to read next.
    size_t next_index_;
    // Timestamp of the last sample in the buffer.
    uint32_t end_timestamp_;
    // Index of the first non-DTMF sample in the buffer.
    size_t dtmf_index_;

    RTC_DISALLOW_COPY_AND_ASSIGN(SyncBuffer);
};

语音缓冲区类SyncBuffer继承自AudioMultiVector（也就是算法缓冲区所用的类），在其基础上还多了三个成员变量：next_index_、end_timestamp_和dtmf_index_。