Audio automatic gain control and silence detection algorithms with complete C code [repost]

Reprinted from: https://www.cnblogs.com/cpuimage/p/8908551.html

I previously shared an algorithm in the post "Audio Gain Loudness Analysis: ReplayGain with Complete C Code Example".

It is mainly used to evaluate the loudness of a given stretch of audio.

After such an analysis, a common follow-up requirement is to apply gain to the audio, for example to raise its volume.

However, when this is tried in a real project, it is very difficult to settle on a standard

for deciding in which environments the volume should be increased or decreased.

The general practice in the communications industry is to use silence detection (voice activity detection, VAD).

Once a segment is detected as silence or noise, it is left unprocessed; otherwise it is processed according to some strategy.

There are two algorithms involved here, one is silence detection and the other is audio gain.

There is not much to say about the gain itself; it is similar to normalizing (stretching) data to a target range.

Silence detection in WebRTC uses a GMM (Gaussian Mixture Model) for feature extraction.

For a long time, there have been three main approaches to audio features:

GMM, the spectrogram, and MFCC (Mel-Frequency Cepstral Coefficients).

IMHO, the features extracted by GMM are not as robust as the latter two.

I won't go into much detail here; interested readers can look these topics up on Wikipedia to fill in the background.

Of course, in the actual use of the algorithm, some tricks will be extended from this.

For example, use silence detection to do audio clipping, or do some audio enhancement with audio gain.

The automatic gain control source files in WebRTC are: analog_agc.c and digital_agc.c

The silence detection source file is: webrtc_vad.c

These names exist for historical reasons.

After tidying the code up:

The gain algorithm is in agc.c and agc.h

Silence detection is in vad.c and vad.h

Full sample code for the gain algorithm:

copy code
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//decode using https://github.com/mackron/dr_libs/blob/master/dr_wav.h
#define DR_WAV_IMPLEMENTATION
#include "dr_wav.h"
#include "agc.h"

// Lets this C source use the spelling `nullptr`; it expands to the constant 0.
#ifndef nullptr
#define nullptr 0
#endif

// Smaller of A and B. Each argument may be evaluated twice, so do not pass
// expressions with side effects.
#ifndef MIN
#define  MIN(A, B)        ((A) < (B) ? (A) : (B))
#endif

/**
 * Write mono 16-bit PCM samples to a RIFF WAV file.
 *
 * @param filename          Path of the WAV file to create.
 * @param buffer            Samples to write; must hold totalSampleCount entries.
 * @param sampleRate        Sample rate in Hz recorded in the file format.
 * @param totalSampleCount  Number of samples to write.
 *
 * If the file cannot be opened, a message is printed to stderr and the
 * function returns. If fewer samples than requested are written, "ERROR" is
 * printed to stderr and the process terminates with exit(1).
 */
void wavWrite_int16(char *filename, int16_t *buffer, size_t sampleRate, size_t totalSampleCount) {
    drwav_data_format format = {0};              // `= {}` is not valid C before C23
    format.container = drwav_container_riff;     // <-- drwav_container_riff = normal WAV files, drwav_container_w64 = Sony Wave64.
    format.format = DR_WAVE_FORMAT_PCM;          // <-- Any of the DR_WAVE_FORMAT_* codes.
    format.channels = 1;
    format.sampleRate = (drwav_uint32) sampleRate;
    format.bitsPerSample = 16;

    drwav *pWav = drwav_open_file_write(filename, &format);
    if (pWav == NULL) {
        fprintf(stderr, "Failed to open wav file for writing.\n");
        return;
    }

    drwav_uint64 samplesWritten = drwav_write(pWav, totalSampleCount, buffer);
    // The handle was allocated by drwav_open_file_write(); drwav_close()
    // uninitializes it and also releases the object itself.
    drwav_close(pWav);
    if (samplesWritten != totalSampleCount) {
        fprintf(stderr, "ERROR\n");
        exit(1);
    }
}

/**
 * Load a WAV file as 16-bit samples. Only single-channel files are accepted.
 *
 * @param filename          Path of the WAV file to read.
 * @param sampleRate        Receives the sample rate; set to 0 on failure.
 * @param totalSampleCount  Receives the number of samples; set to 0 on failure.
 * @return The decoded sample buffer (owned by the caller), or NULL when the
 *         file cannot be read or does not have exactly one channel.
 */
int16_t *wavRead_int16(char *filename, uint32_t *sampleRate, uint64_t *totalSampleCount) {
    // Initialized so the channel check never reads an indeterminate value.
    unsigned int channels = 0;
    int16_t *buffer = drwav_open_and_read_file_s16(filename, &channels, sampleRate, totalSampleCount);
    if (buffer == NULL) {
        printf("Failed to read wav file.");
        *sampleRate = 0;
        *totalSampleCount = 0;
        return NULL;
    }
    // only handle single channel audio
    if (channels != 1) {
        fprintf(stderr, "Only single channel wav files are supported.\n");
        drwav_free(buffer);
        *sampleRate = 0;
        *totalSampleCount = 0;
        return NULL;
    }
    return buffer;
}

/**
 * Split a path into drive, directory, base name and extension.
 *
 * Example: "C:\\foo\\bar.txt" -> drv "C:", dir "\\foo\\", name "bar", ext ".txt".
 * Both '\\' and '/' are treated as directory separators.
 *
 * @param path  NUL-terminated path to split; must not be NULL.
 * @param drv   Receives the two-character drive prefix (e.g. "C:") or "";
 *              needs room for 3 bytes. May be NULL.
 * @param dir   Receives the directory part including its trailing separator.
 *              May be NULL.
 * @param name  Receives the file name without extension. May be NULL.
 * @param ext   Receives the extension including the leading '.', or "".
 *              May be NULL.
 *
 * The output buffers are not bounds-checked: each of dir, name and ext must be
 * able to hold strlen(path) + 1 bytes.
 */
void splitpath(const char *path, char *drv, char *dir, char *name, char *ext) {
    const char *end; /* end of the part that is still being parsed */
    const char *p;   /* search pointer */
    const char *s;   /* copy pointer */

    /* Drive prefix. It is skipped even when the caller passes drv == NULL;
     * otherwise the ':' would be mistaken for the end of the path below. */
    if (path[0] && path[1] == ':') {
        if (drv) {
            drv[0] = path[0];
            drv[1] = path[1];
            drv[2] = '\0';
        }
        path += 2;
    } else if (drv) {
        *drv = '\0';
    }

    /* Find the end of the string or a ':' separator. */
    for (end = path; *end && *end != ':';) {
        end++;
    }

    /* Scan backwards within the last path component for the extension dot. */
    for (p = end; p > path && *--p != '\\' && *p != '/';) {
        if (*p == '.') {
            end = p;
            break;
        }
    }

    /* Everything from the dot onwards (possibly nothing) is the extension. */
    if (ext) {
        for (s = end; (*ext = *s++);) {
            ext++;
        }
    }

    /* Find the start of the file name: just after the last separator. */
    for (p = end; p > path;) {
        if (*--p == '\\' || *p == '/') {
            p++;
            break;
        }
    }

    if (name) {
        for (s = p; s < end;) {
            *name++ = *s++;
        }
        *name = '\0';
    }

    if (dir) {
        for (s = path; s < p;) {
            *dir++ = *s++;
        }
        *dir = '\0';
    }
}


/**
 * Apply WebRTC automatic gain control to a 16-bit PCM buffer, in place.
 *
 * The buffer is processed in whole frames of MIN(160, sampleRate / 100)
 * samples. A trailing partial frame is left unmodified. If processing fails
 * part-way, frames handled before the failure have already been overwritten.
 *
 * @param buffer        Samples to process; overwritten with the result.
 * @param sampleRate    Sample rate in Hz.
 * @param samplesCount  Number of samples in buffer.
 * @param agcMode       Mode value forwarded to WebRtcAgc_Init.
 * @return 1 on success; -1 on invalid arguments or when any WebRtcAgc_* call
 *         fails.
 */
int agcProcess(int16_t *buffer, uint32_t sampleRate, size_t samplesCount, int16_t agcMode) {
    // Capacity of the per-frame output buffer (a frame is at most 160 samples).
    enum { kMaxFrameSamples = 320 };

    if (buffer == NULL || samplesCount == 0) return -1;

    const size_t samples = MIN(160, sampleRate / 100);
    if (samples == 0) return -1;
    const size_t nTotal = samplesCount / samples;

    WebRtcAgcConfig agcConfig;
    agcConfig.compressionGaindB = 9; // default 9 dB
    agcConfig.limiterEnable = 1;     // default kAgcTrue (on)
    agcConfig.targetLevelDbfs = 3;   // default 3 (-3 dBOv)
    const int minLevel = 0;
    const int maxLevel = 255;

    const size_t num_bands = 1;
    int outMicLevel = -1;
    int16_t out_buffer[kMaxFrameSamples];
    int16_t *input = buffer;
    int16_t *out16 = out_buffer;
    // Output flag filled in by WebRtcAgc_Process; presumably reports that the
    // amplified signal saturated (see agc.h to confirm).
    uint8_t saturationWarning = 1;
    const int16_t echo = 0; // whether the gain stage should account for echo
    int result = -1;

    void *agcInst = WebRtcAgc_Create();
    if (agcInst == NULL) return -1;

    if (WebRtcAgc_Init(agcInst, minLevel, maxLevel, agcMode, sampleRate) != 0) {
        printf("WebRtcAgc_Init fail\n");
        goto done;
    }
    if (WebRtcAgc_set_config(agcInst, agcConfig) != 0) {
        printf("WebRtcAgc_set_config fail\n");
        goto done;
    }

    // size_t index: the frame count is a size_t and may exceed INT_MAX.
    for (size_t i = 0; i < nTotal; i++) {
        const int inMicLevel = 0;
        int nAgcRet = WebRtcAgc_Process(agcInst, (const int16_t *const *) &input, num_bands, samples,
                                        (int16_t *const *) &out16, inMicLevel, &outMicLevel, echo,
                                        &saturationWarning);
        if (nAgcRet != 0) {
            printf("failed in WebRtcAgc_Process\n");
            goto done;
        }
        // Copy the processed frame back over the input frame.
        memcpy(input, out_buffer, samples * sizeof(int16_t));
        input += samples;
    }
    result = 1;

done:
    WebRtcAgc_Free(agcInst);
    return result;
}

/**
 * Read a mono WAV file, apply automatic gain control, and write the result.
 *
 * @param in_file   Path of the WAV file to read.
 * @param out_file  Path of the WAV file to write.
 *
 * Nothing is written when the input cannot be loaded or when gain processing
 * fails.
 */
void auto_gain(char *in_file, char *out_file) {
    //audio sample rate
    uint32_t sampleRate = 0;
    //Total audio samples
    uint64_t inSampleCount = 0;
    int16_t *inBuffer = wavRead_int16(in_file, &sampleRate, &inSampleCount);
    if (inBuffer == NULL) {
        return;
    }

    // kAgcModeAdaptiveAnalog analog volume adjustment
    // kAgcModeAdaptiveDigital adaptive gain
    // kAgcModeFixedDigital fixed gain
    if (agcProcess(inBuffer, sampleRate, (size_t) inSampleCount, kAgcModeAdaptiveDigital) == 1) {
        wavWrite_int16(out_file, inBuffer, sampleRate, (size_t) inSampleCount);
    } else {
        // Do not write a file whose contents may be only partially processed.
        fprintf(stderr, "Automatic gain processing failed; no output written.\n");
    }

    // Release with the dr_wav deallocator, matching wavRead_int16.
    drwav_free(inBuffer);
}

/**
 * Entry point of the AGC demo: derives "<input>_out<ext>" from the input path
 * given as the first argument and writes the gain-adjusted audio there.
 *
 * @return 0 after processing; -1 when no input path is given or the path is
 *         too long for the fixed-size path buffers.
 */
int main(int argc, char *argv[]) {
    printf("WebRTC Automatic Gain Control\n");
    printf("Blog: http://cpuimage.cnblogs.com/\n");
    printf("Audio auto gain\n");
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <input.wav>\n", argc > 0 ? argv[0] : "agc");
        return -1;
    }
    char *in_file = argv[1];
    char drive[3];
    char dir[256];
    char fname[256];
    char ext[256];
    char out_file[1024];

    // splitpath() does not bounds-check its outputs; every component is at
    // most as long as the whole path, so bounding the path protects them all.
    if (strlen(in_file) >= sizeof(dir)) {
        fprintf(stderr, "Input path is too long.\n");
        return -1;
    }
    splitpath(in_file, drive, dir, fname, ext);

    int written = snprintf(out_file, sizeof(out_file), "%s%s%s_out%s", drive, dir, fname, ext);
    if (written < 0 || (size_t) written >= sizeof(out_file)) {
        fprintf(stderr, "Output path is too long.\n");
        return -1;
    }
    auto_gain(in_file, out_file);

    printf("Press any key to exit the program\n");
    getchar();
    return 0;
}
copy code

 

 Complete sample code for silence detection:

copy code
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
//decode using https://github.com/mackron/dr_libs/blob/master/dr_wav.h
#define DR_WAV_IMPLEMENTATION

#include "dr_wav.h"
#include "vad.h"

// Lets this C source use the spelling `nullptr`; it expands to the constant 0.
#ifndef nullptr
#define nullptr 0
#endif

// Smaller of A and B. Each argument may be evaluated twice, so do not pass
// expressions with side effects.
#ifndef MIN
#define  MIN(A, B)        ((A) < (B) ? (A) : (B))
#endif

// Larger of A and B. Each argument may be evaluated twice, so do not pass
// expressions with side effects.
#ifndef MAX
#define  MAX(A, B)        ((A) > (B) ? (A) : (B))
#endif


/**
 * Load a WAV file as 16-bit samples. Only single-channel files are accepted.
 *
 * @param filename          Path of the WAV file to read.
 * @param sampleRate        Receives the sample rate; set to 0 on failure.
 * @param totalSampleCount  Receives the number of samples; set to 0 on failure.
 * @return The decoded sample buffer (owned by the caller), or NULL when the
 *         file cannot be read or does not have exactly one channel.
 */
int16_t *wavRead_int16(char *filename, uint32_t *sampleRate, uint64_t *totalSampleCount) {
    // Initialized so the channel check never reads an indeterminate value.
    unsigned int channels = 0;
    int16_t *buffer = drwav_open_and_read_file_s16(filename, &channels, sampleRate, totalSampleCount);
    if (buffer == NULL) {
        printf("Failed to read wav file.");
        *sampleRate = 0;
        *totalSampleCount = 0;
        return NULL;
    }
    // only handle single channel audio
    if (channels != 1) {
        fprintf(stderr, "Only single channel wav files are supported.\n");
        drwav_free(buffer);
        *sampleRate = 0;
        *totalSampleCount = 0;
        return NULL;
    }
    return buffer;
}


/**
 * Run WebRTC voice activity detection over a 16-bit PCM buffer and print one
 * result per frame to stdout.
 *
 * The buffer is processed in whole frames; a trailing partial frame is
 * ignored.
 *
 * @param buffer         Samples to analyse.
 * @param sampleRate     Sample rate in Hz (valid rates: 8000, 16000, 32000, 48000).
 * @param samplesCount   Number of samples in buffer.
 * @param vad_mode       Mode value forwarded to WebRtcVad_set_mode.
 * @param per_ms_frames  Frame length in milliseconds; clamped to [10, 30] and
 *                       rounded down to a multiple of 10 (10, 20 or 30 ms).
 * @return 1 on success; -1 on invalid arguments or when any WebRtcVad_* call
 *         fails.
 */
int vadProcess(int16_t *buffer, uint32_t sampleRate, size_t samplesCount, int16_t vad_mode, int per_ms_frames) {
    if (buffer == NULL || samplesCount == 0) return -1;

    // kValidRates: 8000, 16000, 32000, 48000
    // 10, 20 or 30 ms frames
    per_ms_frames = MAX(MIN(30, per_ms_frames), 10);
    // Snap in-between values (e.g. 15) down to a supported frame length.
    per_ms_frames -= per_ms_frames % 10;

    const size_t samples = (size_t) sampleRate * (size_t) per_ms_frames / 1000;
    if (samples == 0) return -1;
    const size_t nTotal = samplesCount / samples;
    int16_t *input = buffer;
    int result = -1;

    void *vadInst = WebRtcVad_Create();
    if (vadInst == NULL) return -1;

    if (WebRtcVad_Init(vadInst) != 0) {
        printf("WebRtcVad_Init fail\n");
        goto done;
    }
    if (WebRtcVad_set_mode(vadInst, vad_mode) != 0) {
        printf("WebRtcVad_set_mode fail\n");
        goto done;
    }

    printf("Activity : \n");
    // size_t index: the frame count is a size_t and may exceed INT_MAX.
    for (size_t i = 0; i < nTotal; i++) {
        int nVadRet = WebRtcVad_Process(vadInst, sampleRate, input, samples);
        if (nVadRet == -1) {
            printf("failed in WebRtcVad_Process\n");
            goto done;
        }
        // output result
        printf(" %d \t", nVadRet);
        input += samples;
    }
    printf("\n");
    result = 1;

done:
    WebRtcVad_Free(vadInst);
    return result;
}

/**
 * Load a mono WAV file and print its per-frame silence detection results.
 *
 * @param in_file  Path of the WAV file to analyse.
 *
 * Does nothing when the file cannot be loaded; reports to stderr when
 * detection fails.
 */
void vad(char *in_file) {
    //audio sample rate
    uint32_t sampleRate = 0;
    //Total audio samples
    uint64_t inSampleCount = 0;
    int16_t *inBuffer = wavRead_int16(in_file, &sampleRate, &inSampleCount);
    if (inBuffer == NULL) {
        return;
    }

    //    Aggressiveness mode (0, 1, 2, or 3)
    int16_t mode = 1;
    int per_ms = 30;
    if (vadProcess(inBuffer, sampleRate, (size_t) inSampleCount, mode, per_ms) != 1) {
        fprintf(stderr, "Silence detection failed.\n");
    }

    // Release with the dr_wav deallocator, matching wavRead_int16.
    drwav_free(inBuffer);
}

/**
 * Entry point of the VAD demo: runs silence detection on the WAV file given
 * as the first argument.
 *
 * @return 0 after processing; -1 when no input path is given.
 */
int main(int argc, char *argv[]) {
    printf("WebRTC Voice Activity Detector\n");
    printf("Blog: http://cpuimage.cnblogs.com/\n");
    printf("Silence detection\n");
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <input.wav>\n", argc > 0 ? argv[0] : "vad");
        return -1;
    }
    char *in_file = argv[1];
    // The original called an undeclared function here; vad() is the detection
    // routine defined in this program.
    vad(in_file);
    printf("Press any key to exit the program\n");
    getchar();
    return 0;
}
copy code

Automatic gain project address: https://github.com/cpuimage/WebRTC_AGC

The specific process is: 

Load wav (drag and drop wav file onto executable) -> gain processing -> save as _out.wav file

 

Silence detection project address: https://github.com/cpuimage/WebRTC_VAD

The specific process is: 

Load wav (drag and drop the wav file onto the executable) -> output the silence detection result

Note: 1 means the frame is not silent (voice activity detected); 0 means the frame is silent.

 For the places and parameters that should be noted, see the code comments.

The sample code can be compiled with cmake, see CMakeLists.txt for details.

 

If you have any other questions or needs, please contact me by email.

The email address is: 
[email protected]

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=324758939&siteId=291194637