Implementing real-time AEC on a Linux system

Starting today, we are going to build a real-time echo cancellation (AEC) application.

The application's working environment is as follows:

1] A Linux system

2] The system plays music through the computer's speaker

3] While the music is playing, recording is turned on to capture audio

The goals of our application:

1] The recording should produce two basic audio files: the audio played through the speaker during recording, and the sound picked up by the MIC (speech plus speaker echo)

2] Apply echo cancellation to the MIC signal and output the echo-cancelled audio file

First, let's look at the PCM information that ALSA supports (a short sketch of code that can print such a listing follows the dump):

ALSA library version: 1.1.3

PCM stream types:
 PLAYBACK
 CAPTURE

PCM access types:
 MMAP_INTERLEAVED
 MMAP_NONINTERLEAVED
 MMAP_COMPLEX
 RW_INTERLEAVED
 RW_NONINTERLEAVED

PCM formats:
 S8 (Signed 8 bit)
 U8 (Unsigned 8 bit)
 S16_LE (Signed 16 bit Little Endian)
 S16_BE (Signed 16 bit Big Endian)
 U16_LE (Unsigned 16 bit Little Endian)
 U16_BE (Unsigned 16 bit Big Endian)
 S24_LE (Signed 24 bit Little Endian)
 S24_BE (Signed 24 bit Big Endian)
 U24_LE (Unsigned 24 bit Little Endian)
 U24_BE (Unsigned 24 bit Big Endian)
 S32_LE (Signed 32 bit Little Endian)
 S32_BE (Signed 32 bit Big Endian)
 U32_LE (Unsigned 32 bit Little Endian)
 U32_BE (Unsigned 32 bit Big Endian)
 FLOAT_LE (Float 32 bit Little Endian)
 FLOAT_BE (Float 32 bit Big Endian)
 FLOAT64_LE (Float 64 bit Little Endian)
 FLOAT64_BE (Float 64 bit Big Endian)
 IEC958_SUBFRAME_LE (IEC-958 Little Endian)
 IEC958_SUBFRAME_BE (IEC-958 Big Endian)
 MU_LAW (Mu-Law)
 A_LAW (A-Law)
 IMA_ADPCM (Ima-ADPCM)
 MPEG (MPEG)
 GSM (GSM)
 SPECIAL (Special)
 S24_3LE (Signed 24 bit Little Endian in 3bytes)
 S24_3BE (Signed 24 bit Big Endian in 3bytes)
 U24_3LE (Unsigned 24 bit Little Endian in 3bytes)
 U24_3BE (Unsigned 24 bit Big Endian in 3bytes)
 S20_3LE (Signed 20 bit Little Endian in 3bytes)
 S20_3BE (Signed 20 bit Big Endian in 3bytes)
 U20_3LE (Unsigned 20 bit Little Endian in 3bytes)
 U20_3BE (Unsigned 20 bit Big Endian in 3bytes)
 S18_3LE (Signed 18 bit Little Endian in 3bytes)
 S18_3BE (Signed 18 bit Big Endian in 3bytes)
 U18_3LE (Unsigned 18 bit Little Endian in 3bytes)
 U18_3BE (Unsigned 18 bit Big Endian in 3bytes)
 G723_24 (G.723 (ADPCM) 24 kbit/s, 8 samples in 3 bytes)
 G723_24_1B (G.723 (ADPCM) 24 kbit/s, 1 sample in 1 byte)
 G723_40 (G.723 (ADPCM) 40 kbit/s, 8 samples in 3 bytes)
 G723_40_1B (G.723 (ADPCM) 40 kbit/s, 1 sample in 1 byte)
 DSD_U8 (Direct Stream Digital, 1-byte (x8), oldest bit in MSB)
 DSD_U16_LE (Direct Stream Digital, 2-byte (x16), little endian, oldest bits in MSB)
 DSD_U32_LE (Direct Stream Digital, 4-byte (x32), little endian, oldest bits in MSB)
 DSD_U16_BE (Direct Stream Digital, 2-byte (x16), big endian, oldest bits in MSB)
 DSD_U32_BE (Direct Stream Digital, 4-byte (x32), big endian, oldest bits in MSB)

PCM subformats:
 STD (Standard)

PCM states:
 OPEN
 SETUP
 PREPARED
 RUNNING
 XRUN
 DRAINING
 PAUSED
 SUSPENDED
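The listing above is the kind of output you get by iterating over alsa-lib's enumeration types. Below is a minimal sketch of a program that prints such a listing; it is an illustrative example (not necessarily the exact program used to produce the dump above) and uses only standard alsa-lib calls:

/* Sketch: enumerate the PCM stream types, access types, formats, subformats
   and states known to alsa-lib. Illustrative example only. */
#define ALSA_PCM_NEW_HW_PARAMS_API

#include <stdio.h>
#include <alsa/asoundlib.h>

int main(void)
{
    int i;

    printf("ALSA library version: %s\n", SND_LIB_VERSION_STR);

    printf("\nPCM stream types:\n");
    for (i = 0; i <= SND_PCM_STREAM_LAST; i++)
        printf(" %s\n", snd_pcm_stream_name((snd_pcm_stream_t)i));

    printf("\nPCM access types:\n");
    for (i = 0; i <= SND_PCM_ACCESS_LAST; i++)
        printf(" %s\n", snd_pcm_access_name((snd_pcm_access_t)i));

    printf("\nPCM formats:\n");
    for (i = 0; i <= SND_PCM_FORMAT_LAST; i++)
        if (snd_pcm_format_name((snd_pcm_format_t)i) != NULL)   /* skip gaps in the enum */
            printf(" %s (%s)\n",
                   snd_pcm_format_name((snd_pcm_format_t)i),
                   snd_pcm_format_description((snd_pcm_format_t)i));

    printf("\nPCM subformats:\n");
    for (i = 0; i <= SND_PCM_SUBFORMAT_LAST; i++)
        printf(" %s (%s)\n",
               snd_pcm_subformat_name((snd_pcm_subformat_t)i),
               snd_pcm_subformat_description((snd_pcm_subformat_t)i));

    printf("\nPCM states:\n");
    for (i = 0; i <= SND_PCM_STATE_LAST; i++)
        printf(" %s\n", snd_pcm_state_name((snd_pcm_state_t)i));

    return 0;
}

Compile it with gcc and link against alsa-lib (-lasound).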

Based on the information above, we refine the target further: the recording sample rate is set to 8000 Hz, with 16-bit little-endian samples. The hardware parameters actually granted by the 'default' capture device are shown below:

PCM handle name = 'default'
PCM state = PREPARED
access type = RW_INTERLEAVED
format = 'S16_LE' (Signed 16 bit Little Endian)
subformat = 'STD' (Standard)
channels = 1
rate = 7999 bps
period time = 256000 us
period size = 2048 frames
buffer time = 256000 us
buffer size = 2097152 frames
periods per buffer = 1024 frames
exact rate = 7999/1 bps
significant bits = 16
tick time = 0 us
is batch = 0
is block transfer = 1
is double = 0
is half duplex = 0
is joint duplex = 0
can overrange = 0
can mmap = 0
can pause = 1
can resume = 0
can sync start = 0
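These values are read back from the driver after the hardware parameters have been applied. A hedged sketch of how such a dump can be produced with alsa-lib's query functions is shown below; it assumes `handle` is an opened capture PCM and `params` is a hardware parameters object that has already been filled in and applied with snd_pcm_hw_params(), and it is not necessarily the exact code that produced the output above:

/* Sketch: print the hardware parameters that the driver actually granted. */
#define ALSA_PCM_NEW_HW_PARAMS_API
#include <stdio.h>
#include <alsa/asoundlib.h>

void dump_hw_params(snd_pcm_t *handle, snd_pcm_hw_params_t *params)
{
    unsigned int val, val2;
    int dir;
    snd_pcm_uframes_t frames;
    snd_pcm_format_t fmt;
    snd_pcm_access_t access;

    printf("PCM handle name = '%s'\n", snd_pcm_name(handle));
    printf("PCM state = %s\n", snd_pcm_state_name(snd_pcm_state(handle)));

    snd_pcm_hw_params_get_access(params, &access);
    printf("access type = %s\n", snd_pcm_access_name(access));

    snd_pcm_hw_params_get_format(params, &fmt);
    printf("format = '%s' (%s)\n", snd_pcm_format_name(fmt),
           snd_pcm_format_description(fmt));

    snd_pcm_hw_params_get_channels(params, &val);
    printf("channels = %u\n", val);

    snd_pcm_hw_params_get_rate(params, &val, &dir);
    printf("rate = %u Hz\n", val);

    snd_pcm_hw_params_get_period_time(params, &val, &dir);
    printf("period time = %u us\n", val);

    snd_pcm_hw_params_get_period_size(params, &frames, &dir);
    printf("period size = %lu frames\n", (unsigned long)frames);

    snd_pcm_hw_params_get_buffer_time(params, &val, &dir);
    printf("buffer time = %u us\n", val);

    snd_pcm_hw_params_get_buffer_size(params, &frames);
    printf("buffer size = %lu frames\n", (unsigned long)frames);

    snd_pcm_hw_params_get_periods(params, &val, &dir);
    printf("periods per buffer = %u\n", val);

    snd_pcm_hw_params_get_rate_numden(params, &val, &val2);
    printf("exact rate = %u/%u Hz\n", val, val2);
}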

The real-time processing program code:

#define ALSA_PCM_NEW_HW_PARAMS_API

#include <alsa/asoundlib.h>
#include "signal_processing_library.h"
#include "noise_suppression_x.h"
#include "noise_suppression.h"
#include "gain_control.h"
#include "echo_cancellation.h"

#define SAMPLES 8000
#define FRAMES 160

void webRtcNsProc(NsHandle *pNS_inst,short * pData, FILE *outfilenameNs,int frames,short * pOutData,int* filter_state1,int* filter_state12,int* Synthesis_state1,int* Synthesis_state12)
{
		int len = frames*2;	/* bytes per frame block */
		short shInL[160],shInH[160];
		short shOutL[160] = {0},shOutH[160] = {0};

		//fprintf(stderr,"NS shuBufferIn[] data: %d ... %d \n",shuBufferIn[0],shuBufferIn[79]);
		// Split the input into a low band and a high band with the analysis QMF;
		// the NS module expects the data as separate low-band/high-band buffers.
		WebRtcSpl_AnalysisQMF(pData,frames,shInL,shInH,filter_state1,filter_state12);

		// Run noise suppression on the two bands; the output is also returned per band.
		if (0 == WebRtcNs_Process(pNS_inst ,shInL ,shInH ,shOutL , shOutH))
		{
			short shBufferOut[320];
			// On success, recombine the denoised bands with the synthesis QMF
			// and copy the result into the caller's output buffer.
			WebRtcSpl_SynthesisQMF(shOutL,shOutH,160,shBufferOut,Synthesis_state1,Synthesis_state12);
			memcpy(pOutData,shBufferOut,frames*sizeof(short));
		}

		if (NULL == outfilenameNs)
		{
			printf("open NS out file err! \n");
			return;
		}
		fwrite(pOutData, 1, len, outfilenameNs);
}

void WebRtcAgcProc(void *agcHandle,short * pData, FILE * outfilename,int frames,short * pOutData)
{

	int len = frames*sizeof(short);		//  len=2*frames
	int micLevelIn = 0;
	int micLevelOut = 0;

	// memset(pData, 0, len);
	

	int inMicLevel  = micLevelOut;
	int outMicLevel = 0;
	uint8_t saturationWarning;
	int nAgcRet = WebRtcAgc_Process(agcHandle, pData, NULL, frames, pOutData,NULL, inMicLevel, &outMicLevel, 0, &saturationWarning);

	if (nAgcRet != 0)
	{
		printf("failed in WebRtcAgc_Process %d \n",nAgcRet);
	}
	micLevelIn = outMicLevel;
	fwrite(pOutData, 1, len, outfilename);

}

void WebRtcAecProc(void *aecmInst,short *near_frame,short* far_frame,FILE * fp_out,int frames,short * out_frame)
{

	int len = FRAMES*sizeof(short);

	printf("aec_proc_near_frame data: %d %d %d ...  %d %d %d\n",*near_frame,*(near_frame + 1),*(near_frame + 2),*(near_frame + frames - 3),*(near_frame + frames -2),*(near_frame + frames - 1));
	WebRtcAec_BufferFarend(aecmInst, far_frame, FRAMES);	// feed the far-end (speaker/reference) signal
	WebRtcAec_Process(aecmInst, near_frame, NULL, out_frame, NULL, frames,-3,0);	// cancel the echo in the near-end (MIC) signal
	printf("aec_out_proc_frame data: %d %d %d ...  %d %d %d\n",*out_frame,*(out_frame + 1),*(out_frame + 2),*(out_frame + frames - 3),*(out_frame + frames - 2),*(out_frame + frames - 1));

	fwrite(out_frame, 1, len, fp_out);

}

int main()
{
   long loops;		// number of periods left to record
   int rc,rc1,rc2,rc3,rc4,rc5,rc6;		// return codes: rc from snd_pcm_open()/snd_pcm_readi()/snd_pcm_hw_params(), rc1..rc6 from fwrite()
   int size;		// size of the capture buffer in bytes
   snd_pcm_t * handle;		// PCM capture handle
   snd_pcm_hw_params_t * params;	// hardware parameters object
   unsigned int val;		// sample rate used for recording (later reused for the period time)
   int dir;			// sub-unit direction for the *_near() calls
   snd_pcm_uframes_t frames;		// period size in frames
   short * buffer = NULL;		// interleaved 6-channel capture buffer
   short * buffertemp1 = NULL;		// temporary pointer (start of the current interleaved frame)
   short * buffertemp2 = NULL;		// temporary pointer
   short * buffertemp3 = NULL;		// temporary pointer (current channel sample)
   short * buffertemp4 = NULL;		// mono buffer holding the channel fed to the AGC
   short * buffertemp5 = NULL;		// write cursor into buffertemp4
   short * buffertempmicin = NULL;		// write cursor into bufferAecMicinData
   short * buffertempspeaker = NULL;		// write cursor into bufferAecSpeakerData
   short * bufferAgcOutData = NULL;	// AGC output buffer
   short * bufferNsOutData = NULL;	// NS output buffer
   short * bufferAecOutData = NULL;	// AEC output buffer
   short * bufferAecMicinData = NULL;	// AEC near-end (MIC) input buffer
   short * bufferAecSpeakerData = NULL;	// AEC far-end (speaker reference) input buffer

   int  filter_state1[6],filter_state12[6];
   int  Synthesis_state1[6],Synthesis_state12[6];
   memset(filter_state1,0,sizeof(filter_state1));
   memset(filter_state12,0,sizeof(filter_state12));
   memset(Synthesis_state1,0,sizeof(Synthesis_state1));
   memset(Synthesis_state12,0,sizeof(Synthesis_state12));
   FILE * out_fd1,*out_fd2,*out_fd3,*out_fd4,*out_fd5,*out_fd6,*out_fdAgc,*out_fdNs,*out_fdAec;		// output files: the six raw channels plus the AGC, NS and AEC results
   out_fd1 = fopen("out_pcm1.raw","wb+");
   out_fd2 = fopen("out_pcm2.raw","wb+");
   out_fd3 = fopen("out_pcm3.raw","wb+");
   out_fd4 = fopen("out_pcm4.raw","wb+");
   out_fd5 = fopen("out_pcm5.raw","wb+");
   out_fd6 = fopen("out_pcm6.raw","wb+");
   out_fdAgc = fopen("out_pcmAgc.raw","wb+");
   out_fdNs = fopen("out_pcmNs.raw","wb+");
   out_fdAec = fopen("out_pcmAec.raw","wb+");		
/* "wb+" creates (or truncates) each file as a binary file opened for both reading and writing. */
   /* Open the PCM device for recording (capture) and check that it succeeded. */
   rc = snd_pcm_open(&handle, "default",SND_PCM_STREAM_CAPTURE,0);
   if( rc < 0 )
   {
      fprintf(stderr,"unable to open pcm device: %s\n",
              snd_strerror(rc));
      exit(1);
   }
   /* allocate a hardware parameters object */
   // Allocate a hardware parameters object (on the stack)
   snd_pcm_hw_params_alloca(&params);
   /* fill it with default values. */
   // Fill it with the full default configuration space
   snd_pcm_hw_params_any(handle,params);
   /* set the desired hardware parameters */
   /* interleaved read/write access mode */
   snd_pcm_hw_params_set_access(handle,params,SND_PCM_ACCESS_RW_INTERLEAVED);
   /* signed 16-bit little-endian format */
   // Set the sample format: signed 16-bit little-endian PCM
   snd_pcm_hw_params_set_format(handle,params,SND_PCM_FORMAT_S16_LE);

   // Set the number of channels (6 interleaved capture channels)
   snd_pcm_hw_params_set_channels(handle,params,6);
   /* sampling rate */
   // Set the sample rate
   val = SAMPLES;
   snd_pcm_hw_params_set_rate_near(handle,params,&val,&dir);

   /* set period size */
   // Period size in frames
   frames = FRAMES;
   snd_pcm_hw_params_set_period_size_near(handle,params,&frames,&dir);
   /* Write the parameters to the driver and check that they were accepted */
   rc = snd_pcm_hw_params(handle,params);
   if ( rc < 0 )
   {
       fprintf(stderr,"unable to set hw parameters: %s\n",snd_strerror(rc));
       exit(1);
   }
   /* Use a buffer large enough to hold one period of interleaved data */
   snd_pcm_hw_params_get_period_size(params,&frames,&dir);
   size = frames * 12; /* 2 bytes/sample, 6 channels */
   buffer = ( short * ) malloc(size);
   buffertemp4 = ( short * )malloc(frames*sizeof(short));
   bufferAgcOutData = ( short * )malloc(frames*sizeof(short));
   bufferNsOutData = ( short * )malloc(frames*sizeof(short));
   bufferAecOutData = ( short * )malloc(frames*sizeof(short));
   bufferAecMicinData = ( short * )malloc(frames*sizeof(short));
   bufferAecSpeakerData = ( short * )malloc(frames*sizeof(short));
   // Period time in microseconds; loops is set so that roughly 3 seconds are recorded
   snd_pcm_hw_params_get_period_time(params, &val, &dir);
   loops = 3000000 / val;


	void *aecmInst = NULL;
	WebRtcAec_Create(&aecmInst);
	WebRtcAec_Init(aecmInst, SAMPLES, SAMPLES);

	AecConfig config;
	config.nlpMode = kAecNlpConservative;
	WebRtcAec_set_config(aecmInst, config);

	void *agcHandle = NULL;	
	WebRtcAgc_Create(&agcHandle);
	int minLevel = 0;
	int maxLevel = 255;
	int agcMode  = 3;	// 3 - Fixed Digital Gain 0dB
	WebRtcAgc_Init(agcHandle, minLevel, maxLevel, agcMode, SAMPLES);
	WebRtcAgc_config_t agcConfig;
	agcConfig.compressionGaindB = 20;
	agcConfig.limiterEnable     = 1;
	agcConfig.targetLevelDbfs   = 3;
	WebRtcAgc_set_config(agcHandle, agcConfig);

	NsHandle *pNS_inst = NULL;
	int nMode = 1;	
	if (0 != WebRtcNs_Create(&pNS_inst))
	{
		printf("Noise_Suppression WebRtcNs_Create err! \n");
	}
	if (0 !=  WebRtcNs_Init(pNS_inst,SAMPLES))
	{
		printf("Noise_Suppression WebRtcNs_Init err! \n");
	}
	if (0 !=  WebRtcNs_set_policy(pNS_inst,nMode))
	{
		printf("Noise_Suppression WebRtcNs_set_policy err! \n");
	}





   while( loops > 0 )
   {
       loops--;
       rc = snd_pcm_readi(handle,buffer,frames);		// read one period of interleaved capture data
       if ( rc == -EPIPE )
       {
          /* EPIPE means overrun */
          fprintf(stderr,"overrun occured\n");
          snd_pcm_prepare(handle);
       }
       else if ( rc < 0 )
       {
          fprintf(stderr,"error from read: %s\n",snd_strerror(rc));
       }
       else if ( rc != (int)frames)
       {
          fprintf(stderr,"short read, read %d frames\n",rc);
       }

       // De-interleave the 6 channels: write each channel to its own raw file, and keep
       // copies of the channels used as the AGC input, the AEC near end (MIC) and the AEC far end (speaker).
	buffertemp1 = buffer;
	buffertemp2 = buffer;
	buffertemp5 = buffertemp4;
	buffertempmicin = bufferAecMicinData;
	buffertempspeaker = bufferAecSpeakerData;




	int loopfor;
	for(loopfor = 1;loopfor <= frames;loopfor++)
	    {
		buffertemp2++;

		// channel 0
		buffertemp3 = buffertemp1;
		rc1 = fwrite(buffertemp3, 1, 2, out_fd1);

		// channel 1: also copied into buffertemp4 as the AGC input
		buffertemp3 = buffertemp3 + 1;
		rc2 = fwrite(buffertemp3, 1, 2, out_fd2);
		*buffertemp5 = *buffertemp3;
		buffertemp5++;

		// channel 2: also copied as the AEC near-end (MIC) signal
		buffertemp3 = buffertemp3 + 1;
		rc3 = fwrite(buffertemp3, 1, 2, out_fd3);
		*buffertempmicin = *buffertemp3;
		buffertempmicin++;

		// channel 3
		buffertemp3 = buffertemp3 + 1;
		rc4 = fwrite(buffertemp3, 1, 2, out_fd4);

		// channel 4
		buffertemp3 = buffertemp3 + 1;
		rc5 = fwrite(buffertemp3, 1, 2, out_fd5);

		// channel 5: also copied as the AEC far-end (speaker reference) signal
		buffertemp3 = buffertemp3 + 1;
		rc6 = fwrite(buffertemp3, 1, 2, out_fd6);
		*buffertempspeaker = *buffertemp3;
		buffertempspeaker++;

		// advance to the next interleaved frame (6 samples)
		buffertemp1 = buffertemp1 + 6;
	    }

	    WebRtcAgcProc(agcHandle,buffertemp4,out_fdAgc,frames,bufferAgcOutData);

	    // WebRtcAec_BufferFarend(aecmInst, far_frame, FRAMES);	// reference (far-end) buffering; now handled inside WebRtcAecProc
	    
	    WebRtcAecProc(aecmInst,bufferAecMicinData,bufferAecSpeakerData,out_fdAec,frames,bufferAecOutData);


	    webRtcNsProc(pNS_inst,bufferAgcOutData,out_fdNs,frames,bufferNsOutData,filter_state1,filter_state12,Synthesis_state1,Synthesis_state12);


       if ( rc != (int)frames )
        {
            fprintf(stderr,"short read: read %d frames\n",rc);
        }
   }

   WebRtcNs_Free(pNS_inst);
   WebRtcAgc_Free(agcHandle);
   WebRtcAec_Free(aecmInst);
   snd_pcm_drain(handle);
   snd_pcm_close(handle);
   free(buffer);
   free(buffertemp4);	/* the other temp pointers alias buffer or the AEC buffers and must not be freed */
   free(bufferAgcOutData);
   free(bufferNsOutData);
   free(bufferAecOutData);
   free(bufferAecMicinData);
   free(bufferAecSpeakerData);
   fclose(out_fd1);
   fclose(out_fd2);
   fclose(out_fd3);
   fclose(out_fd4);
   fclose(out_fd5);
   fclose(out_fd6);
   fclose(out_fdAgc);
   fclose(out_fdNs);
   fclose(out_fdAec);
}

Together with the shared libraries installed earlier, the program above performs the corresponding AEC, NS and AGC processing.
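The exact build command depends on how the WebRTC signal_processing/NS/AGC/AEC modules were packaged when they were installed; the source file name, include path and library name below are placeholders, not the author's actual setup. Something along these lines should work:

gcc -o realtime_aec realtime_aec.c \
    -I/path/to/webrtc/headers \
    -lwebrtc_audio_processing -lasound -lpthread -lm

Adjust -I and -l to point at wherever the WebRTC headers and the shared library built earlier actually live.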


Reposted from blog.csdn.net/ljl86400/article/details/80694395