一、音频简介
关于音视频采集的文章可能有很多,实现方式也有很多种,常见的实现方式有:
- Core Audio
Core Audio只能在Vista及以上版本的操作系统中使用,主要用来取代Wave系列API函数和DirectSound。Core Audio实现的功能也比较强大,能实现对麦克风的采集、声卡输出的采集、控制声音的播放。
- DirectSound
DirectSound库是DirectX系列库中的一个关于声音采集和播放的库,它是一个COM组件,对于初学者比较抽象。
- Wave库
Wave系列的API函数主要是用来实现对麦克风输入的采集(使用WaveIn系列API函数)和控制声音的播放(使用WaveOut系列API函数)。
- ffmpeg
ffmpeg是一个跨平台的中间库,它提供的不仅仅是音频采集相关的库,还包括音频、视频的采集、播放、转码、过滤等一系列操作,功能非常强大。这里我也使用ffmpeg封装了音频和视频的采集播放库,这里不做介绍。
wave相关api
涉及的API函数:
- waveInOpen
开启音频采集设备,成功后会返回设备句柄,后续的API都需要使用该句柄
调用模块需要提供一个回调函数(waveInProc),以接收采集的音频数据
- waveInClose
关闭音频采集设备
成功后,由waveInOpen返回的设备句柄将不再有效
- waveInPrepareHeader
准备音频采集数据缓存的空间
- waveInUnprepareHeader
清理由waveInPrepareHeader对音频数据缓存所做的准备工作(在释放缓存之前需要先调用该API)
- waveInAddBuffer
将准备好的音频数据缓存提供给音频采集设备
在调用该API之前需要先调用waveInPrepareHeader
- waveInStart
控制音频采集设备开始对音频数据的采集
- waveInStop
控制音频采集设备停止对音频数据的采集
音频采集设备采集到音频数据后,会调用在waveInOpen中设置的回调函数。
其中参数包括一个消息类型,根据其消息类型就可以进行相应的操作。
如接收到WIM_DATA消息,则说明有新的音频数据被采集到,这样就可以根据需要来对这些音频数据进行处理。
接口封装
// Audio capture data callback: the function type through which captured
// audio is handed to the application.
//   nEventType : event type; the capture thread (CWaveRecord::Producer)
//                passes 1 when it delivers recorded data
//   pData      : pointer to the recorded bytes (the wave header's lpData)
//   nDatalen   : number of recorded bytes (the wave header's dwBytesRecorded)
//   pContext   : user context pointer stored alongside the callback
//   dwReserver : reserved; the capture thread passes 0
typedef void (CALLBACK *AudioDataCallBack)(int nEventType, char* pData, int nDatalen, void* pContext, int dwReserver);
//-------------------------------------------------------------------------
// Usage:
// Initialize the SDK after application start-up and uninitialize it
// before application shutdown.
// Parameters:
// None
// Return:
// AD_InitSdk returns an int status; the success value is not documented
// in this header (TODO confirm against the implementation).
// Remark:
// Call AD_InitSdk once after the application has initialized, and call
// AD_UnInitSdk before the application uninitializes.
//-------------------------------------------------------------------------
AUDIOCELLECTSDK_API int AD_InitSdk(void);
AUDIOCELLECTSDK_API void AD_UnInitSdk(void);
//-------------------------------------------------------------------------
// Usage:
// Create a PCM audio capture instance (AD_CreateCaptureHandle) or a PCM
// audio playback instance (AD_CreatePlayHandle).
// Parameters:
// bCubeSound : selects a "single" or "cube" sound source
//              (presumably mono vs. stereo - TODO confirm)
// eSamples : samples per second
// eBits : number of bits per sample
// nBufferSize : wave header buffer size (capture only, defaults to 1024)
// Return:
// A handle greater than 0 on success; a value less than or equal to 0
// on failure.
// Remark:
// Create the instance handle before any other operation; the returned
// handle is what every other AD_* call in this API takes as lInstance.
//-------------------------------------------------------------------------
AUDIOCELLECTSDK_API LONG AD_CreateCaptureHandle(bool bCubeSound, SamplesPerSec eSamples, SampleBit eBits, unsigned int nBufferSize = 1024);
AUDIOCELLECTSDK_API LONG AD_CreatePlayHandle(bool bCubeSound, SamplesPerSec eSamples, SampleBit eBits);
//-------------------------------------------------------------------------
// Usage:
// Destroy an audio instance created by AD_CreateCaptureHandle or
// AD_CreatePlayHandle.
// Parameters:
// lInstance : the handle returned by AD_CreateCaptureHandle or
// AD_CreatePlayHandle
// Remark:
// None. (The exported name is spelled "Destory"; it is part of the
// public interface and is kept as is.)
//-------------------------------------------------------------------------
AUDIOCELLECTSDK_API void AD_DestoryHandle(LONG lInstance);
//-------------------------------------------------------------------------
// Usage:
// Register the callback that receives the captured audio stream.
// Parameters:
// lInstance : audio instance returned by AD_CreateCaptureHandle
// pCallBack : audio stream callback (see AudioDataCallBack)
// pContext : user context passed back to the callback
// Remark:
// Call AD_SetStreamCallBack before starting the capture; otherwise some
// data will be lost.
// Return:
// true on success, false otherwise.
//-------------------------------------------------------------------------
AUDIOCELLECTSDK_API bool AD_SetStreamCallBack(LONG lInstance, AudioDataCallBack pCallBack, void* pContext);
//-------------------------------------------------------------------------
// Usage:
// Start (AD_Start) or stop (AD_Stop) an audio instance, e.g. PCM audio
// data capture.
// Parameters:
// lInstance : audio instance returned by AD_CreateCaptureHandle
// or AD_CreatePlayHandle
// Return:
// AD_Start returns a bool (presumably true on success, mirroring the
// instance Start() methods - TODO confirm). AD_Stop returns nothing.
// Remark:
// Starts or stops the audio capture.
//-------------------------------------------------------------------------
AUDIOCELLECTSDK_API bool AD_Start(LONG lInstance);
AUDIOCELLECTSDK_API void AD_Stop(LONG lInstance);
//-------------------------------------------------------------------------
// Usage:
// Feed user-supplied audio data to a playback instance.
// Parameters:
// lInstance : audio instance returned by AD_CreatePlayHandle
// pData : buffer holding the audio data to play
// nLen : length of pData (presumably in bytes - TODO confirm)
// Return:
// true on success, false otherwise.
// Remark:
// lInstance must be a playback handle (AD_CreatePlayHandle), not a
// capture handle.
//-------------------------------------------------------------------------
AUDIOCELLECTSDK_API bool AD_InputData(LONG lInstance, char* pData, int nLen);
二、音频采集
采集实例实现:
// Audio capture instance built on the waveIn* API.
// Start() opens the WAVE_MAPPER input device, prepares and queues the wave
// headers and launches the Producer() thread, which hands recorded buffers
// to the registered AudioDataCallBack.
// NOTE(review): m_waveFormatEx, m_bStop, m_thread and m_bufferList are used
// by the methods but not declared here - presumably inherited from
// CWaveInstance; confirm.
class CWaveRecord :
public CWaveInstance
{
public:
// Parameters mirror AD_CreateCaptureHandle: sound source selection, sample
// rate, bits per sample and the wave header buffer size.
CWaveRecord(bool bCubeSound, SamplesPerSec eSamples,
SampleBit eBits, unsigned int nBufferSize = 1024);
virtual ~CWaveRecord(void);
public:
// Start(): opens the device and begins capturing; returns false on failure.
// Stop(): implementation not shown in this listing.
STDMETHOD_(bool, Start)();
STDMETHOD_(void, Stop)();
public:
// Stores the data callback and the user pointer handed back with each buffer
// (presumably into m_pDataCallBack / m_pUserData - implementation not shown).
void SetDataCallBack(AudioDataCallBack pCallBack, void* pUser);
// Receives a wave header from the device side (presumably called from
// WaveInProc on WIM_DATA - implementation not shown).
void OnReadData(WAVEHDR* pWaveHdr);
protected:
// Thread body: takes buffers from m_bufferList, invokes the data callback and
// re-queues the buffer on the device until m_bStop is set.
void Producer();
// Callback function registered with waveInOpen (CALLBACK_FUNCTION); the
// instance pointer is passed as dwInstance.
static void CALLBACK WaveInProc(HWAVEIN hWaveIn,UINT uMsg, DWORD_PTR dwInstance,
DWORD_PTR dwParam1,DWORD_PTR dwParam2);
protected:
AudioDataCallBack m_pDataCallBack; // application callback, may be NULL
void* m_pUserData;                 // context pointer passed to the callback
HWAVEIN m_hWaveIn;                 // device handle filled in by waveInOpen
WAVEHDR m_waveHdrs[MAX_INPUT_BUF_NUM]; // headers prepared and queued in Start()
unsigned int m_nBufferSize;        // wave header buffer size
};
// Shared-ownership pointer type for capture instances.
typedef boost::shared_ptr<CWaveRecord> WaveRecordPtr;
启动采集代码
// Opens the default wave input device, queues every capture buffer, starts
// recording and launches the Producer() thread.
// Returns true when capture is running. Returns false if any step fails; in
// that case the device is closed again and m_hWaveIn is reset to NULL
// (except when waveInOpen itself fails, where no handle was obtained).
STDMETHODIMP_(bool) CWaveRecord::Start()
{
    MMRESULT mRet = waveInOpen(&m_hWaveIn, WAVE_MAPPER, &m_waveFormatEx, (DWORD_PTR)WaveInProc,
        (DWORD_PTR)this, CALLBACK_FUNCTION);
    if (MMSYSERR_NOERROR != mRet)
        return false;

    // Prepare and queue the buffers; nPrepared counts the headers that are
    // currently prepared AND queued, so a failure can be rolled back exactly.
    int nPrepared = 0;
    for (; nPrepared < MAX_INPUT_BUF_NUM; ++nPrepared)
    {
        WAVEHDR* pHdr = &m_waveHdrs[nPrepared];
        mRet = waveInPrepareHeader(m_hWaveIn, pHdr, sizeof(WAVEHDR));
        if (MMSYSERR_NOERROR != mRet)
            break;
        mRet = waveInAddBuffer(m_hWaveIn, pHdr, sizeof(WAVEHDR));
        if (MMSYSERR_NOERROR != mRet)
        {
            // Prepared but never queued: undo the preparation right away.
            waveInUnprepareHeader(m_hWaveIn, pHdr, sizeof(WAVEHDR));
            break;
        }
    }

    // Start capture only if every buffer was queued successfully.
    if (MMSYSERR_NOERROR == mRet)
        mRet = waveInStart(m_hWaveIn);

    if (MMSYSERR_NOERROR != mRet)
    {
        // waveInClose fails while buffers are still queued, so return them
        // with waveInReset first, then unprepare and close.
        // NOTE(review): waveInReset reports the returned buffers through
        // WaveInProc; if OnReadData queues them into m_bufferList they may
        // need to be drained before a later Start() - confirm.
        waveInReset(m_hWaveIn);
        for (int i = 0; i < nPrepared; ++i)
            waveInUnprepareHeader(m_hWaveIn, &m_waveHdrs[i], sizeof(WAVEHDR));
        waveInClose(m_hWaveIn);
        m_hWaveIn = NULL;
        return false;
    }

    m_bStop = false;
    m_thread = boost::thread(boost::BOOST_BIND(&CWaveRecord::Producer, this));
    return true;
}
启动采集主要实现了音频句柄打开、准备头、启动采集。采集的时候我们启动了一个异步线程进行音频数据生产
void CWaveRecord::Producer()
{
while (!m_bStop)
{
LPWAVEHDR pBuffer = m_bufferList.GetBuffer();
if (NULL == pBuffer)
{
boost::this_thread::sleep_for(boost::chrono::milliseconds(1));
continue;
}
waveInUnprepareHeader(m_hWaveIn, pBuffer, sizeof(WAVEHDR));
if (m_pDataCallBack)
{
m_pDataCallBack(1, pBuffer->lpData, pBuffer->dwBytesRecorded, m_pUserData, 0);
}
if (!m_bStop)
{
::waveInPrepareHeader(m_hWaveIn, pBuffer, sizeof(WAVEHDR));
::waveInAddBuffer(m_hWaveIn, pBuffer, sizeof(WAVEHDR));
}
}
}
采集到数据之后回调到应用层,并重新准备wave头,如此循环!
三、音频播放
音频播放流程和采集差不多,区别是用的waveOut相关接口,实例如下:
// Audio playback instance built on the waveOut* API.
// Start() opens the WAVE_MAPPER output device, sets the volume and launches
// the Consumer() thread.
// NOTE(review): m_waveFormatEx, m_bStop, m_thread and m_bufferList are used
// by the methods but not declared here - presumably inherited from
// CWaveInstance; confirm.
class CWavePlay :
public CWaveInstance
{
public:
// Parameters mirror AD_CreatePlayHandle: sound source selection, sample rate
// and bits per sample.
CWavePlay(bool bCubeSound, SamplesPerSec eSamples, SampleBit eBits);
virtual ~CWavePlay(void);
public:
// Start(): opens the device and starts the playback thread; returns false
// when the device cannot be opened.
// Stop(): implementation not shown in this listing.
STDMETHOD_(bool, Start)();
STDMETHOD_(void, Stop)();
public:
// Accepts nLen units of audio data from the caller for playback
// (implementation not shown; presumably backs AD_InputData).
bool InputData(char* pData, int nLen);
// Receives a wave header from the device side (presumably called from
// WaveOutProc when a buffer has finished playing - implementation not shown).
void OnWriteOver(LPWAVEHDR pBuffer);
protected:
// Thread body: takes wave headers from m_bufferList, unprepares and frees
// them and recycles the matching data buffer.
void Consumer();
// Callback function registered with waveOutOpen (CALLBACK_FUNCTION); the
// instance pointer is passed as dwInstance.
static void CALLBACK WaveOutProc(HWAVEOUT hWo, UINT uMsg, DWORD_PTR dwInstance,
DWORD_PTR dwParam1, DWORD_PTR dwParam2);
protected:
HWAVEOUT m_hWaveOut;        // device handle filled in by waveOutOpen
SafeBufferPtr m_pPlayBuffer; // pool providing full/empty playback data buffers
};
wave启动如下
// Opens the default wave output device, sets the output volume to maximum
// and launches the Consumer() thread.
// Returns true on success, false when the device cannot be opened.
STDMETHODIMP_(bool) CWavePlay::Start()
{
    MMRESULT mRet = waveOutOpen(&m_hWaveOut, WAVE_MAPPER, &m_waveFormatEx,
        (DWORD_PTR)WaveOutProc, (DWORD_PTR)this,
        CALLBACK_FUNCTION);
    if (MMSYSERR_NOERROR != mRet)
        return false;

    // waveOutSetVolume takes a DWORD: low word = left channel, high word =
    // right channel, 0xFFFF = full volume. The result is deliberately not
    // checked - playback still works if the volume cannot be changed.
    const DWORD dwFullVolume = 0xFFFFFFFF;
    waveOutSetVolume(m_hWaveOut, dwFullVolume);

    m_bStop = false;
    m_thread = boost::thread(boost::BOOST_BIND(&CWavePlay::Consumer, this));
    return true;
}
打开wave输出设备、设置音量并启动播放线程,播放线程(Consumer)的实现如下:
{
while (!m_bStop)
{
LPWAVEHDR pBuffer = m_bufferList.GetBuffer();
if (NULL == pBuffer)
{
boost::this_thread::sleep_for(boost::chrono::milliseconds(50));
continue;
}
if (m_hWaveOut)
{
CBuffer* pFullBuffer = m_pPlayBuffer->GetFullBuffer();
::waveOutUnprepareHeader(m_hWaveOut, pBuffer, sizeof(WAVEHDR));
m_pPlayBuffer->AddEmptyBuffer(pFullBuffer);
}
delete pBuffer;
pBuffer = NULL;
}
}
四、测试
音频采集测试,demo调用如下:
// Toggles audio capture from the record button.
// Not capturing: creates a capture handle (stereo flag true, 8000 Hz,
// 16 bit), registers the data callback, starts capture and switches the
// button caption. If any of these steps fails the handle is destroyed and
// the UI state is left unchanged.
// Capturing: stops capture, clears the callback, destroys the handle and
// restores the button caption.
void CAudioDemonDlg::OnBnClickedButtonRecord()
{
    if (!m_bCapture)
    {
        m_lCaptureHandle = AD_CreateCaptureHandle(true, SAMPLES_8000, SAMPLE_BIT_16);
        // The API documents failure as any value <= 0, not only -1.
        if (m_lCaptureHandle <= 0)
            return;

        // Register the callback before starting so no data is lost, and do
        // not report "capturing" unless both steps actually succeeded.
        if (!AD_SetStreamCallBack(m_lCaptureHandle, AudioDataCallBack, this) ||
            !AD_Start(m_lCaptureHandle))
        {
            AD_DestoryHandle(m_lCaptureHandle);
            return;
        }

        m_bCapture = true;
        m_audioBtn.SetWindowText(_T("停止采集"));
    }
    else
    {
        AD_Stop(m_lCaptureHandle);
        AD_SetStreamCallBack(m_lCaptureHandle, NULL, this);
        AD_DestoryHandle(m_lCaptureHandle);
        m_bCapture = false;
        m_audioBtn.SetWindowText(_T("采集音频"));
    }
}
音频回调如下
// Audio stream callback used by the demo: traces the event type and the size
// of each delivered block. The data itself, the context pointer and the
// reserved argument are not used here.
void CALLBACK CAudioDemonDlg::AudioDataCallBack(int nEventType, char* pData, int nDatalen, void* pContext, int dwReserver)
{
    // Mark the parameters this demo does not consume as intentionally unused.
    (void)pData;
    (void)pContext;
    (void)dwReserver;

    TRACE("音频:类型:%d, 数据大小:%d\n", nEventType, nDatalen);
}
启动测试,可以看到每次采集到的音频数据的类型和大小被TRACE打印到调试输出窗口中
源码获取、合作、技术交流请获取如下联系方式:
QQ交流群:961179337
微信账号:lixiang6153
公众号:IT技术快餐
电子邮箱:[email protected]