用 Visual C++ 创建 Win32 工程,调用 Windows API 做语音识别。查找了很多资料,但很少有用 Win32 实现的,国外倒是有很多人用 C# 调用 Windows API 做语音识别,其中不少是把语音识别与 Word 结合,把识别出的语音写入 Word 文档。由于我不是 C# 爱好者,所以没有去查找那些实现的源代码(source code)。
代码如下:
// SpeechToTextTest2.cpp : Defines the entry point for the application. // #include "stdafx.h" #include "SpeechToTextTest2.h" #include<Windows.h> #include<WinUser.h> #include <sphelper.h> #include <string> //#include "Resource.h" #define WM_RECOEVENT WM_USER+1 #define ID_START_RECOG 13 BOOL CALLBACK DlgProc(HWND hWnd, UINT Message, WPARAM wParam, LPARAM lParam); void LaunchRecognition(HWND hWnd); void HandleEvent(HWND hWnd); WCHAR *ExtractInput(CSpEvent event); void CleanupSAPI(); CComPtr<ISpRecognizer> g_cpEngine; CComPtr<ISpRecoContext> g_cpRecoCtx; CComPtr<ISpRecoGrammar> g_cpRecoGrammar; WCHAR *lpszBuffer; int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine, int nShowCmd) { // allocating memory for buffer this buffer is used to store // the text during the speech recognition process lpszBuffer = new WCHAR[MAX_PATH]; lpszBuffer[0] = 0; DialogBox(hInstance, MAKEINTRESOURCE(IDD_DIALOG12), NULL, DlgProc); // freeing the memory that was allocated for the buffer delete[] lpszBuffer; return 0; } BOOL CALLBACK DlgProc(HWND hWnd, UINT Message, WPARAM wParam, LPARAM lParam) { switch (Message) { case WM_RECOEVENT: HandleEvent(hWnd); break; case WM_COMMAND: switch (LOWORD(wParam)) { case IDC_BUTTON1: LaunchRecognition(hWnd); break; } break; case WM_CLOSE: CleanupSAPI(); EndDialog(hWnd, 0); break; default: return FALSE; } return TRUE; } void LaunchRecognition(HWND hWnd) { if (FAILED(::CoInitialize(NULL))) { throw std::string("Unable to initialise COM objects"); } ULONGLONG ullGramId = 1; HRESULT hr = g_cpEngine.CoCreateInstance(CLSID_SpSharedRecognizer); if (FAILED(hr)) { throw std::string("Unable to create recognition engine"); } hr = g_cpEngine->CreateRecoContext(&g_cpRecoCtx); if (FAILED(hr)) { throw std::string("Failed command recognition"); } hr = g_cpRecoCtx->SetNotifyWindowMessage(hWnd, WM_RECOEVENT, 0, 0); if (FAILED(hr)) { throw std::string("Unable to select notification window"); } const ULONGLONG ullInterest = SPFEI(SPEI_SOUND_START) 
| SPFEI(SPEI_SOUND_END) | SPFEI(SPEI_PHRASE_START) | SPFEI(SPEI_RECOGNITION) | SPFEI(SPEI_FALSE_RECOGNITION) | SPFEI(SPEI_HYPOTHESIS) | SPFEI(SPEI_INTERFERENCE) | SPFEI(SPEI_RECO_OTHER_CONTEXT) | SPFEI(SPEI_REQUEST_UI) | SPFEI(SPEI_RECO_STATE_CHANGE) | SPFEI(SPEI_PROPERTY_NUM_CHANGE) | SPFEI(SPEI_PROPERTY_STRING_CHANGE); hr = g_cpRecoCtx->SetInterest(ullInterest, ullInterest); if (FAILED(hr)) { throw std::string("Failed to create interest"); } hr = g_cpRecoCtx->CreateGrammar(ullGramId, &g_cpRecoGrammar); if (FAILED(hr)) { throw std::string("Unable to create grammar"); } hr = g_cpRecoGrammar->LoadDictation(0, SPLO_STATIC); if (FAILED(hr)) { throw std::string("Failed to load dictation"); } hr = g_cpRecoGrammar->SetDictationState(SPRS_ACTIVE); if (FAILED(hr)) { throw std::string("Failed setting dictation state"); } } void HandleEvent(HWND hWnd) { CSpEvent event; WCHAR *pwszText; // Loop processing events while there are any in the queue while (event.GetFrom(g_cpRecoCtx) == S_OK) { switch (event.eEventId) { case SPEI_HYPOTHESIS: { pwszText = ExtractInput(event); MessageBoxW(NULL, pwszText, L"text", MB_ICONERROR); wcscat(lpszBuffer, pwszText); wcsncat(lpszBuffer, L"\r\n", 2); SetDlgItemTextW(hWnd, IDC_EDIT1, lpszBuffer); } break; } } } WCHAR *ExtractInput(CSpEvent event) { HRESULT hr = S_OK; CComPtr<ISpRecoResult> cpRecoResult; SPPHRASE *pPhrase; WCHAR *pwszText; cpRecoResult = event.RecoResult(); hr = cpRecoResult->GetPhrase(&pPhrase); if (SUCCEEDED(hr)) { if (event.eEventId == SPEI_FALSE_RECOGNITION) { pwszText = L"False recognition"; //MessageBoxW(NULL, pwszText, L"text", MB_ICONERROR); } else { // Get the phrase's entire text string, including replacements. 
hr = cpRecoResult->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &pwszText, NULL); } } CoTaskMemFree(pPhrase); return pwszText; } void CleanupSAPI() { if (g_cpRecoGrammar) { g_cpRecoGrammar.Release(); } if (g_cpRecoCtx) { g_cpRecoCtx->SetNotifySink(NULL); g_cpRecoCtx.Release(); } if (g_cpEngine) { g_cpEngine.Release(); } CoUninitialize(); }
运行结果如下:
代码参考
C++ Speech Recognition:https://www.codeproject.com/Tips/784140/Cplusplus-Speech-Recognition
点进去看这篇文章后,你会发现有一堆人在问头文件的问题,说无法编译通过,因为源工程已经不存在了,文章里只有 .cpp 而没有头文件部分。经过一番摸索,我实现了如上代码。
博客里面也只有 .cpp 部分。由于 CSDN 下载文件要积分,所以就不上传源工程了。
如果觉得有参考价值、需要源文件的同志,可以email我,邮箱:[email protected]
还要提醒的是,我只是解决了原博客中程序的实现问题(让它能够编译并运行)。