Win7环境下测试。
首先安装 Windows Speech SDK,下载地址为:http://www.microsoft.com/download/en/details.aspx?id=10121 。其中 SpeechSDK51.exe 是简体中文语音引擎,SpeechSDK51LangPack.exe 是中文男声语音库。
微软本身的语音库中文语音比较生硬,可以安装NeoSpeech的中文语音库。百度下直接搜索NeoSpeech的中文语音即可找到。
从网上找的代码封装成类,qt5.5,在简单环境下可以满足需求,直接上代码
头文件
<pre name="code" class="cpp">#include <QObject>
#include <QList>
#include <sapi.h>
class ISpVoice;
class ISpObjectToken;
class ISpAudio;
class SpFormat;
/// Thin QObject wrapper around the Microsoft SAPI text-to-speech COM component.
///
/// The constructor sets up the voice and audio output; speak() queues text for
/// playback and currFormat() reports the output format currently in use.
class SimpleTTS : public QObject
{
    Q_OBJECT
public:
    /// @param parent Optional QObject parent (standard Qt ownership).
    explicit SimpleTTS(QObject *parent = NULL);
    ~SimpleTTS();

    /// Speaks @p text through the configured voice.
    void speak(QString text);

    /// Returns the audio format currently used by the voice output.
    SPSTREAMFORMAT currFormat();

private:
    void initVoices();   // collect the installed voice tokens into m_voices
    void initSpFormat(); // fill m_spFmts with the selectable audio formats

private:
    bool m_bTTSEnable;               // whether the TTS engine is usable
    ISpVoice* m_pVoice;              // SAPI COM voice component
    ISpAudio* m_pAudio;              // audio output object used by the voice
    QList<ISpObjectToken*> m_voices; // installed voice tokens
    QList<SpFormat> m_spFmts;        // selectable audio output formats
};
/// Value type pairing a SAPI stream-format id with its textual description
/// and a byte-rate figure computed by the constructor.
class SpFormat
{
public:
    /// @param vl SAPI stream-format identifier.
    /// @param sz Textual description of the format.
    SpFormat(SPSTREAMFORMAT vl, QString sz);
    ~SpFormat() {}

    /// Textual description supplied at construction.
    QString discription() const
    {
        return m_discription;
    }

    /// SAPI stream-format identifier supplied at construction.
    SPSTREAMFORMAT getFormat() const
    {
        return m_val;
    }

    /// Byte-rate value the constructor derived from the description.
    DWORD rate() const
    {
        return m_bytePS;
    }

private:
    SPSTREAMFORMAT m_val;  // format identifier
    QString m_discription; // description text
    DWORD m_bytePS;        // byte rate computed in the constructor
};
cpp
#include "simpletts.h"
#include <conio.h>
#include <sphelper.h>
#include <vector>
#include <queue>
#include <string>
#include <QDebug>
#include "jiontctrllmgr.h"
#include "simplelog.h"
#include "jiontctrllmgr.h"
#include <sphelper.h>
#include <spuihelp.h>
#pragma comment(lib,"sapi.lib") //sapi.lib在SDK的lib目录,必需正确配置
SimpleTTS::SimpleTTS(QObject *parent)
: QObject(parent), m_pVoice(NULL), m_pAudio(NULL)
{
m_bTTSEnable = true;
TCHAR szBuf[80] = { 0 };
LPVOID lpMsgBuf = NULL;
HRESULT hr = CoInitializeEx(NULL, COINIT_MULTITHREADED);
if (FAILED(hr))
{
hr = CoInitializeEx(NULL, COINIT_APARTMENTTHREADED);
if (FAILED(hr))
{
FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER |FORMAT_MESSAGE_FROM_SYSTEM,NULL,hr,MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),(LPTSTR)&lpMsgBuf,0, NULL);
wsprintf(szBuf, _T("error %d: %s"), hr, lpMsgBuf);
LocalFree(lpMsgBuf);
#ifdef QT_DEBUG
qDebug().noquote() << "Error to intiliaze COM reason:" + QString::fromStdWString(szBuf);
#else
LOGERROR("Error to intiliaze COM reason:"+ QString::fromStdWString(szBuf));
#endif // QT_DEBUG
return;
}
}
hr = CoCreateInstance(CLSID_SpVoice, NULL, CLSCTX_ALL, IID_ISpVoice, (void **)&m_pVoice);
if (FAILED(hr))
{
FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, NULL, hr, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPTSTR)&lpMsgBuf, 0, NULL);
wsprintf(szBuf, _T("error %d: %s"), hr, lpMsgBuf);
LocalFree(lpMsgBuf);
#ifdef QT_DEBUG
qDebug().noquote() << "Error to intiliaze ISPVoice component,reason: " + QString::fromStdWString(szBuf);
#else
LOGERROR("Error to intiliaze ISPVoice component,reason: " + QString::fromStdWString(szBuf));
#endif // QT_DEBUG
m_bTTSEnable = false;
return;
}
hr = SpCreateDefaultObjectFromCategoryId(SPCAT_AUDIOOUT, &m_pAudio);
if (FAILED(hr))
{
FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, NULL, hr, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPTSTR)&lpMsgBuf, 0, NULL);
wsprintf(szBuf, _T("error %d: %s"), hr, lpMsgBuf);
LocalFree(lpMsgBuf);
#ifdef QT_DEBUG
qDebug().noquote() << "SpCreateDefaultObjectFromCategoryId failed,reason:" + QString::fromStdWString(szBuf);
#else
LOGERROR("SpCreateDefaultObjectFromCategoryId failed,reason:" + QString::fromStdWString(szBuf));
#endif // QT_DEBUG
m_bTTSEnable = false;
return;
}
initVoices();
WCHAR* szDesc;
QStringList list;
for (size_t i = 0; i < m_voices.size(); i++)
{
SpGetDescription(m_voices[i], &szDesc); //获取语音库描述
list << QString::fromStdWString(std::wstring(szDesc));
qDebug() << szDesc;
}
if (list.empty())LOGERROR(QString::fromLocal8Bit("TTS 没有可用的语音库"));
else LOGINFO(QString::fromLocal8Bit("TTS 支持语音库:%1").arg(list.join(",")));
int index = JiontCtrllMgr::getInstance()->config()->voiceIndex();
if (index > 0 && index < m_voices.size())
{
hr = m_pVoice->SetVoice(m_voices[index]);
}
else
{
#ifdef QT_DEBUG
qDebug().noquote() <<QString::fromLocal8Bit("SetVoice index 无效,语音为正确设置!");
#else
LOGERROR(QString::fromLocal8Bit("SetVoice index 无效,语音为正确设置!"));
#endif // QT_DEBUG
}
initSpFormat();
//输出音频设置
CSpStreamFormat Fmt;
index = JiontCtrllMgr::getInstance()->config()->audioIndex();
if (index >= 0 && index < m_spFmts.size())
{
Fmt.AssignFormat(m_spFmts[index].getFormat());
hr = m_pAudio->SetFormat(Fmt.FormatId(), Fmt.WaveFormatExPtr());
if (FAILED(hr))
{
#ifdef QT_DEBUG
qDebug().noquote() << QString::fromLocal8Bit("SetFormat TTS 初始化失败,TTS 不可用!");
#else
LOGERROR(QString::fromLocal8Bit("SetFormat TTS 初始化失败,TTS 不可用!"));
#endif // QT_DEBUG
m_bTTSEnable = false;
return;
}
hr = m_pVoice->SetOutput(m_pAudio, FALSE);
if (FAILED(hr))
{
#ifdef QT_DEBUG
qDebug().noquote() << QString::fromLocal8Bit("SetOutput TTS 初始化失败,TTS 不可用!");
#else
LOGERROR(QString::fromLocal8Bit("SetOutput TTS 初始化失败,TTS 不可用!"));
#endif // QT_DEBUG
m_bTTSEnable = false;
return;
}
}
}
/// Releases the COM objects held by this instance and uninitializes COM.
///
/// Every pointer is checked first because the constructor can return early
/// and leave m_pVoice / m_pAudio null.
SimpleTTS::~SimpleTTS()
{
    if (m_pVoice != NULL)
    {
        m_pVoice->Release();
        m_pVoice = NULL;
    }
    if (m_pAudio != NULL)
    {
        m_pAudio->Release();
        m_pAudio = NULL;
    }

    // The voice tokens stored by initVoices() are owned references as well.
    for (int i = 0; i < m_voices.size(); ++i)
    {
        if (m_voices[i] != NULL)
            m_voices[i]->Release();
    }
    m_voices.clear();

    // All COM objects must be gone before COM itself is torn down.
    CoUninitialize();
}
/// Queues @p text for asynchronous speech output.
///
/// Before speaking, every decimal digit is wrapped in square brackets and the
/// replacement pairs from the JiontCtrllMgr configuration are applied in order.
/// Does nothing (apart from a console message) when the engine is unavailable.
///
/// @param text Text to speak; taken by value because it is rewritten locally.
void SimpleTTS::speak(QString text)
{
    // m_pVoice is checked too: initialization may have stopped before the
    // voice object was created.
    if (!m_bTTSEnable || m_pVoice == NULL)
    {
        printf("tts library cant use!");
        return;
    }

    // Wrap each digit as "[d]" — presumably so the engine reads digits one by
    // one; confirm against the voice in use.
    for (int digit = 0; digit <= 9; ++digit)
    {
        const QString d = QString("%1").arg(digit);
        text.replace(d, "[" + d + "]");
    }

    const QList<QPair<QString, QString>>& replacements = JiontCtrllMgr::getInstance()->config()->replaceList();
    for (int i = 0; i < replacements.size(); ++i)
    {
        text.replace(replacements[i].first, replacements[i].second);
    }

    // ISpVoice::Speak takes a wide string regardless of the UNICODE setting,
    // so pass the wchar_t buffer directly instead of casting to LPCTSTR.
    const std::wstring wide = text.toStdWString();
    const HRESULT hr = m_pVoice->Speak(wide.c_str(), SPF_ASYNC, NULL);
    if (FAILED(hr))
    {
        printf("tts speak failed: 0x%08lx", static_cast<unsigned long>(hr));
    }
    //m_pVoice->WaitUntilDone(INFINITE);
}
/// Returns the stream format of the voice's current output.
///
/// @return The format enum computed from the voice's output stream, or
///         SPSF_Default when there is no voice object or the stream or its
///         format cannot be obtained.
SPSTREAMFORMAT SimpleTTS::currFormat()
{
    SPSTREAMFORMAT fmt = SPSF_Default;

    // The voice may be missing if construction failed part-way.
    if (m_pVoice == NULL)
        return fmt;

    CComPtr<ISpStreamFormat> cpStream;
    HRESULT hr = m_pVoice->GetOutputStream(&cpStream);
    if (hr != S_OK)
        return fmt;

    CSpStreamFormat Fmt;
    hr = Fmt.AssignFormat(cpStream);
    if (SUCCEEDED(hr))
    {
        fmt = Fmt.ComputeFormatEnum();
    }
    return fmt;
}
void SimpleTTS::initVoices()
{
IEnumSpObjectTokens* cpEnum;
HRESULT hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);
ULONG i = 0, ulCount = 0;
hr = cpEnum->GetCount(&ulCount);
ISpObjectToken* tok;
while (SUCCEEDED(hr) && i < ulCount)
{
hr = cpEnum->Next(1, &tok, NULL);
m_voices.push_back(tok);
i++;
}
cpEnum->Release();
}
/// Appends the selectable 16-bit output formats to m_spFmts.
/// The order matters: the constructor picks an entry by its position.
void SimpleTTS::initSpFormat()
{
    m_spFmts << SpFormat(SPSF_12kHz16BitStereo, "SPSF_12kHz16BitStereo")
             << SpFormat(SPSF_16kHz16BitMono, "SPSF_16kHz16BitMono")
             << SpFormat(SPSF_16kHz16BitStereo, "SPSF_16kHz16BitStereo")
             << SpFormat(SPSF_22kHz16BitMono, "SPSF_22kHz16BitMono")
             << SpFormat(SPSF_22kHz16BitStereo, "SPSF_22kHz16BitStereo")
             << SpFormat(SPSF_24kHz16BitStereo, "SPSF_24kHz16BitStereo")
             << SpFormat(SPSF_32kHz16BitStereo, "SPSF_32kHz16BitStereo")
             << SpFormat(SPSF_44kHz16BitMono, "SPSF_44kHz16BitMono")
             << SpFormat(SPSF_44kHz16BitStereo, "SPSF_44kHz16BitStereo")
             << SpFormat(SPSF_48kHz16BitMono, "SPSF_48kHz16BitMono")
             << SpFormat(SPSF_48kHz16BitStereo, "SPSF_48kHz16BitStereo");
}
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// Stores the format id and description, then derives the byte rate from the
/// description, which is expected to look like "SPSF_<kHz>kHz<bits>Bit<Mono|Stereo>".
///
/// The byte rate is sampleRate * bits / 8, doubled for stereo. The nominal
/// 11/22/44 kHz names map to 11025/22050/44100 Hz. When the description does
/// not match the expected pattern the byte rate is left at 0.
///
/// @param vl SAPI stream-format identifier.
/// @param sz Description of the format, e.g. "SPSF_22kHz16BitStereo".
SpFormat::SpFormat(SPSTREAMFORMAT vl, QString sz)
    : m_val(vl), m_discription(sz), m_bytePS(0)
{
    // Parse into unsigned int so the variables match the %u conversions.
    unsigned int sampleKHz = 0;
    unsigned int bits = 0;
    const QByteArray name = m_discription.toLatin1();
    if (sscanf(name.constData(), "SPSF_%ukHz%uBit", &sampleKHz, &bits) != 2)
        return;

    DWORD sampleRate;
    if (sampleKHz == 11)
        sampleRate = 11025;
    else if (sampleKHz == 22)
        sampleRate = 22050;
    else if (sampleKHz == 44)
        sampleRate = 44100;
    else
        sampleRate = sampleKHz * 1000;

    m_bytePS = sampleRate * bits / 8;

    // The channel layout is the suffix of the description; two channels carry
    // twice the data.
    if (m_discription.endsWith("Stereo"))
        m_bytePS *= 2;
}