A simple implementation of Baidu speech recognition with Qt 5.14.1 (with source code)

A simple implementation of Baidu speech recognition with Qt 5.14.1

Note: This tutorial is based on a course found on the bilibili website; only the layout of the UI has been slightly adjusted. While following the course, I reproduced the complete speech recognition workflow and all of the source code (see the bottom of this article for how to obtain and use the source code). I wrote the following tutorial from those notes so that everyone can download and study it.

1. Click Run, then press and hold the "按住说话" (hold to talk) button with the mouse to start recording audio. When you release the button, the recording stops and the speech is converted to text. The screenshots below show the running program and the components used in the UI.

Insert picture description here
Insert picture description here

2. Speech recognition workflow: (1) Send the client id and secret to Baidu's token server in an HTTP request (carried over TCP); the server returns an access_token (valid for one month). (2) Send the recorded audio file together with the access_token to the speech recognition server, which returns the recognition result.

Insert picture description here

3. The code of the header (.h) and source (.cpp) files

3.1 audio.h

#ifndef AUDIO_H
#define AUDIO_H

#include <QObject>
#include <QAudioFormat>
#include <QAudioDeviceInfo>
#include <QMessageBox>
#include <QAudioInput>
#include <QFile>
/// Records audio from an input device into a file.
class Audio : public QObject
{
    Q_OBJECT
public:
    explicit Audio(QObject *parent = nullptr);
    /// Starts recording; the captured audio is written to @p fileName.
    void startAudio(QString fileName);
    /// Stops the recording started by startAudio() and closes the file.
    void stopAudio();
signals:
private:
    // Both pointers are only assigned inside startAudio(). They default to
    // nullptr so they never hold an indeterminate value before that call.
    QAudioInput *m_audio = nullptr; ///< Capture object created by startAudio().
    QFile *m_file = nullptr;        ///< Output file the captured audio is written to.
};
#endif // AUDIO_H

3.2 http.h

#ifndef HTTP_H
#define HTTP_H

#include <QObject>
#include <QMap>
#include <QNetworkAccessManager>
#include <QNetworkRequest>
#include <QMapIterator>
#include <QNetworkReply>
#include <QEventLoop>
#include <QDebug>
/// Small helper that performs blocking HTTP POST requests.
class Http : public QObject
{
    Q_OBJECT
public:
    explicit Http(QObject *parent = nullptr);
    /// Posts @p requestData to @p Url and waits for the reply to finish.
    /// @param Url         Target URL of the request.
    /// @param header      Header name/value pairs set as raw headers on the request.
    /// @param requestData Body of the POST request.
    /// @param replyData   Receives the reply body when the request succeeds.
    /// @return true if the reply finished without a network error, false otherwise.
    bool post_sync(QString Url,QMap<QString,QString> header,QByteArray requestData,QByteArray &replyData);

signals:
};
#endif // HTTP_H

3.3 speech.h

#ifndef SPEECH_H
#define SPEECH_H

#include <QObject>
#include <http.h>
#include <QJsonDocument>
#include <QJsonParseError>
#include <QJsonObject>
#include <QJsonArray>
#include <QHostInfo>
#include <QFile>
#include <QMessageBox>

// access_token retrieval.
// %1 is filled with client_id and %2 with client_secret when the URL is built.
const QString baiduTokenUrl = "https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=%1&client_secret=%2&";
// Placeholder credentials: replace them with the values of your own Baidu application.
const QString client_id = "xxxxxxxxxxxx";
const QString client_secret = "xxxxxxxxxxxxxxx";
// Speech recognition.
// %1 is filled with the cuid (the local host name is used) and %2 with the access_token.
// NOTE(review): this endpoint uses plain http, so the token travels unencrypted — confirm whether the https endpoint can be used instead.
const QString baiduSpeechUrl = "http://vop.baidu.com/server_api?dev_pid=1537&cuid=%1&token=%2";

/// Sends a recorded audio file to the Baidu speech service and returns the recognized text.
class Speech : public QObject
{
    Q_OBJECT
public:
    explicit Speech(QObject *parent = nullptr);
    /// Requests an access_token, posts the content of @p fileName to the
    /// speech URL and returns the recognized text, or an empty string on failure.
    QString speechIdentify(QString fileName);
    /// Parses @p ba as a JSON object and returns the value stored under @p key:
    /// the string itself, or the first element if the value is an array.
    /// Returns an empty string if parsing fails or the key is not usable.
    QString getJsonValue(QByteArray ba,QString key);
signals:
};
#endif // SPEECH_H

3.4 widget.h

#ifndef WIDGET_H
#define WIDGET_H

#include <QWidget>
#include <audio.h>
#include <speech.h>
QT_BEGIN_NAMESPACE
namespace Ui { class Widget; }
QT_END_NAMESPACE

/// Main window: a push-to-talk button, a text area for the result and a clear button.
class Widget : public QWidget
{
    Q_OBJECT
public:
    Widget(QWidget *parent = nullptr);
    ~Widget();
private slots:
    /// Button pressed: start recording.
    void on_pushButton_pressed();
    /// Button released: stop recording and run the recognition.
    void on_pushButton_released();
    /// Clear button clicked: empty the text area.
    void on_clearButton_clicked();
private:
    Ui::Widget *ui;
    // Only assigned in on_pushButton_pressed(); defaults to nullptr so it
    // never holds an indeterminate value before the first press.
    Audio *audio = nullptr;
};
#endif // WIDGET_H

3.5 audio.cpp

#include "audio.h"
// Constructs the recorder; nothing is opened until startAudio() is called.
Audio::Audio(QObject *parent)
    : QObject(parent)
{
}
/**
 * Starts recording from the default input device into @p fileName.
 *
 * The requested format is 16 kHz, mono, 16-bit signed little-endian PCM.
 * If the device does not support it, the nearest supported format is used.
 *
 * On failure (no input device, or the file cannot be opened) a warning box
 * is shown and both m_audio and m_file are left as nullptr.
 *
 * @param fileName Path of the file the raw audio data is written to.
 */
void Audio::startAudio(QString fileName)
{
    // Reset first so that every failure path below leaves null pointers
    // behind instead of stale or indeterminate ones.
    m_audio = nullptr;
    m_file = nullptr;

    const QAudioDeviceInfo device = QAudioDeviceInfo::defaultInputDevice();
    if (device.isNull()) // no recording device available
    {
        QMessageBox::warning(nullptr,"QAudioDeviceInfo","录音设备不存在");
        return;
    }

    // Requested audio encoding
    QAudioFormat format;
    format.setSampleRate(16000);                     // 16 kHz sample rate
    format.setChannelCount(1);                       // mono
    format.setSampleSize(16);                        // 16 bits per sample
    format.setCodec("audio/pcm");                    // was misspelled "aduio/pcm"
    format.setByteOrder(QAudioFormat::LittleEndian); // describe the samples completely,
    format.setSampleType(QAudioFormat::SignedInt);   // otherwise the format stays ambiguous
    // Fall back to the closest format the device can deliver
    if (!device.isFormatSupported(format))
    {
        format = device.nearestFormat(format);
    }

    // Open the output file. It is parented to this object so it cannot
    // outlive the recorder even if stopAudio() is never called.
    QFile *file = new QFile(fileName, this);
    if (!file->open(QIODevice::WriteOnly))
    {
        QMessageBox::warning(nullptr,"QFile","无法打开录音文件");
        delete file;
        return;
    }
    m_file = file;

    // Create the recorder on the device that was checked above and start
    // writing the captured data into the file.
    m_audio = new QAudioInput(device, format, this);
    m_audio->start(m_file);
}
/**
 * Stops the recording and closes the output file.
 *
 * Each pointer is checked before use, so the function does nothing for a
 * member that is nullptr (for example when it is called a second time).
 */
void Audio::stopAudio()
{
    // Stop recording and release the capture object
    if (m_audio != nullptr)
    {
        m_audio->stop();
        m_audio->deleteLater();
        m_audio = nullptr;
    }
    // Close the file and delete the file object
    if (m_file != nullptr)
    {
        m_file->close();
        delete m_file;
        m_file = nullptr;
    }
}

3.6 http.cpp

#include "http.h"

// Constructs the helper; it keeps no state between requests.
Http::Http(QObject *parent)
    : QObject(parent)
{
}
/**
 * Sends a POST request and blocks in a local event loop until the reply
 * has finished.
 *
 * @param Url         Target URL.
 * @param header      Header name/value pairs, sent as Latin-1 raw headers.
 * @param requestData Request body.
 * @param replyData   Receives the reply body on success; untouched on failure.
 * @return true if the reply finished without error, false otherwise.
 */
bool Http::post_sync(QString Url, QMap<QString, QString> header, QByteArray requestData, QByteArray &replyData)
{
    // Object that sends the request
    QNetworkAccessManager manager;

    // Build the request: URL plus every entry of the header map
    QNetworkRequest request;
    request.setUrl(Url);
    for (auto entry = header.constBegin(); entry != header.constEnd(); ++entry)
    {
        request.setRawHeader(entry.key().toLatin1(), entry.value().toLatin1());
    }

    QNetworkReply *reply = manager.post(request, requestData);

    // Spin a local event loop; it quits when the reply emits finished()
    QEventLoop waitLoop;
    connect(reply, &QNetworkReply::finished, &waitLoop, &QEventLoop::quit);
    waitLoop.exec();

    const bool failed = (reply == nullptr) || (reply->error() != QNetworkReply::NoError);
    if (failed)
    {
        qDebug()<<"请求失败";
        return false;
    }

    // Read the data returned by the server
    replyData = reply->readAll();
    return true;
}

3.7 main.cpp

#include "widget.h"

#include <QApplication>

int main(int argc, char *argv[])
{
    QApplication a(argc, argv);
    Widget w;
    w.show();
    return a.exec();
}

3.8 speech.cpp

#include "speech.h"

// Constructs the recognizer; it holds no state of its own.
Speech::Speech(QObject *parent)
    : QObject(parent)
{
}

/**
 * Recognizes the speech stored in @p fileName.
 *
 * Step 1 requests an access_token using client_id and client_secret.
 * Step 2 posts the file content to the speech URL together with the token.
 *
 * A warning box is shown and an empty string is returned when the token
 * cannot be obtained, the file cannot be read, or the recognition request
 * fails.
 *
 * @param fileName Path of the recorded audio file.
 * @return The recognized text, or an empty string on failure.
 */
QString Speech::speechIdentify(QString fileName)
{
    Http m_http;
    QMap<QString,QString> header;
    header.insert(QString("Content-Type"),QString("audio/pcm;rate=16000"));

    QByteArray requestData; // body that is sent
    QByteArray replyData;   // body returned by the server

    // Step 1: get the access_token.
    // The multi-argument arg() substitutes both placeholders in one pass, so
    // a "%2" inside client_id cannot be replaced by accident.
    const QString tokenUrl = QString(baiduTokenUrl).arg(client_id, client_secret);
    QString accessToken;
    if (m_http.post_sync(tokenUrl,header,requestData,replyData))
    {
        accessToken = getJsonValue(replyData,"access_token");
    }
    if (accessToken.isEmpty())
    {
        // Without a token the recognition request cannot succeed; stop here
        // instead of sending the audio with an empty token.
        QMessageBox::warning(nullptr,"识别提示","获取access_token失败");
        return "";
    }

    // Step 2: read the audio file into a QByteArray
    QFile file(fileName);
    if (!file.open(QIODevice::ReadOnly))
    {
        QMessageBox::warning(nullptr,"识别提示","无法打开音频文件");
        return "";
    }
    requestData = file.readAll();
    file.close();

    // Build the recognition URL (cuid = local host name) and send the audio
    const QString speechUrl = QString(baiduSpeechUrl).arg(QHostInfo::localHostName(), accessToken);
    replyData.clear();
    if (!m_http.post_sync(speechUrl,header,requestData,replyData))
    {
        QMessageBox::warning(nullptr,"识别提示","识别失败");
        return "";
    }

    return getJsonValue(replyData,"result");
}

/**
 * Extracts the value stored under @p key from the JSON object in @p ba.
 *
 * @param ba  Raw JSON text.
 * @param key Name of the member to look up.
 * @return The member's string value, or the first element converted to a
 *         string if the member is an array. An empty string is returned if
 *         parsing fails, the document is not an object, the key is missing,
 *         or the value is neither a string nor an array.
 */
QString Speech::getJsonValue(QByteArray ba, QString key)
{
    QJsonParseError status;
    const QJsonDocument document = QJsonDocument::fromJson(ba, &status);

    if (status.error != QJsonParseError::NoError)
        return "";
    if (!document.isObject())
        return "";

    // Convert the document into a JSON object and look the key up
    const QJsonObject root = document.object();
    if (!root.contains(key))
        return "";

    const QJsonValue value = root.value(key);
    if (value.isString()) // plain string value
        return value.toString();

    if (value.isArray()) // array: use its first element
    {
        const QJsonArray items = value.toArray();
        return items.at(0).toString();
    }

    return "";
}

3.9 widget.cpp

#include "widget.h"
#include "ui_widget.h"

// Builds the UI from the generated form and sets the initial caption of the
// record button.
Widget::Widget(QWidget *parent) :
    QWidget(parent),
    ui(new Ui::Widget)
{
    ui->setupUi(this);

    const QString idleCaption = "按住说话";
    ui->pushButton->setText(idleCaption);
}

// Releases the generated UI object allocated in the constructor.
Widget::~Widget()
{
    delete ui;
}


void Widget::on_pushButton_pressed()
{
    ui->pushButton->setText("松开识别");
    //开始录音
    audio = new Audio;
    audio->startAudio("E:\\QT document\\baiduSpeech\\audio.pcm");


}

// Button released: stop recording, run the recognition on the recorded file
// and show the resulting text.
void Widget::on_pushButton_released()
{
    // Stop recording and release the recorder created by the matching press;
    // a new one is created on the next press.
    audio->stopAudio();
    audio->deleteLater();
    audio = nullptr;

    // Update the button caption
    ui->pushButton->setText("开始识别");

    // Run the recognition
    Speech m_speech;
    QString text =  m_speech.speechIdentify("E:\\QT document\\baiduSpeech\\audio.pcm");
    ui->textEdit->setText(text);

    ui->pushButton->setText("按住说话");
}

// Clear button clicked: remove all text from the result area.
void Widget::on_clearButton_clicked()
{
    auto *resultArea = ui->textEdit;
    resultArea->clear();
}

4. Related URL address

(1) Baidu Smart Cloud address

(2) Address for obtaining baiduTokenUrl

(3) Address for obtaining baiduSpeechUrl

(4) Baidu cloud address of the packaged program (exe); extraction code: txpf. If you do not know how to package the program, see another blog post of mine.

(5) Source code address (includes the created Id and Secret, the source program and the packaged release exe)

5. How to use the source code:

Note: 1. The program needs to access the Baidu server and uses the Baidu speech recognition API. Before running the program you must modify client_id and client_secret, so you first need to create an application in Baidu Smart Cloud.
2. The program generates a file named audio.pcm; you need to change the storage location of this pcm audio file to a directory that exists on your machine.

Creating an application in Baidu Smart Cloud and obtaining the Id and Secret

1. Open the URL, log in with your Baidu Cloud account, and create an application

Insert picture description here
Insert picture description here
Insert picture description here

2. After the application has been created, you can see its information in the application list. Copy the two values shown there (the Id and the Secret) into the corresponding places in the speech.h header file (client_id and client_secret).

Insert picture description here
Insert picture description here

pcm storage location change

1. In widget.cpp, change the path to a file directory you created. Note that each backslash in the path must be escaped, i.e. written as "\\"; see the figure below

Insert picture description here

Published 10 original articles · Likes0 · Visits 117

Guess you like

Origin blog.csdn.net/weixin_44450279/article/details/105596475