Calling Baidu AI speech recognition from Python

1. Go to the official Baidu AI website, register an account, enable the speech recognition service, and create a speech recognition application.

Obtain the Baidu AI application's AppID, API Key, and Secret Key.
Here Insert Picture Description

2. Python code that calls the Baidu AI speech platform

2.1 Install the Python libraries

(1) PyAudio library
Because this library depends on native C/C++ components, it often cannot be installed directly with pip; download the prebuilt .whl installation file instead.
GitHub address: https://github.com/intxcc/pyaudio_portaudio/releases

See here: https://blog.csdn.net/qq_36387683/article/details/91960141

Open a terminal in the directory that contains the downloaded file and run:

pip install PyAudio-0.2.11-cp37-cp37m-win_amd64.whl

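(2) Install the wave and baidu-aip libraries (note: wave ships with the Python standard library, so in practice only baidu-aip needs to be installed)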

See here: https://blog.csdn.net/alice_tl/article/details/97434261

pip install wave
pip install baidu-aip

2.2 Calling the API from code

See here: https://blog.csdn.net/qq_42145185/article/details/101209531

# -*- coding: UTF-8 -*-
from aip import AipSpeech
import pyaudio
import wave

# File name of the microphone recording that is uploaded for recognition.
input_filename = "input.wav"
# Directory prefix for the recording; empty means a path relative to the
# current working directory.
input_filepath = ""
in_path = f"{input_filepath}{input_filename}"

# Your Baidu AI application credentials: AppID, API Key (AK), Secret Key (SK).
APP_ID = ''
API_KEY = ''
SECRET_KEY = ''

# Module-level speech client built from the credentials above.
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

'''语音识别部分'''


def Speech():
    """Upload the recorded WAV file to Baidu speech recognition and print the reply.

    Reads the file at the module-level ``in_path`` (the same path the
    recorder writes to), sends it through the module-level ``client`` and
    prints the error code, the error message and, when the service returned
    one, the first recognition candidate.

    Returns:
        None. All output goes to stdout.
    """
    def get_file_content(filePath):
        # The API expects the raw bytes of the audio file.
        with open(filePath, "rb") as fp:
            return fp.read()

    # The file is a WAV container, so declare it as 'wav' rather than raw
    # 'pcm'. The option key is 'dev_pid' (the original 'dev_ped' was a typo,
    # so the requested model was never actually selected).
    keyword = client.asr(get_file_content(in_path), 'wav', 16000, {'dev_pid': 1536})

    print(keyword.get('err_no'))
    print(keyword.get('err_msg'))

    # 'result' is missing when recognition fails; only print it when present
    # so the error code/message above stay visible instead of a KeyError.
    result = keyword.get('result')
    if result:
        print(result[0])


'''语音采集部分'''


def get_audio(filepath):
    """Ask for confirmation, then record 10 seconds of microphone audio to a WAV file.

    The user is prompted until they answer "是" (start recording) or "否"
    (quit). The recording is mono, 16-bit, 16 kHz, which matches the sample
    rate that is declared when the file is uploaded for recognition.

    Args:
        filepath: Destination path of the WAV file to write.

    Raises:
        SystemExit: If the user answers "否".
    """
    CHUNK = 256
    FORMAT = pyaudio.paInt16
    CHANNELS = 1  # number of channels
    RATE = 16000  # sample rate; must agree with the rate sent to the recognizer
    RECORD_SECONDS = 10  # recording duration in seconds

    # Re-prompt in a loop instead of recursing; the original retry also
    # ignored ``filepath`` and always recorded to the global default path.
    while True:
        answer = input("是否开始录音?   (是/否)")
        if answer == "是":
            break
        if answer == "否":
            raise SystemExit
        print("无效输入,请重新选择")

    p = pyaudio.PyAudio()
    try:
        # Query the sample width while the PyAudio instance is still alive.
        sample_width = p.get_sample_size(FORMAT)
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        try:
            print("*" * 10, "开始录音:请在10秒内输入语音")
            chunk_count = int(RATE / CHUNK * RECORD_SECONDS)
            frames = [stream.read(CHUNK) for _ in range(chunk_count)]
            print("*" * 10, "录音结束\n")
        finally:
            # Release the audio device even if reading fails midway.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    # The context manager guarantees the WAV header is finalized and the
    # file is closed even if writing raises.
    with wave.open(filepath, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))


if __name__ == '__main__':
    # Record once from the microphone, then send the recording for
    # recognition. The former single-iteration ``for i in range(1)`` wrapper
    # did nothing except leave a stray module-level ``i`` behind.
    get_audio(in_path)
    Speech()

Here Insert Picture Description
The recognition results are great!

An article I found helpful:

https://www.jianshu.com/p/915db160504b

Published 7 original articles · 2 likes · 541 views

Guess you like

Origin blog.csdn.net/qq_41744697/article/details/104090421