1. Go to the official Baidu AI website, register an account, enable the speech recognition service, and create a speech recognition application.
Then obtain the application's AppID, API Key, and Secret Key from the Baidu AI console.
2. Python code that calls the Baidu AI speech platform
2.1. Install the Python libraries
(1) The PyAudio library
Because this library depends on C++ components, it cannot be installed directly with pip; we need to download the .whl installation file instead.
GitHub address: https://github.com/intxcc/pyaudio_portaudio/releases
See here: https://blog.csdn.net/qq_36387683/article/details/91960141
Open a terminal in the directory that contains the downloaded file and run:
pip install PyAudio-0.2.11-cp37-cp37m-win_amd64.whl
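(2) Install the wave and baidu-aip libraries (note: wave is part of the Python standard library, so installing it with pip is normally unnecessary)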
See here: https://blog.csdn.net/alice_tl/article/details/97434261
pip install wave
pip install baidu-aip
2.2. Calling the API from code
See here: https://blog.csdn.net/qq_42145185/article/details/101209531
# -*- coding: UTF-8 -*-
from aip import AipSpeech
import pyaudio
import wave
# Name of the WAV file that holds the speech captured from the microphone.
input_filename = "input.wav"
# Path prefix for the input file; an empty prefix yields a path relative to
# the current working directory.
input_filepath = ""
in_path = f"{input_filepath}{input_filename}"

# Your Baidu application credentials: APPID, API Key (AK), Secret Key (SK).
APP_ID = ''
API_KEY = ''
SECRET_KEY = ''

# Shared Baidu speech client used by the recognition step.
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
# Speech recognition section
def Speech():
    """Send the recorded WAV file to Baidu speech recognition and print the outcome.

    Reads the audio at the module-level ``in_path``, submits it through the
    module-level ``client``, then prints the error number, the error message
    and, when the service returned one, the first recognition candidate.
    """
    with open(in_path, "rb") as fp:
        audio = fp.read()

    # The option key must be spelled 'dev_pid'; the previous 'dev_ped' typo
    # meant the model selection was never applied.
    # The file is a WAV container (header + PCM), so it is declared as 'wav'
    # rather than raw 'pcm'.
    response = client.asr(audio, 'wav', 16000, {'dev_pid': 1536})

    print(response.get('err_no'))
    print(response.get('err_msg'))

    # Failed requests carry no 'result' entry; indexing it unconditionally
    # used to raise KeyError and hide the error just printed above.
    candidates = response.get('result')
    if candidates:
        print(candidates[0])
# Audio capture section
def get_audio(filepath):
    """Record 10 seconds of mono 16-bit microphone audio into a WAV file.

    Prompts the user until they answer "是" (start recording) or "否" (quit).

    Args:
        filepath: Destination path of the WAV file to write.

    Raises:
        SystemExit: If the user answers "否".
    """
    # Ask again in a loop instead of recursing, so repeated invalid answers
    # cannot exhaust the call stack and the caller's filepath is preserved
    # (the old retry always recorded to the global in_path).
    while True:
        answer = input("是否开始录音? (是/否)")
        if answer == "是":
            break
        if answer == "否":
            raise SystemExit
        print("无效输入,请重新选择")

    CHUNK = 256
    FORMAT = pyaudio.paInt16
    CHANNELS = 1  # number of channels
    # Sample rate in Hz; must match the 16000 Hz rate declared to the
    # recognition service (the previous 11025 Hz did not).
    RATE = 16000
    RECORD_SECONDS = 10  # capture duration in seconds

    p = pyaudio.PyAudio()
    try:
        sample_width = p.get_sample_size(FORMAT)
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        try:
            print("*" * 10, "开始录音:请在10秒内输入语音")
            frames = [
                stream.read(CHUNK)
                for _ in range(int(RATE / CHUNK * RECORD_SECONDS))
            ]
            print("*" * 10, "录音结束\n")
        finally:
            # Release the audio device even if reading fails midway.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    # The context manager closes the file even if writing fails.
    with wave.open(filepath, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))
if __name__ == '__main__':
    # Record one utterance from the microphone, then send it for recognition.
    # (The former `for i in range(1)` wrapper ran exactly once and its loop
    # variable was unused, so it has been dropped.)
    get_audio(in_path)
    Speech()
The recognition results are great!
Some articles I found helpful: