Baidu speech recognition + Turing robot + python to realize intelligent dialogue (Xiaoai robot)

Baidu speech recognition + Turing robot + python to achieve intelligent dialogue

1. Display effect

Insert picture description here

This works for simple conversations, but your Mandarin needs to be fairly standard. The recognizer can also be set to Sichuan dialect, but the recognized text may still be wrong, because there are so many near-homophones.

2. The realization process

2.1, get the voice first

In plain terms: you speak, and the recording is saved as a voice file.

def save_wave_file(filepath, data):
    """Write recorded audio chunks to a WAV file.

    Args:
        filepath: Destination path of the WAV file.
        data: Iterable of ``bytes`` chunks holding the raw audio frames.

    The channel count, sample width and frame rate are taken from the
    module-level ``channels``, ``sampwidth`` and ``framerate`` settings.
    """
    # The context manager closes the file even if a setter or the write
    # raises, which the previous manual close() call did not guarantee.
    with wave.open(filepath, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sampwidth)
        wf.setframerate(framerate)
        wf.writeframes(b''.join(data))
  
def my_record(duration=4):
    """Record audio from a PyAudio input stream and save it to ``FILEPATH``.

    Args:
        duration: Recording length in seconds. Defaults to 4, the value
            that was previously hard-coded.
    """
    pa = PyAudio()
    try:
        # Open a new input audio stream.
        stream = pa.open(format=paInt16, channels=channels,
                         rate=framerate, input=True,
                         frames_per_buffer=num_samples)
        try:
            my_buf = []  # recorded audio chunks
            # A monotonic clock keeps the recording length unaffected by
            # system clock adjustments.
            deadline = time.monotonic() + duration
            print('正在录音...')
            while time.monotonic() < deadline:
                # Read num_samples frames per iteration.
                my_buf.append(stream.read(num_samples))
            print('录音结束.')
        finally:
            # Always release the stream, even if a read fails.
            stream.stop_stream()
            stream.close()
    finally:
        # Each call creates a PyAudio instance, so it must be terminated
        # here or its resources leak across repeated recordings.
        pa.terminate()
    save_wave_file(FILEPATH, my_buf)

The conversation is driven by a voice file, so the speech is first recorded and saved as a voice file.

2.2, get file content

That is, the voice file is converted into text; this step calls Baidu speech recognition.

def listen():
    """Send the recorded file to Baidu speech recognition and return the text.

    Returns:
        The first recognition candidate as a string, or ``None`` when the
        response carries no usable ``"result"`` entry.
    """
    # Read the recorded audio file.
    with open(FILEPATH, 'rb') as fp:
        voices = fp.read()

    # dev_pid options: 1536 Mandarin (with simple English), 1537 Mandarin
    # (Chinese only), 1737 English, 1637 Cantonese, 1837 Sichuan dialect,
    # 1936 far-field Mandarin.
    # The 16000 rate must match the sample rate the file was recorded with.
    result = client.asr(voices, 'wav', 16000, {'dev_pid': 1837, })

    # A missing key and an empty candidate list are both treated as a
    # recognition failure; previously an empty list raised IndexError.
    candidates = result.get("result") if isinstance(result, dict) else None
    if not candidates:
        print("faild")
        return None

    result_text = candidates[0]
    print("you said: " + result_text)
    return result_text

2.3. Dialogue with Turing Robot through the interface

After registering with Turing Robot you get an API key for its interface; here we talk to the bot by sending HTTP requests to that interface, crawler-style.

 
class TuringChatMode(object):
    """Small client for the Turing Robot chat HTTP API."""

    def __init__(self, key='e7ea86036040426e8a9d123176bfe12f', userid='yjc'):
        """Store the API endpoint and the credentials sent with each request.

        Args:
            key: Turing Robot API key. The default is the key that used to be
                hard-coded in ``botInteraction``.
                NOTE(review): a secret committed to source should be
                replaced by a value loaded from configuration.
            userid: User identifier sent along with every message.
        """
        # API endpoint; the query string is appended for each request.
        # NOTE(review): this is plain HTTP, so the key and the message text
        # travel unencrypted - confirm whether an HTTPS endpoint is available.
        self.turing_url = 'http://www.tuling123.com/openapi/api?'
        self.key = key
        self.userid = userid

    def botInteraction(self, text):
        """Send ``text`` to the bot and return its reply text.

        Args:
            text: The message to send.

        Returns:
            The ``'text'`` field of the decoded JSON response.

        Raises:
            Exception: If the request fails with ``URLError``.
            KeyError: If opening the URL fails for any other reason, or if
                the response has no ``'text'`` field.
        """
        url_data = dict(
            key=self.key,
            info=text,
            userid=self.userid,
        )

        # Build the request; it is kept on the instance as before.
        self.request = Request(self.turing_url + urlencode(url_data))

        try:
            w_data = urlopen(self.request)  # send the request
        except URLError as err:
            raise Exception("No internet connection available to transfer txt data") from err
        except Exception as err:
            # "except Exception" rather than a bare except, so that
            # KeyboardInterrupt / SystemExit are not turned into KeyError.
            raise KeyError("Server wouldn't respond (invalid key or quota has been maxed out)") from err

        # Close the HTTP response once the body has been read.
        with w_data:
            response_text = w_data.read().decode('utf-8')

        json_result = json.loads(response_text)  # parse the JSON payload

        return json_result['text']

2.4. After obtaining the Turing Robot's reply, turn it into a voice broadcast

            # Speak the bot's reply (botMsg) aloud through pyttsx3;
            # runAndWait() is called after queuing the text with say().
            engine = pyttsx3.init()
            engine.say(botMsg)
            engine.runAndWait()

I implemented everything on a single Windows machine; if you use separate hardware, you will need to adapt the code accordingly.

3. Code


import time
import wave
from pyaudio import PyAudio, paInt16
import pyttsx3
import json
from aip import AipSpeech
from urllib.request import urlopen,Request
from urllib.error import URLError
from urllib.parse import urlencode

# Credentials passed to the Baidu AipSpeech client.
# NOTE(review): these secrets are hard-coded in source; consider loading
# them from configuration that is not committed.
APP_ID = '21715'
API_KEY = 'O0gzDUHKkciBa60V1'
SECRET_KEY = 'Psji0dC90D1OehYh63ZaQu'
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
framerate = 16000 # sample rate
num_samples = 2000 # sample points (frames read per chunk while recording)
channels = 1 # number of channels
sampwidth = 2 # sample width: 2 bytes
# Path the recording is written to and later read back from.
FILEPATH = './myvoices.wav'
 
class TuringChatMode(object):
    """Small client for the Turing Robot chat HTTP API."""

    def __init__(self, key='e7ea86036040426e8a9d123176bfe12f', userid='yjc'):
        """Store the API endpoint and the credentials sent with each request.

        Args:
            key: Turing Robot API key. The default is the key that used to be
                hard-coded in ``botInteraction``.
                NOTE(review): a secret committed to source should be
                replaced by a value loaded from configuration.
            userid: User identifier sent along with every message.
        """
        # API endpoint; the query string is appended for each request.
        # NOTE(review): this is plain HTTP, so the key and the message text
        # travel unencrypted - confirm whether an HTTPS endpoint is available.
        self.turing_url = 'http://www.tuling123.com/openapi/api?'
        self.key = key
        self.userid = userid

    def botInteraction(self, text):
        """Send ``text`` to the bot and return its reply text.

        Args:
            text: The message to send.

        Returns:
            The ``'text'`` field of the decoded JSON response.

        Raises:
            Exception: If the request fails with ``URLError``.
            KeyError: If opening the URL fails for any other reason, or if
                the response has no ``'text'`` field.
        """
        url_data = dict(
            key=self.key,
            info=text,
            userid=self.userid,
        )

        # Build the request; it is kept on the instance as before.
        self.request = Request(self.turing_url + urlencode(url_data))

        try:
            w_data = urlopen(self.request)  # send the request
        except URLError as err:
            raise Exception("No internet connection available to transfer txt data") from err
        except Exception as err:
            # "except Exception" rather than a bare except, so that
            # KeyboardInterrupt / SystemExit are not turned into KeyError.
            raise KeyError("Server wouldn't respond (invalid key or quota has been maxed out)") from err

        # Close the HTTP response once the body has been read.
        with w_data:
            response_text = w_data.read().decode('utf-8')

        json_result = json.loads(response_text)  # parse the JSON payload

        return json_result['text']
 
def save_wave_file(filepath, data):
    """Write recorded audio chunks to a WAV file.

    Args:
        filepath: Destination path of the WAV file.
        data: Iterable of ``bytes`` chunks holding the raw audio frames.

    The channel count, sample width and frame rate are taken from the
    module-level ``channels``, ``sampwidth`` and ``framerate`` settings.
    """
    # The context manager closes the file even if a setter or the write
    # raises, which the previous manual close() call did not guarantee.
    with wave.open(filepath, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sampwidth)
        wf.setframerate(framerate)
        wf.writeframes(b''.join(data))
 
 
#录音
def my_record(duration=4):
    """Record audio from a PyAudio input stream and save it to ``FILEPATH``.

    Args:
        duration: Recording length in seconds. Defaults to 4, the value
            that was previously hard-coded.
    """
    pa = PyAudio()
    try:
        # Open a new input audio stream.
        stream = pa.open(format=paInt16, channels=channels,
                         rate=framerate, input=True,
                         frames_per_buffer=num_samples)
        try:
            my_buf = []  # recorded audio chunks
            # A monotonic clock keeps the recording length unaffected by
            # system clock adjustments.
            deadline = time.monotonic() + duration
            print('正在录音...')
            while time.monotonic() < deadline:
                # Read num_samples frames per iteration.
                my_buf.append(stream.read(num_samples))
            print('录音结束.')
        finally:
            # Always release the stream, even if a read fails.
            stream.stop_stream()
            stream.close()
    finally:
        # Each call creates a PyAudio instance, so it must be terminated
        # here or its resources leak across repeated recordings.
        pa.terminate()
    save_wave_file(FILEPATH, my_buf)

def listen():
    """Send the recorded file to Baidu speech recognition and return the text.

    Returns:
        The first recognition candidate as a string, or ``None`` when the
        response carries no usable ``"result"`` entry.
    """
    # Read the recorded audio file.
    with open(FILEPATH, 'rb') as fp:
        voices = fp.read()

    # dev_pid options: 1536 Mandarin (with simple English), 1537 Mandarin
    # (Chinese only), 1737 English, 1637 Cantonese, 1837 Sichuan dialect,
    # 1936 far-field Mandarin.
    # The 16000 rate must match the sample rate the file was recorded with.
    result = client.asr(voices, 'wav', 16000, {'dev_pid': 1837, })

    # A missing key and an empty candidate list are both treated as a
    # recognition failure; previously an empty list raised IndexError.
    candidates = result.get("result") if isinstance(result, dict) else None
    if not candidates:
        print("faild")
        return None

    result_text = candidates[0]
    print("you said: " + result_text)
    return result_text
if __name__ == '__main__':
    # Conversation loop: record -> recognize -> ask the bot -> speak the reply.
    turing = TuringChatMode()
    # Create the speech engine once instead of re-initializing it every turn.
    engine = pyttsx3.init()
    while True:
        my_record()
        msg = listen()
        if not msg:
            # Recognition failed (listen() returned None or empty text).
            # Record again instead of sending the literal text "None" to
            # the bot.
            continue
        if msg == "退出。":
            # Leave the loop on the spoken exit command; this avoids the
            # site-provided exit() helper, which is not always available.
            break
        botMsg = turing.botInteraction(msg)
        print("图灵BOT回复我:", botMsg)
        engine.say(botMsg)
        engine.runAndWait()