使用sounddevice+soundfile将麦克风录音保存为wav文件，然后再转换为文字，实现ASR

也可以保存为mp3文件，直接把文件的后缀更改一下就行
也可以直接将麦克风语音转换为文字，但是识别效果不如先保存为文件，再识别的效果好。
代码：

# -*- coding: utf-8 -*-

import sounddevice as sd
import soundfile as sf

class SPEECH2FILE:
    """Record a fixed-length, single-channel clip from the microphone and save it.

    The recording parameters are stored as plain instance attributes
    (``duration`` and ``sample_rate``) and are read by both methods.
    """

    def __init__(self, duration=5, sample_rate=16000):
        """Store the recording parameters.

        Args:
            duration: Recording length in seconds (default 5).
            sample_rate: Sampling rate passed to the recorder and the
                file writer (default 16000).
        """
        self.duration = duration  # recording length (seconds)
        self.sample_rate = sample_rate  # sampling rate

    def record_audio(self):
        """Record ``duration`` seconds of microphone audio and return it.

        The call blocks until the recording has finished.

        Returns:
            The buffer returned by ``sd.rec`` (one channel,
            ``duration * sample_rate`` frames).
        """
        # The replacement field is kept on a single line: splitting it across
        # lines is a SyntaxError on Python versions before 3.12.
        print(f"开始录音，请说话...(时长为{self.duration}秒)")
        frame_count = int(self.duration * self.sample_rate)
        audio = sd.rec(frame_count, samplerate=self.sample_rate, channels=1)
        sd.wait()  # wait for the recording to complete
        return audio

    def save_audio_to_file(self, audio, file_path):
        """Write ``audio`` to ``file_path`` using the instance's sample rate.

        Args:
            audio: Recorded samples, as returned by ``record_audio``.
            file_path: Destination path of the audio file.
        """
        sf.write(file_path, audio, self.sample_rate)


if __name__ == "__main__":
    # Destination of the recorded clip.
    file_path = 'recorded_audio2.wav'

    # Capture audio from the microphone, then write it to disk.
    speech2file = SPEECH2FILE()
    audio = speech2file.record_audio()
    speech2file.save_audio_to_file(audio, file_path)

    print("录音已保存为文件：", file_path)

使用谷歌的SpeechRecognition将音频文件转换为文字

import speech_recognition as sr

class CATT():
    """Convert an audio file to text with ``speech_recognition``."""

    def convert_audio_to_text(self, file_path):
        """Transcribe the audio file at ``file_path`` using ``language='zh-CN'``.

        Args:
            file_path: Path of the audio file to read.

        Returns:
            The recognized text, or ``""`` when the audio could not be
            understood or the recognition request failed (a message is
            printed in both cases).
        """
        r = sr.Recognizer()

        with sr.AudioFile(file_path) as source:
            audio_data = r.record(source)  # read the audio data from the file

        # One attempt only. The previous ``while True`` loop retried forever on
        # failure: the same audio was resubmitted after UnknownValueError, and
        # a RequestError made the loop spin with no delay or exit condition,
        # leaving the final ``return ""`` unreachable.
        try:
            return r.recognize_google(audio_data, language='zh-CN')
        except sr.UnknownValueError:
            print("语音识别无法理解音频内容")
        except sr.RequestError as e:
            print("无法连接到语音识别服务：{0}".format(e))

        return ""
if __name__ == "__main__":
    # Path of the audio file to transcribe.
    file_path = '../recordFile/recorded_audio.wav'

    # Run the recognition and show the result.
    catt = CATT()
    text = catt.convert_audio_to_text(file_path)

    print("转换结果：", text)

使用sounddevice+soundfile将麦克风录音保存为wav文件，然后再转换为文字，实现ASR

猜你喜欢