Using Baidu ASR (Speech Recognition) on the Front End

1. Creating a Baidu ASR application

  1. Claim the personal free quota (note: speech recognition is a separate service from speech synthesis, so it needs its own application)

Short Speech Recognition, Standard Edition - Baidu AI Open Platform (baidu.com)

Click "Buy Now" and claim the free quota for individuals or enterprises.

  2. Create an application in the console

This step is the same as in the previous article, "Using Baidu TTS on the Front End", so it is not demonstrated again here.

2. Code

The component below contains both the TTS and the ASR code. Note that the axios calls use relative paths (/oauth/2.0/token, /text2audio, /server_api), which assume a dev-server proxy; a proxy sketch follows the component code.

<template>
  <div class="app">
    <div class="get_tts_token">
      <h1>
        1.请输入你<i>语音合成</i>应用的client_id和client_secret获取access_token
      </h1>
      <el-row :gutter="50">
        <el-col :span="8">
          <el-input
            v-model.trim="client_id"
            placeholder="请输入你的client_id(应用的API Key)"
          />
        </el-col>
        <el-col :span="8">
          <el-input
            v-model.trim="client_secret"
            placeholder="请输入你的client_secret(应用的Secret Key)"
          />
        </el-col>
        <el-col :span="8"
          ><el-button @click="handleGetAccessToken"
            >获取AccessToken</el-button
          ></el-col
        >
      </el-row>
    </div>

    <hr />

    <div class="text2audio">
      <h1>2.语音合成</h1>
      <h4>免费的只能使用前4种语音</h4>
      <el-radio-group v-model="per">
        <el-radio-button label="1">度小宇</el-radio-button>
        <el-radio-button label="0">度小美</el-radio-button>
        <el-radio-button label="3">度逍遥(基础)</el-radio-button>
        <el-radio-button label="4">度丫丫</el-radio-button>
        <el-radio-button label="5003">度逍遥(精品)</el-radio-button>
        <el-radio-button label="5118">度小鹿</el-radio-button>
        <el-radio-button label="106">度博文</el-radio-button>
        <el-radio-button label="110">度小童</el-radio-button>
        <el-radio-button label="111">度小萌</el-radio-button>
        <el-radio-button label="103">度米朵</el-radio-button>
        <el-radio-button label="5">度小娇</el-radio-button>
      </el-radio-group>
      <el-row :gutter="50">
        <el-col :span="8">
          <el-input
            v-model.trim="inputText"
            placeholder="请输入你要转化的文本"
          />
        </el-col>
        <el-col :span="2"
          ><el-button @click="handleTextToAudio">语音合成</el-button></el-col
        >
        <el-col :span="8">
          <audio :src="audioSrc" v-if="audioSrc" controls>
            您的浏览器不支持音频播放。
          </audio>
        </el-col>
      </el-row>
    </div>

    <hr />

    <div class="get_asr_token">
      <h1>
        3.请输入你<i>语音识别</i>应用的client_id和client_secret获取access_token
      </h1>
      <el-row :gutter="50">
        <el-col :span="8">
          <el-input
            v-model.trim="client_id"
            placeholder="请输入你的client_id(应用的API Key)"
          />
        </el-col>
        <el-col :span="8">
          <el-input
            v-model.trim="client_secret"
            placeholder="请输入你的client_secret(应用的Secret Key)"
          />
        </el-col>
        <el-col :span="8"
          ><el-button @click="handleGetAccessToken"
            >获取AccessToken</el-button
          ></el-col
        >
      </el-row>
    </div>

    <hr />

    <div class="audio2text">
      <h1>4.语音识别</h1>
      <el-row :gutter="50">
        <el-col :span="4">
          <el-button @click="handleGetPermissions">获取录音权限</el-button>
        </el-col>
        <el-col :span="4">
          <el-button @click="handleRecording">{
   
   { recordBtn }}</el-button>
        </el-col>
        <el-col :span="8">
          <audio :src="audioRecordSrc" controls></audio>
        </el-col>
      </el-row>
      <el-row :gutter="50">
        <el-col :span="4">
          <el-button @click="handleSpeechRecognition">语音识别</el-button>
        </el-col>
        <el-col :span="8">
          {{ recordText }}
          <!-- <audio :src="audioRecordSrc" controls></audio> -->
        </el-col>
      </el-row>
    </div>
  </div>
</template>

<script setup>
import { ref } from "vue";
import axios from "axios";
import qs from "qs";
import { ElMessage } from "element-plus";
import { HZRecorder } from "./utils/recorder";

// message helper
const openMsg = (message, type) => {
  ElMessage({
    message,
    type,
  });
};

// 1. Get the access_token
// client_id is the API Key of the application you created; client_secret is its Secret Key
const client_id = ref("");
const client_secret = ref("");

const handleGetAccessToken = async () => {
  try {
    const option = {
      grant_type: "client_credentials",
      client_id: client_id.value,
      client_secret: client_secret.value,
    };
    const res = await axios.post("/oauth/2.0/token", qs.stringify(option));
    if (res.status !== 200) {
      return openMsg(res.statusText, "warning");
    }
    openMsg("获取token成功", "success");
    localStorage.setItem("access_token", res.data.access_token);
    client_id.value = "";
    client_secret.value = "";
  } catch (error) {
    console.log(error);
  }
};

// 2. Call the speech synthesis (TTS) API
// per: voice persona
const per = ref("1");
// text to synthesize
const inputText = ref("");
// bound dynamically to the audio element's src attribute
const audioSrc = ref("");

const handleTextToAudio = async () => {
  const token = localStorage.getItem("access_token");
  if (!token) {
    return openMsg("请先获取token!", "warning");
  }
  textToAudio(token);
};
const textToAudio = async (token) => {
  const option = {
    tex: inputText.value,
    tok: token,
    cuid: `${Math.floor(Math.random() * 1000000)}`,
    ctp: "1",
    lan: "zh",
    per: per.value,
  };
  const res = await axios.post("/text2audio", qs.stringify(option), {
    headers: { "Content-Type": "application/x-www-form-urlencoded" },
    responseType: "blob",
  });
  if (res.status !== 200) {
    return openMsg(res.statusText, "warning");
  }
  openMsg("语音合成成功", "success");
  audioSrc.value = URL.createObjectURL(res.data);
};

// 3. Speech recognition
const recordBtn = ref("开始录音");
const audioRecordSrc = ref("");
const recordText = ref("");
window.URL = window.URL || window.webkitURL;
// recorder holds the recorder instance
let recorder = null;
let isRecorder = false;
// wavBlob holds the recorded audio as a Blob
let wavBlob = null;

// Request microphone permission
const handleGetPermissions = () => {
  // constraints lists the media permissions to request; only audio is needed here. getUserMedia returns a Promise, since the user may grant or deny access at any time, and success/failure are handled in its callbacks. Check that the browser supports the API before calling it.
  if (navigator.mediaDevices.getUserMedia) {
    const constraints = { audio: true };
    navigator.mediaDevices.getUserMedia(constraints).then(
      (stream) => {
        recorder = new HZRecorder(stream);
        console.log("初始化完成");
      },
      () => {
        console.error("授权失败!");
      }
    );
  } else {
    console.error("浏览器不支持 getUserMedia");
  }
};
// Start / stop recording (the button toggles)
const handleRecording = () => {
  if (!isRecorder) {
    recorder && recorder.start();
    recordBtn.value = "结束录音";
    isRecorder = true;
  } else {
    recorder && recorder.stop();
    wavBlob = recorder.upload();
    console.log(wavBlob);
    audioRecordSrc.value = window.URL.createObjectURL(wavBlob);
    recordBtn.value = "开始录音";
    isRecorder = false;
  }
};
// Speech recognition
const handleSpeechRecognition = async () => {
  const token = localStorage.getItem("access_token");
  if (!token) {
    return openMsg("请先获取token!", "warning");
  }
  // wavBlob = recorder.upload();
  let blobToDataURL = (blob, callback) => {
    var a = new FileReader();
    a.onload = function (e) {
      callback(e.target.result.split("data:audio/wav;base64,")[1]);
    };
    a.readAsDataURL(blob);
  };
  blobToDataURL(wavBlob, async (base_64) => {
    const res = await axios.post(
      "/server_api",
      {
        speech: base_64, // the recorded audio, base64-encoded; used together with len
        len: wavBlob.size, // size of the raw audio in bytes
        dev_pid: 1537, // Mandarin recognition model
        cuid: "541b:3f:5af4:b2c9",
        rate: 16000, // sampling rate: 16000 (16 kHz, 16-bit, mono)
        token: token, // the access_token obtained above
        channel: 1, // mono
        format: "wav", // format of the submitted audio
      },
      {
        headers: {
          "Content-Type": "application/json",
        },
      }
    );
    recorder.clear();
    if (res.data.err_no !== 0) {
      return openMsg(res.data.err_msg, "warning");
    }
    openMsg("识别成功", "success");
    recordText.value = res.data.result[0];
    console.log("识别结果:" + res.data.result[0]);
  });
};
</script>

<style scoped>
.app {
  width: 80%;
  margin: auto;
  margin-top: 50px;
}

hr {
  margin: 30px 0;
}

h1 {
  margin: 10px 0;
}

:deep(.el-radio-group) {
  margin-bottom: 30px;
}
</style>
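
The component above requests /oauth/2.0/token, /text2audio and /server_api as relative paths, so the dev server has to proxy them to Baidu's hosts. Below is a minimal sketch for a Vite setup; the target hosts are taken from Baidu's public documentation for the token, TTS, and short speech recognition endpoints, so verify them against the current docs before relying on them.

// vite.config.js (sketch, assuming a Vite dev server)
import { defineConfig } from "vite";
import vue from "@vitejs/plugin-vue";

export default defineConfig({
  plugins: [vue()],
  server: {
    proxy: {
      // OAuth token endpoint
      "/oauth": { target: "https://aip.baidubce.com", changeOrigin: true },
      // speech synthesis (TTS)
      "/text2audio": { target: "https://tsn.baidu.com", changeOrigin: true },
      // short speech recognition (ASR)
      "/server_api": { target: "https://vop.baidu.com", changeOrigin: true },
    },
  },
});
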
// recorder.js
export function HZRecorder(stream, config) {
    config = config || {};
    config.sampleBits = config.sampleBits || 16;      // sample bits: 8 or 16
    config.sampleRate = config.sampleRate || 16000;   // sample rate: 16 kHz

    var context = new (window.webkitAudioContext || window.AudioContext)();
    var audioInput = context.createMediaStreamSource(stream);
    var createScript = context.createScriptProcessor || context.createJavaScriptNode;
    var recorder = createScript.apply(context, [4096, 1, 1]);

    var audioData = {
        size: 0                                   // length of the recorded data
        , buffer: []                              // recording buffer
        , inputSampleRate: context.sampleRate     // input sample rate
        , inputSampleBits: 16                     // input sample bits: 8 or 16
        , outputSampleRate: config.sampleRate     // output sample rate
        , oututSampleBits: config.sampleBits      // output sample bits: 8 or 16
        , input: function (data) {
            this.buffer.push(new Float32Array(data));
            this.size += data.length;
        }
        , compress: function () { // merge and downsample
            // merge
            var data = new Float32Array(this.size);
            var offset = 0;
            for (var i = 0; i < this.buffer.length; i++) {
                data.set(this.buffer[i], offset);
                offset += this.buffer[i].length;
            }
            // downsample
            var compression = parseInt(this.inputSampleRate / this.outputSampleRate);
            var length = data.length / compression;
            var result = new Float32Array(length);
            var index = 0, j = 0;
            while (index < length) {
                result[index] = data[j];
                j += compression;
                index++;
            }
            return result;
        }
        , encodeWAV: function () {
            var sampleRate = Math.min(this.inputSampleRate, this.outputSampleRate);
            var sampleBits = Math.min(this.inputSampleBits, this.oututSampleBits);
            var bytes = this.compress();
            var dataLength = bytes.length * (sampleBits / 8);
            var buffer = new ArrayBuffer(44 + dataLength);
            var data = new DataView(buffer);

            var channelCount = 1; // mono
            var offset = 0;

            var writeString = function (str) {
                for (var i = 0; i < str.length; i++) {
                    data.setUint8(offset + i, str.charCodeAt(i));
                }
            }

            // RIFF chunk identifier
            writeString('RIFF'); offset += 4;
            // total bytes from the next address to the end of the file, i.e. file size - 8
            data.setUint32(offset, 36 + dataLength, true); offset += 4;
            // WAV file marker
            writeString('WAVE'); offset += 4;
            // format chunk marker
            writeString('fmt '); offset += 4;
            // length of the format chunk, usually 0x10 = 16
            data.setUint32(offset, 16, true); offset += 4;
            // audio format category (1 = PCM samples)
            data.setUint16(offset, 1, true); offset += 2;
            // number of channels
            data.setUint16(offset, channelCount, true); offset += 2;
            // sample rate, samples per second per channel
            data.setUint32(offset, sampleRate, true); offset += 4;
            // byte rate (average bytes per second): channels x sample rate x bits per sample / 8
            data.setUint32(offset, channelCount * sampleRate * (sampleBits / 8), true); offset += 4;
            // block align (bytes per sample frame): channels x bits per sample / 8
            data.setUint16(offset, channelCount * (sampleBits / 8), true); offset += 2;
            // bits per sample
            data.setUint16(offset, sampleBits, true); offset += 2;
            // data chunk identifier
            writeString('data'); offset += 4;
            // total size of the sample data, i.e. file size - 44
            data.setUint32(offset, dataLength, true); offset += 4;
            // write the sample data
            if (sampleBits === 8) {
                for (var i = 0; i < bytes.length; i++, offset++) {
                    var s = Math.max(-1, Math.min(1, bytes[i]));
                    var val = s < 0 ? s * 0x8000 : s * 0x7FFF;
                    val = parseInt(255 / (65535 / (val + 32768)));
                    data.setInt8(offset, val);
                }
            } else {
                for (var i = 0; i < bytes.length; i++, offset += 2) {
                    var s = Math.max(-1, Math.min(1, bytes[i]));
                    data.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
                }
            }

            return new Blob([data], { type: 'audio/wav' });
        }
    };
    // start recording
    this.start = function () {
        audioInput.connect(recorder);
        recorder.connect(context.destination);
    }

    // stop recording
    this.stop = function () {
        recorder.disconnect();
    }

    // get the recorded audio as a WAV blob
    this.getBlob = function () {
        this.stop();
        return audioData.encodeWAV();
    }

    // playback: point an <audio> element at the recording
    this.play = function (audio) {
        var blob = this.getBlob();
        // saveAs(blob, "F:/3.wav");
        audio.src = window.URL.createObjectURL(blob);
    }

    // upload: return the WAV blob
    this.upload = function () {
        return this.getBlob()
    }

    // clear the recording buffer
    this.clear = function () {
        console.log("audioData", audioData.size)
        audioData.size = 0
        audioData.buffer = []
    }

    // audio capture callback
    recorder.onaudioprocess = function (e) {
        audioData.input(e.inputBuffer.getChannelData(0));
        // record(e.inputBuffer.getChannelData(0));
    }
}

export default HZRecorder

3. Demonstration

  1. Enter the client_id and client_secret of the speech recognition application in section (3) and click the button to obtain a token. (Both token sections write to the same localStorage key, so the most recently fetched token is the one that gets used.)
  2. Click to request recording permission.
  3. Click to start recording, then click again to stop; the recording is then available in the audio player.
  4. Click "Speech Recognition"; when recognition completes, the recognized text is shown to the right of the button.
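
For reference, a successful response from /server_api carries the fields the component reads (err_no, err_msg, result), with the recognized text in result[0]. The object below is only an illustrative sketch of that shape; the example values are made up, so check Baidu's short speech recognition docs for the full schema.

// illustrative /server_api response shape (example values, not real output)
const exampleAsrResponse = {
  err_no: 0,               // 0 means success; any other value is an error code
  err_msg: "success.",     // human-readable status message
  result: ["今天天气不错"], // candidate transcripts; the component displays result[0]
};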

Origin blog.csdn.net/weixin_47365243/article/details/132371841