Using Baidu ASR (Speech Recognition) on the Frontend

1. Creating the Baidu ASR Application

  1. Claim the free personal quota (note: speech recognition is a separate service from speech synthesis, so you need to create a new application)

Short Speech Recognition Standard Edition, Baidu AI Open Platform (baidu.com)

Click "立即选购" (Buy Now) and claim the free personal or enterprise quota.

  2. Create the application in the console

This step is the same as in the previous article, "Using Baidu TTS on the Frontend", so it is not shown again here.

2. Code

The component below covers both TTS and ASR.
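The axios calls in this component use relative paths (/oauth/2.0/token, /text2audio, /server_api), which only works if the dev server proxies them to Baidu's endpoints to avoid CORS. If your project does not already have this set up from the TTS article, here is a minimal sketch for a Vite project; the vite.config.js below is my own sketch (not from the original article), and the target hosts are the commonly documented Baidu endpoints, so double-check them against the API docs in your console. For Vue CLI / webpack, devServer.proxy is the analogous option.

// vite.config.js (sketch; assumes a standard Vite + Vue 3 scaffold)
import { defineConfig } from "vite";
import vue from "@vitejs/plugin-vue";

export default defineConfig({
  plugins: [vue()],
  server: {
    proxy: {
      // OAuth token endpoint
      "/oauth": {
        target: "https://aip.baidubce.com",
        changeOrigin: true,
      },
      // TTS endpoint
      "/text2audio": {
        target: "https://tsn.baidu.com",
        changeOrigin: true,
      },
      // short speech recognition (ASR) endpoint
      "/server_api": {
        target: "https://vop.baidu.com",
        changeOrigin: true,
      },
    },
  },
});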

<template>
  <div class="app">
    <div class="get_tts_token">
      <h1>
        1.请输入你<i>语音合成</i>应用的client_id和client_secret获取access_token
      </h1>
      <el-row :gutter="50">
        <el-col :span="8">
          <el-input
            v-model.trim="client_id"
            placeholder="请输入你的client_id(应用的API Key)"
          />
        </el-col>
        <el-col :span="8">
          <el-input
            v-model.trim="client_secret"
            placeholder="请输入你的client_secret(应用的Secret Key)"
          />
        </el-col>
        <el-col :span="8"
          ><el-button @click="handleGetAccessToken"
            >获取AccessToken</el-button
          ></el-col
        >
      </el-row>
    </div>

    <hr />

    <div class="text2audio">
      <h1>2.语音合成</h1>
      <h4>免费的只能使用前4种语音</h4>
      <el-radio-group v-model="per">
        <el-radio-button label="1">度小宇</el-radio-button>
        <el-radio-button label="0">度小美</el-radio-button>
        <el-radio-button label="3">度逍遥(基础)</el-radio-button>
        <el-radio-button label="4">度丫丫</el-radio-button>
        <el-radio-button label="5003">度逍遥(精品)</el-radio-button>
        <el-radio-button label="5118">度小鹿</el-radio-button>
        <el-radio-button label="106">度博文</el-radio-button>
        <el-radio-button label="110">度小童</el-radio-button>
        <el-radio-button label="111">度小萌</el-radio-button>
        <el-radio-button label="103">度米朵</el-radio-button>
        <el-radio-button label="5">度小娇</el-radio-button>
      </el-radio-group>
      <el-row :gutter="50">
        <el-col :span="8">
          <el-input
            v-model.trim="inputText"
            placeholder="请输入你要转化的文本"
          />
        </el-col>
        <el-col :span="2"
          ><el-button @click="handleTextToAudio">语音合成</el-button></el-col
        >
        <el-col :span="8">
          <audio :src="audioSrc" v-if="audioSrc" controls>
            您的浏览器不支持音频播放。
          </audio>
        </el-col>
      </el-row>
    </div>

    <hr />

    <div class="get_asr_token">
      <h1>
        3.请输入你<i>语音识别</i>应用的client_id和client_secret获取access_token
      </h1>
      <el-row :gutter="50">
        <el-col :span="8">
          <el-input
            v-model.trim="client_id"
            placeholder="请输入你的client_id(应用的API Key)"
          />
        </el-col>
        <el-col :span="8">
          <el-input
            v-model.trim="client_secret"
            placeholder="请输入你的client_secret(应用的Secret Key)"
          />
        </el-col>
        <el-col :span="8"
          ><el-button @click="handleGetAccessToken"
            >获取AccessToken</el-button
          ></el-col
        >
      </el-row>
    </div>

    <hr />

    <div class="audio2text">
      <h1>4.语音识别</h1>
      <el-row :gutter="50">
        <el-col :span="4">
          <el-button @click="handleGetPermissions">获取录音权限</el-button>
        </el-col>
        <el-col :span="4">
          <el-button @click="handleRecording">{
   
   { recordBtn }}</el-button>
        </el-col>
        <el-col :span="8">
          <audio :src="audioRecordSrc" controls></audio>
        </el-col>
      </el-row>
      <el-row :gutter="50">
        <el-col :span="4">
          <el-button @click="handleSpeechRecognition">语音识别</el-button>
        </el-col>
        <el-col :span="8">
          {{ recordText }}
          <!-- <audio :src="audioRecordSrc" controls></audio> -->
        </el-col>
      </el-row>
    </div>
  </div>
</template>

<script setup>
import { reactive, ref } from "vue";
import axios from "axios";
import qs from "qs";
import { ElMessage, ElMessageBox } from "element-plus";
import { HZRecorder } from "./utils/recorder";

// Message toast helper
const openMsg = (message, type) => {
  ElMessage({
    message,
    type,
  });
};

// 1. Fetch the AccessToken
// client_id is the API Key of the application you created; client_secret is its Secret Key
const client_id = ref("");
const client_secret = ref("");

const handleGetAccessToken = async () => {
  try {
    const option = {
      grant_type: "client_credentials",
      client_id: client_id.value,
      client_secret: client_secret.value,
    };
    const res = await axios.post("/oauth/2.0/token", qs.stringify(option));
    if (res.status !== 200) {
      return openMsg(res.statusText, "warning");
    }
    openMsg("获取token成功", "success");
    localStorage.setItem("access_token", res.data.access_token);
    client_id.value = "";
    client_secret.value = "";
  } catch (error) {
    console.log(error);
  }
};

// 2. Text-to-speech (TTS) API call
// per: voice (speaker) id
const per = ref("1");
// text to synthesize
const inputText = ref("");
// bound to the audio element's src attribute
const audioSrc = ref("");

const handleTextToAudio = async () => {
  const token = localStorage.getItem("access_token");
  if (!token) {
    return openMsg("请先获取token!", "warning");
  }
  textToAudio(token);
};
const textToAudio = async (token) => {
  const option = {
    tex: inputText.value,
    tok: token,
    cuid: `${Math.floor(Math.random() * 1000000)}`,
    ctp: "1",
    lan: "zh",
    per: per.value,
  };
  const res = await axios.post("/text2audio", qs.stringify(option), {
    headers: { "Content-Type": "application/x-www-form-urlencoded" },
    responseType: "blob",
  });
  if (res.status !== 200) {
    return openMsg(res.statusText, "warning");
  }
  openMsg("语音合成成功", "success");
  audioSrc.value = URL.createObjectURL(res.data);
};

// 3. Speech recognition (ASR)
let recordBtn = ref("开始录音");
const audioRecordSrc = ref("");
const recordText = ref("");
window.URL = window.URL || window.webkitURL;
// recorder holds the recorder instance
let recorder = null;
let isRecorder = false;
// wavBlob holds the recorded audio as binary data
let wavBlob = null;

// Request microphone permission
const handleGetPermissions = () => {
  // constraints lists the media to request; here only audio is needed.
  // getUserMedia returns a Promise, since it is unknown when (or whether) the user will grant
  // permission; success and failure are handled in the Promise callbacks. Check that the
  // browser supports the API before calling it.
  if (navigator.mediaDevices.getUserMedia) {
    const constraints = { audio: true };
    navigator.mediaDevices.getUserMedia(constraints).then(
      (stream) => {
        recorder = new HZRecorder(stream);
        console.log("初始化完成");
      },
      () => {
        console.error("授权失败!");
      }
    );
  } else {
    console.error("浏览器不支持 getUserMedia");
  }
};
// Start / stop recording
const handleRecording = () => {
  if (!isRecorder) {
    recorder && recorder.start();
    recordBtn.value = "结束录音";
    isRecorder = true;
  } else {
    recorder && recorder.stop();
    wavBlob = recorder.upload();
    console.log(wavBlob);
    audioRecordSrc.value = window.URL.createObjectURL(wavBlob);
    recordBtn.value = "开始录音";
    isRecorder = false;
  }
};
// Speech recognition request
const handleSpeechRecognition = async () => {
  const token = localStorage.getItem("access_token");
  if (!token) {
    return openMsg("请先获取token!", "warning");
  }
  // wavBlob = recorder.upload();
  let blobToDataURL = (blob, callback) => {
    var a = new FileReader();
    a.onload = function (e) {
      callback(e.target.result.split("data:audio/wav;base64,")[1]);
    };
    a.readAsDataURL(blob);
  };
  blobToDataURL(wavBlob, async (base_64) => {
    const res = await axios.post(
      "/server_api",
      {
        speech: base_64, // binary audio data, base64-encoded; used together with the len parameter
        len: wavBlob.size, // size of the audio data in bytes
        dev_pid: 1537, // 1537 = Mandarin model
        cuid: "541b:3f:5af4:b2c9",
        rate: 16000, // sample rate 16000 (or 8000); 16-bit depth, mono
        token: token, // the access_token fetched with your credentials
        channel: 1, // mono
        format: "wav", // format of the audio being recognized
      },
      {
        headers: {
          "Content-Type": "application/json",
        },
      }
    );
    recorder.clear();
    if (res.data.err_no !== 0) {
      return openMsg(res.data.err_msg, "warning");
    }
    openMsg("识别成功", "success");
    recordText.value = res.data.result[0];
    console.log("识别结果:" + res.data.result[0]);
  });
};
</script>

<style scoped>
.app {
  width: 80%;
  margin: auto;
  margin-top: 50px;
}

hr {
  margin: 30px 0;
}

h1 {
  margin: 10px 0;
}

:deep(.el-radio-group) {
  margin-bottom: 30px;
}
</style>
// recorder.js (placed at src/utils/recorder.js, imported above as "./utils/recorder")
export function HZRecorder(stream, config) {
    config = config || {};
    config.sampleBits = config.sampleBits || 16;      // sample bits: 8 or 16
    config.sampleRate = config.sampleRate || 16000;   // sample rate: 16 kHz

    var context = new (window.AudioContext || window.webkitAudioContext)();
    var audioInput = context.createMediaStreamSource(stream);
    var createScript = context.createScriptProcessor || context.createJavaScriptNode;
    var recorder = createScript.apply(context, [4096, 1, 1]);

    var audioData = {
        size: 0,                                 // total number of buffered samples
        buffer: [],                              // recording buffer
        inputSampleRate: context.sampleRate,     // input sample rate
        inputSampleBits: 16,                     // input sample bits: 8 or 16
        outputSampleRate: config.sampleRate,     // output sample rate
        oututSampleBits: config.sampleBits,      // output sample bits: 8 or 16
        input: function (data) {
            this.buffer.push(new Float32Array(data));
            this.size += data.length;
        },
        // merge the buffered chunks and downsample to the output sample rate
        compress: function () {
            // merge
            var data = new Float32Array(this.size);
            var offset = 0;
            for (var i = 0; i < this.buffer.length; i++) {
                data.set(this.buffer[i], offset);
                offset += this.buffer[i].length;
            }
            // downsample
            var compression = parseInt(this.inputSampleRate / this.outputSampleRate);
            var length = data.length / compression;
            var result = new Float32Array(length);
            var index = 0, j = 0;
            while (index < length) {
                result[index] = data[j];
                j += compression;
                index++;
            }
            return result;
        },
        // build a WAV file (44-byte header + PCM samples) and return it as a Blob
        encodeWAV: function () {
            var sampleRate = Math.min(this.inputSampleRate, this.outputSampleRate);
            var sampleBits = Math.min(this.inputSampleBits, this.oututSampleBits);
            var bytes = this.compress();
            var dataLength = bytes.length * (sampleBits / 8);
            var buffer = new ArrayBuffer(44 + dataLength);
            var data = new DataView(buffer);

            var channelCount = 1; // mono
            var offset = 0;

            var writeString = function (str) {
                for (var i = 0; i < str.length; i++) {
                    data.setUint8(offset + i, str.charCodeAt(i));
                }
            };

            // RIFF chunk identifier
            writeString('RIFF'); offset += 4;
            // byte count from the next field to the end of the file, i.e. file size - 8
            data.setUint32(offset, 36 + dataLength, true); offset += 4;
            // WAV file marker
            writeString('WAVE'); offset += 4;
            // format chunk marker
            writeString('fmt '); offset += 4;
            // format chunk length, normally 0x10 = 16
            data.setUint32(offset, 16, true); offset += 4;
            // format category (1 = PCM)
            data.setUint16(offset, 1, true); offset += 2;
            // channel count
            data.setUint16(offset, channelCount, true); offset += 2;
            // sample rate (samples per second per channel)
            data.setUint32(offset, sampleRate, true); offset += 4;
            // byte rate: channels * sample rate * bits per sample / 8
            data.setUint32(offset, channelCount * sampleRate * (sampleBits / 8), true); offset += 4;
            // block align (bytes per sample frame): channels * bits per sample / 8
            data.setUint16(offset, channelCount * (sampleBits / 8), true); offset += 2;
            // bits per sample
            data.setUint16(offset, sampleBits, true); offset += 2;
            // data chunk identifier
            writeString('data'); offset += 4;
            // data chunk length, i.e. total size - 44
            data.setUint32(offset, dataLength, true); offset += 4;
            // write the PCM samples
            if (sampleBits === 8) {
                for (var i = 0; i < bytes.length; i++, offset++) {
                    var s = Math.max(-1, Math.min(1, bytes[i]));
                    var val = s < 0 ? s * 0x8000 : s * 0x7FFF;
                    val = parseInt(255 / (65535 / (val + 32768)));
                    data.setInt8(offset, val);
                }
            } else {
                for (var i = 0; i < bytes.length; i++, offset += 2) {
                    var s = Math.max(-1, Math.min(1, bytes[i]));
                    data.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
                }
            }

            return new Blob([data], { type: 'audio/wav' });
        }
    };

    // start recording
    this.start = function () {
        audioInput.connect(recorder);
        recorder.connect(context.destination);
    };

    // stop recording
    this.stop = function () {
        recorder.disconnect();
    };

    // get the recording as a WAV Blob
    this.getBlob = function () {
        this.stop();
        return audioData.encodeWAV();
    };

    // play the recording back through a given <audio> element
    this.play = function (audio) {
        audio.src = window.URL.createObjectURL(this.getBlob());
    };

    // "upload": here it simply returns the WAV Blob
    this.upload = function () {
        return this.getBlob();
    };

    // clear the buffered audio
    this.clear = function () {
        console.log("audioData", audioData.size);
        audioData.size = 0;
        audioData.buffer = [];
    };

    // audio capture callback
    recorder.onaudioprocess = function (e) {
        audioData.input(e.inputBuffer.getChannelData(0));
    };
}

export default HZRecorder
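In handleSpeechRecognition, the component reads res.data.err_no and res.data.result[0]. For orientation, a successful JSON response from Baidu's short speech recognition API looks roughly like the sketch below; the values are illustrative and the field list is not guaranteed complete, so consult the official docs.

// Rough shape of a successful /server_api response (illustrative):
// {
//   "err_no": 0,
//   "err_msg": "success.",
//   "sn": "…",
//   "result": ["<recognized text>"]
// }
// A non-zero err_no means the request failed; the component then shows err_msg instead.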

3. Demo

  1. In section (3), enter the client_id and client_secret of your speech recognition application and click "获取AccessToken" to fetch a token.
  2. Click "获取录音权限" to request microphone access.
  3. Click "开始录音" and then "结束录音"; when recording stops, your recording is loaded into the audio element.
  4. Click "语音识别"; when recognition finishes, the recognized text appears to the right of the button.
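One note for step 2: getUserMedia is only available in a secure context, i.e. pages served over HTTPS or from localhost. If no permission prompt appears, you can paste this quick check into the browser console (my own helper, not part of the article's code):

// Environment check for microphone capture
if (!window.isSecureContext) {
  console.warn("Not a secure context: serve the page over HTTPS or from localhost.");
}
if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
  console.warn("This browser does not expose navigator.mediaDevices.getUserMedia.");
}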

Reposted from blog.csdn.net/weixin_47365243/article/details/132371841