Unity SDK integration, part 03: Baidu speech recognition (Web API version)

Part One: Get a License

1. First, go to the Baidu AI official website (https://ai.baidu.com), register an account, and apply to become a developer.

2. Create an application and fill in the corresponding application description

 

3. Obtain the appId, apiKey, and secretKey of the application you created.

 

Part Two: Write the Code

1. Create a new base class for Baidu Voice and name it SpeechBase (code is as follows)

using UnityEngine;

namespace BaiduSpeech
{
    /// <summary>
    /// Base class for the Baidu speech components. Stores the application credentials and
    /// forwards the Unity lifecycle messages to overridable hooks.
    /// </summary>
    public abstract class SpeechBase : MonoBehaviour
    {
        /// <summary>Application id (presumably the appId issued for the Baidu application; not read by this class).</summary>
        public string appId { get; set; }

        /// <summary>Application API key.</summary>
        public string apiKey { get; set; }

        /// <summary>Application secret key.</summary>
        public string secretKey { get; set; }

        //---------------------------------------- Unity messages ----------------------------------------

        private void Awake()
        {
            // OnAwake always runs before OnInitPlatform.
            OnAwake();
            OnInitPlatform();
        }

        private void Start() => OnStart();

        private void Update() => OnUpdate();

        private void OnDestroy() => OnDispose();

        //---------------------------------------- Public hooks ----------------------------------------

        /// <summary>Initialisation hook; invoked from Awake before <see cref="OnInitPlatform"/>.</summary>
        public virtual void OnAwake()
        {
        }

        /// <summary>Hook invoked from Start.</summary>
        public virtual void OnStart()
        {
        }

        /// <summary>Hook invoked from Update.</summary>
        public virtual void OnUpdate()
        {
        }

        /// <summary>Platform initialisation hook; invoked from Awake after <see cref="OnAwake"/>.</summary>
        public virtual void OnInitPlatform()
        {
        }

        /// <summary>Release hook; invoked from OnDestroy.</summary>
        public virtual void OnDispose()
        {
        }
    }
}

2. Create a new voice-to-text base class named AsrBase (code is as follows)

namespace BaiduSpeech
{
    /// <summary>
    /// Base class for speech-to-text implementations. Every operation is an empty virtual
    /// method, so a subclass overrides only what it supports.
    /// </summary>
    public abstract class AsrBase : SpeechBase
    {
        /// <summary>Initialises speech recognition.</summary>
        public virtual void AsrInit()
        {
        }

        /// <summary>Starts recording.</summary>
        /// <param name="json">Recognition parameters as a JSON string (format is up to the implementation).</param>
        public virtual void VoiceStart(string json)
        {
        }

        /// <summary>
        /// Cancels the current recognition. After cancelling, recording stops immediately
        /// and no recognition result is returned.
        /// </summary>
        public virtual void VoiceCancel()
        {
        }

        /// <summary>Stops recording.</summary>
        public virtual void VoiceStop()
        {
        }
    }
}

3. Create a new class named WebAsrParams for JSON parsing (code is as follows)

/// <summary>
/// Data holder for the JSON returned by the recognition request; filled by
/// JsonUtility.FromJson. Field names must stay identical to the JSON keys because
/// JsonUtility maps values by field name.
/// </summary>
[System.Serializable]
public class WebAsrParams
{
    /// <summary>Value of the "err_no" key (error number).</summary>
    public int err_no;

    /// <summary>Value of the "err_msg" key (error message).</summary>
    public string err_msg;

    /// <summary>Value of the "corpus_no" key.</summary>
    public long corpus_no;

    /// <summary>Value of the "sn" key.</summary>
    public string sn;

    /// <summary>Value of the "result" key (presumably the recognised text candidates).</summary>
    public string[] result;
}

 4. Create a new class named AsrForWeb (code is as follows)

using System;
using System.Collections;
using System.Text.RegularExpressions;
using UnityEngine;
using UnityEngine.Networking;

namespace BaiduSpeech
{
    /// <summary>Web接口语音转文本功能API管理</summary>
    public class AsrForWeb : AsrBase
    {
        /// <summary>Cached access token; stays empty until GetAccessToken extracts one from the token response.</summary>
        private string accessToken = string.Empty;
        /// <summary>Baidu token endpoint URL prefix. It ends in "client" on purpose: GetAccessToken appends "_id=..." and the client_secret parameter to it.</summary>
        private const string ACCESS_TOKEN_API_URL = "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials&client";
        /// <summary>True once OnInitPlatform has found at least one microphone device.</summary>
        private bool isHaveMic = false;
        /// <summary>Name of the recording device in use (the first entry of Microphone.devices).</summary>
        private string currentDeviceName = string.Empty;
        /// <summary>Recording sample rate in Hz; controls the recording quality (8000 or 16000).</summary>
        private int recordFrequency = 8000;
        /// <summary>Millisecond timestamp taken when recording was last started (set in VoiceStart).</summary>
        private double lastPressTimestamp = 0;
        /// <summary>Maximum recording length; passed to Microphone.Start as the clip length in seconds.</summary>
        private int recordMaxLength = 10;
        /// <summary>Actual recording length in whole seconds, rounded up; computed in VoiceStop.</summary>
        private int trueLength = 0;
        /// <summary>Loop flag passed to Microphone.Start.</summary>
        private bool isLoop = false;
        /// <summary>Clip returned by Microphone.Start; StartAsr reads the recorded samples from it.</summary>
        private AudioClip saveAudioClip;

        /// <summary>
        /// Platform initialisation: looks for a microphone and, when one exists, remembers
        /// the first device as the recording device.
        /// </summary>
        public override void OnInitPlatform()
        {
            string[] devices = Microphone.devices;
            if (devices.Length == 0) { return; }

            currentDeviceName = devices[0];
            isHaveMic = true;
        }

        /// <summary>
        /// Initialises speech recognition by checking that recording can start. Logs a
        /// warning when no microphone is available or when the microphone is already
        /// recording; otherwise there is nothing to set up for the Web API.
        /// </summary>
        public override void AsrInit()
        {
            if (!isHaveMic)
            {
                // Message: "the current device has no microphone".
                Debug.LogWarning(GetType() + "/AsrInit()/当前设备没有麦克风!");
                return;
            }

            if (Microphone.IsRecording(currentDeviceName))
            {
                // A busy microphone is a different condition from a missing one, so it
                // gets its own message: "the microphone is currently recording".
                Debug.LogWarning(GetType() + "/AsrInit()/麦克风正在录音中!");
                return;
            }

            // Speech initialisation succeeded; no further setup is required here.
        }

        /// <summary>
        /// Starts recording from the current microphone. Does nothing when there is no
        /// microphone or when a recording is already in progress.
        /// </summary>
        /// <param name="json">Not read by this implementation. For details see https://ai.baidu.com/ai-doc/SPEECH/9k38lxfnk </param>
        public override void VoiceStart(string json)
        {
            bool canRecord = isHaveMic && !Microphone.IsRecording(currentDeviceName);
            if (!canRecord) { return; }

            // Remember when recording began so VoiceStop can work out the recorded length.
            lastPressTimestamp = GetTimestampOfNowWithMillisecond();
            saveAudioClip = Microphone.Start(currentDeviceName, isLoop, recordMaxLength, recordFrequency);
        }

        /// <summary>
        /// Cancels the current recognition: recording stops immediately and no recognition
        /// request is sent. Does nothing when no recording is in progress.
        /// </summary>
        public override void VoiceCancel()
        {
            if (isHaveMic && Microphone.IsRecording(currentDeviceName))
            {
                Microphone.End(currentDeviceName);
            }
        }

        /// <summary>
        /// Stops recording and, when the recording is long enough, starts the recognition
        /// coroutine. Does nothing when no recording is in progress.
        /// </summary>
        public override void VoiceStop()
        {
            bool recording = isHaveMic && Microphone.IsRecording(currentDeviceName);
            if (!recording)
            {
                return;
            }

            Microphone.End(currentDeviceName);

            // Recorded length in whole seconds, rounded up.
            double elapsedMilliseconds = GetTimestampOfNowWithMillisecond() - lastPressTimestamp;
            trueLength = Mathf.CeilToInt((float)elapsedMilliseconds / 1000f);

            if (trueLength <= 1)
            {
                // Message: "the recording is too short".
                Debug.LogWarning(GetType() + "/VoiceStop()/录音时长过短!");
                return;
            }

            StartCoroutine(StartAsr());
        }

        /// <summary>
        /// Returns the current UTC time as whole milliseconds since 1970-01-01 00:00:00 UTC.
        /// Used to measure how long the recording lasted.
        /// </summary>
        /// <returns>Millisecond timestamp, truncated to a whole number.</returns>
        private double GetTimestampOfNowWithMillisecond()
        {
            long unixEpochTicks = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc).Ticks;
            long ticksSinceEpoch = DateTime.UtcNow.Ticks - unixEpochTicks;

            // Integer division on purpose: sub-millisecond ticks are dropped.
            return ticksSinceEpoch / TimeSpan.TicksPerMillisecond;
        }

        /// <summary>
        /// Coroutine that requests an access token from the Baidu token endpoint using
        /// <c>apiKey</c> and <c>secretKey</c>, and stores it in <c>accessToken</c> on success.
        /// On failure a warning is logged and <c>accessToken</c> is left unchanged.
        /// </summary>
        private IEnumerator GetAccessToken()
        {
            // Escape the credentials so reserved characters cannot corrupt the query string.
            // ACCESS_TOKEN_API_URL ends in "client", so "_id" completes the "client_id" key.
            string uri = ACCESS_TOKEN_API_URL
                + "_id=" + UnityWebRequest.EscapeURL(apiKey ?? string.Empty)
                + "&client_secret=" + UnityWebRequest.EscapeURL(secretKey ?? string.Empty);

            // Dispose the request so its native resources are released.
            using (UnityWebRequest unityWebRequest = UnityWebRequest.Get(uri))
            {
                yield return unityWebRequest.SendWebRequest();

                // isDone is always true after the yield, so test the error string instead.
                if (!string.IsNullOrEmpty(unityWebRequest.error))
                {
                    // Message: "verification error, failed to get the AccessToken".
                    Debug.LogWarning(GetType() + "/GetAccessToken()/验证错误,获取AccessToken失败! " + unityWebRequest.error);
                    yield break;
                }

                // Match the quoted value directly, so the token is found even when
                // "access_token" is the last property and no comma follows it.
                Match match = Regex.Match(unityWebRequest.downloadHandler.text, "\"access_token\"\\s*:\\s*\"([^\"]+)\"");
                if (match.Success)
                {
                    accessToken = match.Groups[1].Value;
                }
                else
                {
                    Debug.LogWarning(GetType() + "/GetAccessToken()/验证错误,获取AccessToken失败!");
                }
            }
        }

        /// <summary>
        /// Coroutine that sends the recorded audio to the Baidu recognition endpoint.
        /// Fetches an access token first if none is cached, converts the recorded samples
        /// to 16-bit PCM, posts them as the raw request body and logs the JSON response.
        /// Logs a warning and stops when the token, the clip or the request is unusable.
        /// </summary>
        private IEnumerator StartAsr()
        {
            if (string.IsNullOrEmpty(accessToken)) { yield return GetAccessToken(); }

            // Without a token the request cannot succeed, so stop here
            // (GetAccessToken has already logged the reason).
            if (string.IsNullOrEmpty(accessToken))
            {
                // Message: "speech recognition failed".
                Debug.LogWarning(GetType() + "/StartAsr()/语音识别失败!");
                yield break;
            }

            if (saveAudioClip == null)
            {
                Debug.LogWarning(GetType() + "/StartAsr()/语音识别失败!");
                yield break;
            }

            // trueLength is rounded up, so it can describe more samples than the clip holds.
            // Clamp to the clip length so GetData does not wrap around to the clip start.
            int frameCount = Mathf.Clamp(recordFrequency * trueLength, 0, saveAudioClip.samples);
            if (frameCount == 0)
            {
                Debug.LogWarning(GetType() + "/StartAsr()/语音识别失败!");
                yield break;
            }

            // NOTE(review): multi-channel clips are sent interleaved as-is; confirm that the
            // microphone clip is mono, which the service presumably expects.
            float[] samples = new float[frameCount * saveAudioClip.channels];
            saveAudioClip.GetData(samples, 0);
            byte[] datas = ConvertToPcm16(samples);

            string url = string.Format("{0}?cuid={1}&token={2}", "https://vop.baidu.com/server_api", SystemInfo.deviceUniqueIdentifier, accessToken);

            // Upload the PCM bytes as the raw body. A WWWForm would wrap them in a
            // multipart/form-data envelope, which does not match the audio/pcm content type.
            using (UnityWebRequest unityWebRequest = new UnityWebRequest(url, UnityWebRequest.kHttpVerbPOST))
            {
                unityWebRequest.uploadHandler = new UploadHandlerRaw(datas);
                unityWebRequest.downloadHandler = new DownloadHandlerBuffer();
                unityWebRequest.SetRequestHeader("Content-Type", "audio/pcm;rate=" + recordFrequency);

                yield return unityWebRequest.SendWebRequest();

                if (!string.IsNullOrEmpty(unityWebRequest.error))
                {
                    Debug.LogWarning(GetType() + "/StartAsr()/语音识别失败! " + unityWebRequest.error);
                    yield break;
                }

                string asrResult = unityWebRequest.downloadHandler.text;
                Debug.Log(asrResult);

                // A transport-level success can still carry a service-level error code.
                WebAsrParams webAsrParams = JsonUtility.FromJson<WebAsrParams>(asrResult);
                if (webAsrParams == null)
                {
                    Debug.LogWarning(GetType() + "/StartAsr()/语音识别失败!");
                }
                else if (webAsrParams.err_no != 0)
                {
                    Debug.LogWarning(GetType() + "/StartAsr()/语音识别失败! err_no=" + webAsrParams.err_no + ", err_msg=" + webAsrParams.err_msg);
                }
            }
        }

        /// <summary>Converts float samples to 16-bit signed PCM bytes in the platform's byte order.</summary>
        /// <param name="samples">Samples as returned by AudioClip.GetData.</param>
        /// <returns>Two bytes per input sample.</returns>
        private static byte[] ConvertToPcm16(float[] samples)
        {
            short[] pcm = new short[samples.Length];
            for (int index = 0; index < samples.Length; index++)
            {
                // Clamp first so an out-of-range sample saturates instead of overflowing the cast.
                pcm[index] = (short)(Mathf.Clamp(samples[index], -1f, 1f) * short.MaxValue);
            }

            byte[] bytes = new byte[pcm.Length * sizeof(short)];
            Buffer.BlockCopy(pcm, 0, bytes, 0, bytes.Length);
            return bytes;
        }
    }
}

 

Unity source code: https://github.com/yongliangchen/BaiduSpeechForUnity.git

Android source code: https://github.com/yongliangchen/BaiduSpeechForAndroid.git

QQ discussion group: 947618353

 

Guess you like

Origin blog.csdn.net/a451319296/article/details/108970244