Unity集成百度语音识别和合成--REST API

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/wzw_ice/article/details/79031390

直接上unity的C#脚本代码

百度语音识别

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using System.Xml;
using LitJson;
using System.Text;
using System;
using UnityEngine.UI;
using System.IO;

/// <summary>
/// Records audio from the default microphone, converts it to 16-bit PCM,
/// uploads it to the Baidu speech recognition REST endpoint and shows the
/// recognised text in <see cref="debugText"/>.
/// Typical flow: <see cref="StartMic"/> -> <see cref="EndMic"/> -> token
/// request -> recognition request -> <see cref="audioToString"/>.
/// </summary>
public class showVoiceResult1 : MonoBehaviour {

    /// <summary>Length in seconds of the recording buffer requested from the microphone.</summary>
    private const int MaxRecordSeconds = 10;

    /// <summary>Scale factor used to map a float sample in [-1, 1] to a signed 16-bit value.</summary>
    private const int PcmRescaleFactor = 32767;

    private string token;                           // access_token returned by the OAuth endpoint
    private string cuid = "liang";                  // user identifier sent with every request
    // The uploaded bytes are header-less 16-bit PCM (see EncodeClip), so the
    // payload is declared as "pcm" rather than "wav".
    private string format = "pcm";                  // audio format declared to the service
    private int rate = 8000;                        // sampling rate in Hz
    private int channel = 1;                        // number of channels declared to the service
    private string speech;                          // audio data, base64 encoded
    private int len;                                // byte length of the audio before base64 encoding
    private string lan = "zh";                      // language; currently not sent in the request

    private string grant_Type = "client_credentials";
    private string client_ID = "********";                       // Baidu app key
    private string client_Secret = "******";                   // Baidu secret key

    private string baiduAPI = "http://vop.baidu.com/server_api";
    private string getTokenAPIPath = "https://openapi.baidu.com/oauth/2.0/token";

    private byte[] clipByte;
    public Text debugText;

    /// <summary>
    /// The text produced by the most recent successful recognition.
    /// </summary>
    public static string audioToString;

    private AudioSource aud;
    private int audioLength;    // duration of the recording in whole seconds

    void Start () {
        EnsureAudioSource();
    }

    // Update is called once per frame
    void Update () {
        // debugText is assigned in the Inspector and may be left empty.
        if (debugText != null)
        {
            debugText.text = audioToString;
        }
    }

    /// <summary>
    /// Makes sure <see cref="aud"/> refers to an AudioSource on this GameObject,
    /// adding one when none is attached.
    /// </summary>
    private void EnsureAudioSource()
    {
        if (aud != null)
        {
            return;
        }

        aud = GetComponent<AudioSource>();
        if (aud == null)
        {
            aud = gameObject.AddComponent<AudioSource>();
        }
    }

    /// <summary>
    /// Requests an access token and, on success, starts the recognition request.
    /// </summary>
    /// <param name="url">URL of the token endpoint.</param>
    /// <returns>Coroutine enumerator.</returns>
    private IEnumerator GetToken(string url)
    {
        WWWForm getTForm = new WWWForm();
        getTForm.AddField("grant_type", grant_Type);
        getTForm.AddField("client_id", client_ID);
        getTForm.AddField("client_secret", client_Secret);

        WWW getTW = new WWW(url, getTForm);
        yield return getTW;

        // WWW.error may be null or an empty string when the request succeeded.
        if (!string.IsNullOrEmpty(getTW.error))
        {
            Debug.LogError(getTW.error);
            yield break;
        }

        string accessToken = ExtractAccessToken(getTW.text);
        if (string.IsNullOrEmpty(accessToken))
        {
            yield break;
        }

        token = accessToken;
        StartCoroutine(GetAudioString(baiduAPI));
    }

    /// <summary>
    /// Reads the "access_token" member from a token response.
    /// </summary>
    /// <param name="json">Raw response body.</param>
    /// <returns>The token, or null when the body could not be parsed.</returns>
    private static string ExtractAccessToken(string json)
    {
        try
        {
            return JsonMapper.ToObject(json)["access_token"].ToString();
        }
        catch (Exception e)
        {
            // Malformed JSON or a response without access_token (e.g. an error object).
            Debug.LogError("Failed to read access_token from token response: " + e.Message + "\n" + json);
            return null;
        }
    }

    /// <summary>
    /// Posts the recorded audio as a JSON document and stores the recognised
    /// text in <see cref="audioToString"/>.
    /// </summary>
    /// <param name="url">URL of the recognition endpoint.</param>
    /// <returns>Coroutine enumerator.</returns>
    private IEnumerator GetAudioString(string url)
    {
        JsonWriter jw = new JsonWriter();
        jw.WriteObjectStart();
        jw.WritePropertyName("format");
        jw.Write(format);
        jw.WritePropertyName("rate");
        jw.Write(rate);
        jw.WritePropertyName("channel");
        jw.Write(channel);
        jw.WritePropertyName("token");
        jw.Write(token);
        jw.WritePropertyName("cuid");
        jw.Write(cuid);
        jw.WritePropertyName("len");
        jw.Write(len);
        jw.WritePropertyName("speech");
        jw.Write(speech);
        jw.WriteObjectEnd();

        // Encode explicitly as UTF-8 (Encoding.Default depends on the machine)
        // and label the body as JSON instead of relying on WWW's default
        // content type for raw post data.
        byte[] body = Encoding.UTF8.GetBytes(jw.ToString());
        Dictionary<string, string> headers = new Dictionary<string, string>();
        headers["Content-Type"] = "application/json";

        WWW getASW = new WWW(url, body, headers);
        yield return getASW;

        if (!string.IsNullOrEmpty(getASW.error))
        {
            Debug.LogError(getASW.error);
            yield break;
        }

        string transcript = ExtractTranscript(getASW.text);
        if (transcript == null)
        {
            yield break;
        }

        audioToString = transcript;
        Debug.Log(audioToString);
    }

    /// <summary>
    /// Extracts the first recognition result from a recognition response.
    /// </summary>
    /// <param name="json">Raw response body.</param>
    /// <returns>
    /// The first entry of "result" with one trailing comma removed, or null
    /// when the response reports a failure or cannot be parsed.
    /// </returns>
    private static string ExtractTranscript(string json)
    {
        try
        {
            JsonData response = JsonMapper.ToObject(json);
            if (response["err_msg"].ToString() != "success.")
            {
                Debug.LogError("Speech recognition failed: " + json);
                return null;
            }

            string result = response["result"][0].ToString();

            // Strip one trailing comma (ASCII or full-width) from the transcript.
            if (result.EndsWith(",", StringComparison.Ordinal) ||
                result.EndsWith("\uFF0C", StringComparison.Ordinal))
            {
                result = result.Substring(0, result.Length - 1);
            }

            return result;
        }
        catch (Exception e)
        {
            Debug.LogError("Failed to parse recognition response: " + e.Message + "\n" + json);
            return null;
        }
    }

    /// <summary>
    /// Starts recording from the default microphone into the AudioSource clip.
    /// </summary>
    public void StartMic()
    {
        if (Microphone.devices.Length == 0)
        {
            Debug.Log ("no devices");
            return;
        }

        EnsureAudioSource();

        Microphone.End(null);
        Debug.Log("Start");
        Debug.Log(string.Join(", ", Microphone.devices));

        // Use the default device (null) so that this call addresses the same
        // device as the End/GetPosition/IsRecording calls in this class.
        aud.clip = Microphone.Start(null, false, MaxRecordSeconds, rate);
        if (aud.clip == null)
        {
            Debug.LogError("Microphone.Start did not return a clip");
        }
    }

    /// <summary>
    /// Stops recording, encodes the recorded part of the clip and starts the
    /// token + recognition requests.
    /// </summary>
    public void EndMic()
    {
        if (aud == null || aud.clip == null)
        {
            Debug.LogError("录音数据为空");
            return;
        }

        // The write position has to be read before the recording is stopped.
        int lastPos = Microphone.GetPosition(null);
        bool stillRecording = Microphone.IsRecording(null);

        // If the recording is no longer running, the whole buffer is used.
        int recordedFrames = stillRecording ? lastPos : aud.clip.samples;
        audioLength = stillRecording ? lastPos / rate : MaxRecordSeconds;

        Debug.Log("Stop");
        Microphone.End(null);

        // Only upload what was actually recorded instead of the full buffer.
        clipByte = EncodeClip(aud.clip, recordedFrames);
        if (clipByte == null)
        {
            return;
        }

        len = clipByte.Length;
        speech = Convert.ToBase64String(clipByte);
        StartCoroutine(GetToken(getTokenAPIPath));
        Debug.Log(len);
        Debug.Log(audioLength);
    }

    /// <summary>
    /// Converts the whole current clip to 16-bit little-endian PCM bytes.
    /// </summary>
    /// <returns>The PCM bytes, or null when there is no clip data.</returns>
    public byte[] GetClipData()
    {
        if (aud == null || aud.clip == null)
        {
            Debug.LogError("录音数据为空");
            return null;
        }

        return EncodeClip(aud.clip, aud.clip.samples);
    }

    /// <summary>
    /// Converts the first <paramref name="frameCount"/> sample frames of a clip
    /// to mono 16-bit little-endian PCM.
    /// </summary>
    /// <param name="clip">Clip to read from.</param>
    /// <param name="frameCount">Number of sample frames to convert; clamped to the clip length.</param>
    /// <returns>The PCM bytes, or null when there is nothing to convert.</returns>
    private static byte[] EncodeClip(AudioClip clip, int frameCount)
    {
        if (clip == null)
        {
            Debug.LogError("录音数据为空");
            return null;
        }

        frameCount = Mathf.Clamp(frameCount, 0, clip.samples);
        if (frameCount == 0)
        {
            Debug.LogError("录音数据为空");
            return null;
        }

        int channels = clip.channels;
        float[] samples = new float[frameCount * channels];
        clip.GetData(samples, 0);

        byte[] outData = new byte[frameCount * 2];
        for (int frame = 0; frame < frameCount; frame++)
        {
            // Only the first channel is kept because the request declares a
            // single channel. Clamp so the cast to short cannot overflow.
            float sample = Mathf.Clamp(samples[frame * channels], -1f, 1f);
            short pcm = (short)(sample * PcmRescaleFactor);

            // Little-endian byte order, independent of the host platform.
            outData[frame * 2] = (byte)(pcm & 0xFF);
            outData[frame * 2 + 1] = (byte)((pcm >> 8) & 0xFF);
        }

        return outData;
    }
}

上面的百度语音识别脚本主要借鉴了另一篇文章,原文地址已经记不清了。

百度语音合成部分

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using System.Xml;
using LitJson;
using System.Text;
using System;
using UnityEngine.UI;
using System.IO;

/// <summary>
/// Sends the text shown in <see cref="debugText"/> to the Baidu text-to-speech
/// REST endpoint, saves the returned audio as an MP3 file under
/// Application.persistentDataPath and loads it into the AudioSource clip.
/// Typical flow: <see cref="startTTS"/> -> token request -> synthesis request
/// -> file write -> clip load; <see cref="playAud"/> plays the result.
/// </summary>
public class showTextTTSResult : MonoBehaviour {

    private string token;                           // access_token returned by the OAuth endpoint
    private string cuid = "***";                  // current user id
    private int ctp = 1; // client type; the original author notes that 1 (web) is the only value
    private string lan = "zh";
    private int spd = 5;    // sent as the "spd" form field
    private int pit = 5;    // sent as the "pit" form field
    private int vol = 5;    // sent as the "vol" form field
    private int per = 3;    // person voice

    private string grant_Type = "client_credentials";
    private string client_ID = "****";                       // Baidu app key
    private string client_Secret = "****";                   // Baidu secret key

    private string baiduAPI = "http://tsn.baidu.com/text2audio";
    private string getTokenAPIPath = "https://openapi.baidu.com/oauth/2.0/token";

    public Text debugText;
    public Text debugMsg;

    /// <summary>
    /// Kept for parity with the recognition script; not written by this class.
    /// </summary>
    public static string audioToString;

    private AudioSource aud;
    private string filePath;

    void Start () {
        EnsureAudioSource();
    }

    /// <summary>
    /// Makes sure <see cref="aud"/> refers to an AudioSource on this GameObject,
    /// adding one when none is attached.
    /// </summary>
    private void EnsureAudioSource()
    {
        if (aud != null)
        {
            return;
        }

        aud = GetComponent<AudioSource>();
        if (aud == null)
        {
            aud = gameObject.AddComponent<AudioSource>();
        }
    }

    /// <summary>
    /// Appends a line to the on-screen message box when one is assigned.
    /// </summary>
    /// <param name="message">Text to append.</param>
    private void AppendMessage(string message)
    {
        if (debugMsg != null)
        {
            debugMsg.text += message;
        }
    }

    /// <summary>
    /// Replaces the on-screen message with "fail" when a message box is assigned.
    /// </summary>
    private void ShowFailure()
    {
        if (debugMsg != null)
        {
            debugMsg.text = "fail";
        }
    }

    /// <summary>
    /// Requests an access token and, on success, starts the synthesis request.
    /// </summary>
    /// <param name="url">URL of the token endpoint.</param>
    /// <returns>Coroutine enumerator.</returns>
    private IEnumerator GetToken(string url)
    {
        WWWForm getTForm = new WWWForm();
        getTForm.AddField("grant_type", grant_Type);
        getTForm.AddField("client_id", client_ID);
        getTForm.AddField("client_secret", client_Secret);

        WWW getTW = new WWW(url, getTForm);
        yield return getTW;

        Debug.Log (getTW.text);

        // WWW.error may be null or an empty string when the request succeeded.
        if (!string.IsNullOrEmpty(getTW.error))
        {
            Debug.LogError(getTW.error);
            ShowFailure();
            yield break;
        }

        string accessToken = ExtractAccessToken(getTW.text);
        if (string.IsNullOrEmpty(accessToken))
        {
            ShowFailure();
            yield break;
        }

        token = accessToken;
        Debug.Log (token);
        AppendMessage("token:"+token+"\n");
        StartCoroutine(GetTextAudio(baiduAPI));
    }

    /// <summary>
    /// Reads the "access_token" member from a token response.
    /// </summary>
    /// <param name="json">Raw response body.</param>
    /// <returns>The token, or null when the body could not be parsed.</returns>
    private static string ExtractAccessToken(string json)
    {
        try
        {
            return JsonMapper.ToObject(json)["access_token"].ToString();
        }
        catch (Exception e)
        {
            // Malformed JSON or a response without access_token (e.g. an error object).
            Debug.LogError("Failed to read access_token from token response: " + e.Message + "\n" + json);
            return null;
        }
    }

    /// <summary>
    /// Requests synthesized speech for the text in <see cref="debugText"/>,
    /// writes it to disk and loads it into the AudioSource clip.
    /// </summary>
    /// <param name="url">URL of the synthesis endpoint.</param>
    /// <returns>Coroutine enumerator.</returns>
    private IEnumerator GetTextAudio(string url){
        string sourceText = debugText != null ? debugText.text : null;
        if (string.IsNullOrEmpty(sourceText))
        {
            Debug.LogError("No text to synthesize");
            ShowFailure();
            yield break;
        }

        // Form fields: lan ctp cuid tok tex vol per spd pit
        WWWForm getTForm = new WWWForm();
        getTForm.AddField ("lan", lan);
        getTForm.AddField ("ctp", ctp);
        getTForm.AddField ("cuid", cuid);
        getTForm.AddField ("tok", token);
        getTForm.AddField ("tex", sourceText);
        getTForm.AddField ("vol",vol);
        getTForm.AddField ("per", per);
        getTForm.AddField ("spd", spd);
        getTForm.AddField ("pit", pit);

        WWW getTW = new WWW (url,getTForm);
        yield return getTW;

        // Check the outcome before touching the file system.
        if (!string.IsNullOrEmpty(getTW.error))
        {
            Debug.LogError(getTW.error);
            ShowFailure();
            yield break;
        }

        byte[] audioBytes = getTW.bytes;

        // A body that is empty or starts with '{' is treated as an error
        // report rather than audio, so it is never saved as an MP3.
        // NOTE(review): assumes the service reports failures as a JSON object - confirm against the API docs.
        if (audioBytes == null || audioBytes.Length == 0 || LooksLikeJson(audioBytes))
        {
            Debug.LogError("Text-to-speech request failed: " + getTW.text);
            ShowFailure();
            yield break;
        }

        filePath = Application.persistentDataPath+"/1.mp3";
        if (!writeFile (audioBytes, filePath)) {
            ShowFailure();
            yield break;
        }

        AppendMessage("success to translate txt to voice\n");
        AppendMessage("the voice byte[] length:"+audioBytes.Length+"\n");

        // Wait for the local file to finish loading before asking for the clip.
        WWW w = new WWW ("file://"+filePath);
        yield return w;

        if (!string.IsNullOrEmpty(w.error))
        {
            Debug.LogError(w.error);
            ShowFailure();
            yield break;
        }

        EnsureAudioSource();
        aud.clip = w.GetAudioClip (false, false, AudioType.MPEG);

        Debug.Log (sourceText);
        Debug.Log (audioBytes.Length);
    }

    /// <summary>
    /// Tells whether the first non-whitespace byte of a buffer is '{'.
    /// </summary>
    /// <param name="data">Response bytes; must not be null.</param>
    /// <returns>True when the payload starts like a JSON object.</returns>
    private static bool LooksLikeJson(byte[] data)
    {
        for (int i = 0; i < data.Length; i++)
        {
            byte b = data[i];
            if (b == (byte)' ' || b == (byte)'\t' || b == (byte)'\r' || b == (byte)'\n')
            {
                continue;
            }

            return b == (byte)'{';
        }

        return false;
    }

    /// <summary>
    /// Writes a byte array to a file, replacing any existing content.
    /// </summary>
    /// <param name="readByte">Bytes to write.</param>
    /// <param name="fileName">Destination path.</param>
    /// <returns>True when the file was written; false otherwise.</returns>
    private bool writeFile(byte[] readByte,string fileName){
        if (readByte == null)
        {
            return false;
        }

        try
        {
            // WriteAllBytes creates the file or overwrites and truncates it,
            // so no stale bytes from a previous, longer file remain.
            File.WriteAllBytes(fileName, readByte);
            return true;
        }
        catch (Exception e)
        {
            Debug.LogError("Failed to write " + fileName + ": " + e.Message);
            return false;
        }
    }

    /// <summary>
    /// Clears the message box and starts the token + synthesis requests.
    /// </summary>
    public void startTTS()
    {
        if (debugMsg != null)
        {
            debugMsg.text = "";
        }

        StartCoroutine(GetToken(getTokenAPIPath));
    }

    /// <summary>
    /// Plays the currently loaded clip and reports the AudioSource state.
    /// </summary>
    public void playAud(){
        EnsureAudioSource();

        aud.Play ();
        AppendMessage("play the audio:"+aud.isPlaying+"\n");
        AppendMessage("the audio useful:"+aud.enabled+"\n");
    }
}

语音合成脚本是仿照上面的语音识别脚本写的。重点在于:接口返回的音频无法在Unity中直接播放,因此脚本先把它写入文件再加载。这里主要是文件夹权限问题:Unity可读写的文件夹和Android原生应用不一样,有固定的对应目录,Application.persistentDataPath就是其中一个可读写的文件夹(相关知识有博客介绍,可自行搜索,具体地址已经记不清了)。aud.clip = w.GetAudioClip (false, false, AudioType.MPEG); 这一行的作用是把加载到的MP3文件赋给Unity的音频对象。


时隔几个月,现在使用此脚本时报错了(¥_¥),而上周在项目中还能正常使用。
补充一下目前的情况:在评论区给同学答疑时发现,麦克风设备只能有一个,如果有两个就无法Start。不过现在麦克风已经能启动,但仍然获取不到数据,还在解决中……@_@

最近事情多,找到解决方案后再进行说明。。。

猜你喜欢

转载自blog.csdn.net/wzw_ice/article/details/79031390