Unity speech recognition: a simple test demo that integrates Baidu AIP
Create a new script named BaiduASR.cs. Some parts of the script look up the nth child object according to the hierarchy of my own project, so when you actually use it you will need to adjust those lookups to match your own project. Let's go straight to the code.
using System ;
using System. Collections ;
using System. Text. RegularExpressions ;
using UnityEngine ;
using UnityEngine. EventSystems ;
using UnityEngine. Networking ;
using UnityEngine. UI ;
/// <summary>
/// Push-to-talk speech recognition button backed by the Baidu AIP speech REST API.
/// While the pointer is held down the default microphone is recorded; on release
/// the recording is played back, converted to 16-bit PCM and posted to Baidu, and
/// the recognised text is written to a sibling <see cref="Text"/> component.
/// </summary>
/// <remarks>
/// The component looks up its UI by hierarchy position: the button label is the
/// first child of this object and the result label is the second child of this
/// object's parent. Adjust <see cref="Start"/> if your hierarchy differs.
/// </remarks>
[RequireComponent(typeof(AudioListener)), RequireComponent(typeof(AudioSource))]
public class BaiduASR : MonoBehaviour, IPointerDownHandler, IPointerUpHandler
{
    // Offset between DateTime ticks (0001-01-01) and the Unix epoch (1970-01-01).
    const long UnixEpochTicks = 621355968000000000;

    // Credentials of the Baidu AIP application; replace with your own values.
    string apiKey = "自己的apiKey";
    string secretKey = "自己的secretKey";

    // OAuth token, fetched lazily on the first recognition request and then reused.
    string accessToken = string.Empty;
    string asrResult = string.Empty;

    private bool isHaveMic = false;
    string currentDeviceName = string.Empty;

    // Sample rate requested from the microphone and announced to the server (Hz).
    int recordFrequency = 8000;
    // Millisecond timestamp taken when the current recording was started.
    double lastPressTimestamp = 0;
    // Length of the recording buffer in seconds.
    int recordMaxLength = 10;
    // Duration of the last recording in whole seconds (rounded up).
    int trueLength = 0;
    // True between a successful StartRecording and the matching EndRecording.
    bool recordingStarted = false;

    [HideInInspector]
    public AudioClip saveAudioClip;

    Text textBtn;
    Text textResult;
    AudioSource audioSource;

    /// <summary>
    /// Picks the first available microphone and caches the UI/audio components.
    /// </summary>
    void Start()
    {
        if (Microphone.devices.Length > 0)
        {
            isHaveMic = true;
            currentDeviceName = Microphone.devices[0];
        }

        textBtn = this.transform.GetChild(0).GetComponent<Text>();
        audioSource = this.GetComponent<AudioSource>();
        textResult = this.transform.parent.GetChild(1).GetComponent<Text>();
    }

    /// <summary>
    /// Starts recording from the selected microphone into <see cref="saveAudioClip"/>.
    /// </summary>
    /// <param name="isLoop">Passed through to <c>Microphone.Start</c> as its loop flag.</param>
    /// <returns>
    /// <c>true</c> if a recording was started; <c>false</c> if there is no microphone,
    /// the device is already recording, or the microphone returned no clip.
    /// </returns>
    public bool StartRecording(bool isLoop = false)
    {
        if (!isHaveMic || Microphone.IsRecording(currentDeviceName))
        {
            return false;
        }

        lastPressTimestamp = GetTimestampOfNowWithMillisecond();
        saveAudioClip = Microphone.Start(currentDeviceName, isLoop, recordMaxLength, recordFrequency);
        recordingStarted = saveAudioClip != null;
        return recordingStarted;
    }

    /// <summary>
    /// Stops the current recording.
    /// </summary>
    /// <returns>
    /// The recording duration in whole seconds, rounded up, or 0 when nothing was
    /// being recorded.
    /// </returns>
    public int EndRecording()
    {
        bool wasStarted = recordingStarted;
        recordingStarted = false;

        if (!isHaveMic)
        {
            return 0;
        }

        bool stillRecording = Microphone.IsRecording(currentDeviceName);
        if (!stillRecording && !wasStarted)
        {
            return 0;
        }

        if (stillRecording)
        {
            Microphone.End(currentDeviceName);
        }

        int elapsedSeconds = Mathf.CeilToInt(
            (float)(GetTimestampOfNowWithMillisecond() - lastPressTimestamp) / 1000f);

        // A non-looping recording stops by itself once the buffer is full. In that
        // case the elapsed wall-clock time overstates the audio we actually have,
        // so cap it at the buffer length instead of reporting "nothing recorded".
        return stillRecording ? elapsedSeconds : Mathf.Min(elapsedSeconds, recordMaxLength);
    }

    /// <summary>
    /// Returns the current UTC time as whole milliseconds since the Unix epoch.
    /// </summary>
    public double GetTimestampOfNowWithMillisecond()
    {
        // Integer division on purpose: sub-millisecond ticks are discarded.
        return (DateTime.UtcNow.Ticks - UnixEpochTicks) / TimeSpan.TicksPerMillisecond;
    }

    /// <summary>
    /// Pointer pressed: update the button label and start recording.
    /// </summary>
    public void OnPointerDown(PointerEventData eventData)
    {
        textBtn.text = "松开识别";
        StartRecording();
    }

    /// <summary>
    /// Pointer released: stop recording and, if the recording lasted more than
    /// one second, play it back and send it for recognition.
    /// </summary>
    public void OnPointerUp(PointerEventData eventData)
    {
        textBtn.text = "按住说话";
        trueLength = EndRecording();

        if (trueLength > 1)
        {
            audioSource.PlayOneShot(saveAudioClip);
            StartCoroutine(_StartBaiduYuYin());
        }
        else
        {
            textResult.text = "录音时长过短";
        }
    }

    /// <summary>
    /// Coroutine that requests an OAuth access token from Baidu and stores it in
    /// <see cref="accessToken"/>. On failure the token stays empty and an error
    /// message is written to the result label.
    /// </summary>
    IEnumerator _GetAccessToken()
    {
        var uri = string.Format(
            "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials&client_id={0}&client_secret={1}",
            Uri.EscapeDataString(apiKey), Uri.EscapeDataString(secretKey));

        using (UnityWebRequest unityWebRequest = UnityWebRequest.Get(uri))
        {
            yield return unityWebRequest.SendWebRequest();

            // isDone is always true after the yield, so inspect the error instead.
            Match match = null;
            if (string.IsNullOrEmpty(unityWebRequest.error))
            {
                match = Regex.Match(unityWebRequest.downloadHandler.text, @"access_token.:.(.*?).,");
            }

            if (match != null && match.Success)
            {
                accessToken = match.Groups[1].ToString();
            }
            else
            {
                textResult.text = "验证错误,获取AccessToken失败!!!";
            }
        }
    }

    /// <summary>
    /// Coroutine that converts the recorded clip to 16-bit PCM, posts it to the
    /// Baidu speech recognition endpoint and writes the outcome to the result label.
    /// Fetches an access token first when none is cached.
    /// </summary>
    IEnumerator _StartBaiduYuYin()
    {
        if (string.IsNullOrEmpty(accessToken))
        {
            yield return _GetAccessToken();
        }

        if (string.IsNullOrEmpty(accessToken))
        {
            // The failure message was already shown by _GetAccessToken; sending a
            // request without a token cannot succeed.
            yield break;
        }

        if (saveAudioClip == null)
        {
            yield break;
        }

        asrResult = string.Empty;

        // Never ask for more samples than the clip actually holds.
        int channels = saveAudioClip.channels;
        int requestedCount = recordFrequency * trueLength * channels;
        int availableCount = saveAudioClip.samples * channels;
        int sampleCount = Mathf.Min(requestedCount, availableCount);
        if (sampleCount <= 0)
        {
            yield break;
        }

        float[] samples = new float[sampleCount];
        saveAudioClip.GetData(samples, 0);

        // Convert float samples to signed 16-bit; clamp so that out-of-range
        // values saturate instead of wrapping around in the cast.
        var samplesShort = new short[samples.Length];
        for (var index = 0; index < samples.Length; index++)
        {
            samplesShort[index] = (short)(Mathf.Clamp(samples[index], -1f, 1f) * short.MaxValue);
        }

        byte[] datas = new byte[samplesShort.Length * 2];
        Buffer.BlockCopy(samplesShort, 0, datas, 0, datas.Length);

        string url = string.Format(
            "{0}?cuid={1}&token={2}",
            "https://vop.baidu.com/server_api",
            Uri.EscapeDataString(SystemInfo.deviceUniqueIdentifier),
            Uri.EscapeDataString(accessToken));

        // The Content-Type below announces raw PCM, so the body must be the bare
        // audio bytes. A WWWForm would wrap them in a multipart envelope.
        using (UnityWebRequest unityWebRequest = new UnityWebRequest(url, UnityWebRequest.kHttpVerbPOST))
        {
            unityWebRequest.uploadHandler = new UploadHandlerRaw(datas);
            unityWebRequest.downloadHandler = new DownloadHandlerBuffer();
            unityWebRequest.SetRequestHeader("Content-Type", "audio/pcm;rate=" + recordFrequency);

            yield return unityWebRequest.SendWebRequest();

            if (string.IsNullOrEmpty(unityWebRequest.error))
            {
                asrResult = unityWebRequest.downloadHandler.text;
                if (Regex.IsMatch(asrResult, @"err_msg.:.success"))
                {
                    Match match = Regex.Match(asrResult, "result.:..(.*?)..]");
                    if (match.Success)
                    {
                        asrResult = match.Groups[1].ToString();
                    }
                }
                else
                {
                    asrResult = "识别结果为空";
                }
            }
            else
            {
                // Surface transport/HTTP failures instead of leaving stale text.
                asrResult = "语音识别请求失败: " + unityWebRequest.error;
            }

            textResult.text = asrResult;
        }
    }
}
A screenshot of the running program is shown below.