微信小程序语音识别（调用讯飞语音听写接口）

参考文章：https://blog.xinpapa.com/2017/10/30/silk-wav/

电脑需安装gcc（https://www.jianshu.com/p/ff24a81f3637）和ffmpeg（http://download.csdn.net/download/keketrtr/10206787）

项目需求，需要使用讯飞的语音识别接口，将微信小程序上传的录音文件识别成文字返回

首先去讯飞开放平台中申请开通语音识别功能

在讯飞开放平台的控制台中下载SDK，然后解压。注意appid与SDK是绑定的，初始化接口时需要用到appid。

我觉得需要注意的点：

1、下载讯飞语音SDK，把lib下的json-jena-1.0.jar和Msc.jar拷贝到工程的lib目录下。

2、将libmsc32.so、libmsc64.so、msc32.dll、msc64.dll这四个文件放到项目根目录下（普通java项目）或者WEB-INF/lib目录下（javaweb项目），对于web项目，也可以放到tomcat/bin或者引用jdk的bin目录下，但我觉得直接放到项目的lib下更好。

但如果是linux系统，则需要将库文件放到/home/jdk1.7.0_79/lib/amd64和/home/jdk1.7.0_79/jre/lib/amd64目录下，即jdk和jre各自的lib/amd64目录下（linux下实际生效的是.so文件，.dll文件只在windows下使用）。索性jdk和jre都放一份。

（不然讯飞语音会报错20021）

3、我在程序中还用到了获取音频文件时长，报错Could not get native library for ffmpeg-amd64，

解决办法是找到ffmpeg文件（默认在/usr/bin/下面），将其copy到/home/apache-tomcat-7.0.84/temp/jave-2（即tomcat安装目录下的子目录）下。然后重命名为 ffmpeg-amd64，问题解决。

/**
 * Returns the duration of an audio file in whole seconds (uses jave-2.0.jar).
 *
 * @param filePath path of the audio file to inspect
 * @return the duration in seconds, truncated towards zero; {@code 0} when the
 *         media information cannot be read
 */
private long getDuration(String filePath) {
   File source = new File(filePath);
   Encoder encoder = new Encoder();
   try {
      MultimediaInfo info = encoder.getInfo(source);
      // The reported value is divided by 1000 to obtain seconds, i.e. it is
      // treated as milliseconds here.
      return info.getDuration() / 1000;
   } catch (EncoderException e) {
      // Previously execution continued with a null MultimediaInfo and failed
      // with a NullPointerException; report the problem and return 0 instead.
      e.printStackTrace();
      return 0;
   }
}

我把我的java代码实现的微信小程序语音识别分享出来（已经跑通）

import net.sf.json.JSONObject;

import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Map;

/**
 * Speech-to-text helper for recordings uploaded by a WeChat mini program.
 * A SILK file is converted to WAV with external command line tools and the
 * WAV file is then handed to {@link SRTool} for recognition.
 *
 * Created by ZhaoYuJie on 2018/1/15.
 */
public class VoiceUtil {

    /** Sample rate (Hz) passed to the SILK decoder and to ffmpeg. */
    private static final String SAMPLE_RATE = "16000";

    /**
     * Converts a SILK recording to text.
     *
     * @param silkPath path of the SILK file
     * @return the recognized text, or {@code null} when the operating system
     *         is neither Linux nor Windows or when recognition failed with an
     *         exception
     */
    public static String convertSilkToText(String silkPath){
        String basePath = stripExtension(silkPath);
        String pcmPath = basePath + ".pcm";
        String wavPath = basePath + ".wav";

        // The conversion tool chain differs per operating system.
        String os = System.getProperty("os.name").toLowerCase();

        if (os.contains("linux")) {
            wavPath = convertSilkToWav(silkPath);
        } else if (os.contains("windows")) {
            convertSilkToPcm(silkPath, pcmPath);
            convertPcmToWav(pcmPath, wavPath);
        } else {
            return null;
        }
        return wavToWords(wavPath);
    }

    /**
     * Removes everything from the last '.' onwards. A path without any '.'
     * is returned unchanged instead of failing with an index error.
     */
    private static String stripExtension(String path) {
        int dot = path.lastIndexOf('.');
        return dot >= 0 ? path.substring(0, dot) : path;
    }

    /**
     * Runs an external command and blocks until it has finished.
     * Arguments are passed as separate tokens (no shell parsing), so paths
     * containing spaces or shell metacharacters are handled safely.
     *
     * @param command program followed by its arguments
     * @return {@code true} when the process ran and exited with code 0
     */
    private static boolean runAndWait(String... command) {
        for (String token : command) {
            if (token == null) {
                // Typically a missing entry in config.properties.
                System.err.println("Cannot run external command: missing program path or argument");
                return false;
            }
        }
        try {
            ProcessBuilder builder = new ProcessBuilder(command);
            // Forward the tool's output to this JVM's console so the child
            // can never block on a full, unread pipe.
            builder.redirectOutput(ProcessBuilder.Redirect.INHERIT);
            builder.redirectError(ProcessBuilder.Redirect.INHERIT);
            int exitCode = builder.start().waitFor();
            if (exitCode != 0) {
                System.err.println("External command '" + command[0] + "' exited with code " + exitCode);
            }
            return exitCode == 0;
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            // Keep the interrupt visible to callers further up the stack.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }
        return false;
    }

    /**
     * Converts a SILK file to a PCM file (Windows only).
     *
     * @param silkPath path of the input SILK file, e.g. D:/silk.silk
     * @param pcmPath path of the PCM file to produce, e.g. D:/result.pcm
     */
    private static void convertSilkToPcm(String silkPath,String pcmPath){
        File silk = new File(silkPath);
        File pcm = new File(pcmPath);
        // "-Fs_API 16000" sets the sample rate to 16000 Hz. The call blocks
        // until the decoder has finished so that the PCM file is complete
        // before ffmpeg reads it.
        runAndWait(
                ConfigUtil.getValue("windows_silk_convert_path"),
                silk.getAbsolutePath(),
                pcm.getAbsolutePath(),
                "-Fs_API", SAMPLE_RATE);
    }

    /**
     * Converts a PCM file to a WAV file with ffmpeg (Windows only).
     *
     * @param pcmPath path of the PCM file to convert, e.g. D:/result.pcm
     * @param wavPath path of the WAV file to produce, e.g. D:/result.wav
     */
    private static void convertPcmToWav(String pcmPath,String wavPath){
        File pcm = new File(pcmPath);
        File wav = new File(wavPath);
        // Raw signed 16-bit little-endian mono input at 16 kHz; -y overwrites
        // an existing output file.
        runAndWait(
                "ffmpeg.exe", "-loglevel", "quiet", "-y",
                "-f", "s16le", "-ar", SAMPLE_RATE, "-ac", "1",
                "-i", pcm.getAbsolutePath(),
                wav.getAbsolutePath());
    }

    /**
     * Converts a SILK file to a WAV file by running converter.sh (Linux only).
     * <br/>
     * Line 70 of converter.sh has to be edited: in
     * ffmpeg -y -f s16le -ar 24000 -ac 1 -i "$1.pcm" "${1%.*}.$2" > /dev/null 2>&1
     * the sample rate 24000 must be changed to 16000.
     *
     * @param silkPath path of the SILK file
     * @return path of the WAV file (the SILK path with its extension replaced by .wav)
     */
    private static String convertSilkToWav(String silkPath) {
        File silk = new File(silkPath);
        runAndWait(
                "sh",
                ConfigUtil.getValue("linux_silk_convert_path"),
                silk.getAbsolutePath(),
                "wav");
        return stripExtension(silkPath) + ".wav";
    }

    /**
     * Sends a WAV file to the recognizer and extracts the text.
     *
     * @param wavPath path of the WAV file
     * @return the recognized text, or {@code null} when the recognizer call
     *         failed with an exception
     */
    private static String  wavToWords(String wavPath){
        SRTool sr = new SRTool();
        String words = null;
        try {
            words = sr.voice2words(wavPath);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        System.out.println("讯飞语音返回的json串："+words);
        if (words == null) {
            // Recognition failed; there is nothing to parse.
            return null;
        }
        String result = sr2words(words);
        System.out.println("讯飞识别的语音结果："+result);
        return result;
    }

    /**
     * Extracts the recognized words from the recognizer output, which is a
     * sequence of JSON objects concatenated without separators. The objects
     * are located by tracking brace depth (ignoring braces inside string
     * literals) so that parsing does not depend on the key order of the
     * payload. An empty or null input yields an empty string.
     *
     * @param jsonString concatenated JSON result objects
     * @return the words of all result objects joined in order
     */
    private static String sr2words(String jsonString){
        StringBuilder text = new StringBuilder();
        if (jsonString == null) {
            return text.toString();
        }
        int depth = 0;
        int objectStart = -1;
        boolean inString = false;
        boolean escaped = false;
        for (int i = 0; i < jsonString.length(); i++) {
            char c = jsonString.charAt(i);
            if (inString) {
                if (escaped) {
                    escaped = false;
                } else if (c == '\\') {
                    escaped = true;
                } else if (c == '"') {
                    inString = false;
                }
                continue;
            }
            if (c == '"') {
                inString = true;
            } else if (c == '{') {
                if (depth == 0) {
                    objectStart = i;
                }
                depth++;
            } else if (c == '}' && depth > 0) {
                depth--;
                if (depth == 0) {
                    appendWords(jsonString.substring(objectStart, i + 1), text);
                }
            }
        }
        return text.toString();
    }

    /**
     * Parses one result object and appends the first candidate ("cw"[0]."w")
     * of every entry in its "ws" array to {@code out}. Entries without
     * candidates are skipped.
     */
    private static void appendWords(String json, StringBuilder out) {
        System.out.println(json);
        Map<?, ?> result = JSONObject.fromObject(json);
        List<?> ws = (List<?>) result.get("ws");
        if (ws == null) {
            return;
        }
        for (Object entry : ws) {
            List<?> cw = (List<?>) ((Map<?, ?>) entry).get("cw");
            if (cw == null || cw.isEmpty()) {
                continue;
            }
            Object w = ((Map<?, ?>) cw.get(0)).get("w");
            if (w != null) {
                out.append(w);
            }
        }
    }

    /**
     * Removes every character that is not an ASCII letter, an ASCII digit or
     * a Chinese character in the range U+4E00 to U+9FA5.
     *
     * @param str text to filter
     * @return the filtered text
     */
    public static String delRedundantCharacters(String str){
        return str.replaceAll("(?i)[^a-zA-Z0-9\u4E00-\u9FA5]", "");
    }

    /** Manual smoke test against a local sample file. */
    public static void main(String[] args) {
        String content = convertSilkToText("D:\\upload\\voiceDir\\4\\oHosW0Yzg79GVpkMVl18yvZtvDzA\\f0178efc0fc84befb60c7196722b3345.silk");
        System.out.println("音频文件解析结果："+content);
    }

}

import com.iflytek.cloud.speech.*;
 
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
 
/**
 * Wrapper around the iFlytek speech SDK that feeds the bytes of an audio
 * file to a {@code SpeechRecognizer} and collects the result strings
 * delivered to the listener.
 *
 * An instance accumulates results in a single buffer, so it must not be used
 * for several recognitions at the same time.
 */
public class SRTool {

    /** Polling interval in milliseconds while waiting for the session to end. */
    private int perWaitTime = 100;

    /** Result strings appended by the listener callback. */
    private final StringBuffer mResult = new StringBuffer();

    static {
        // Initialise the SDK once with the application id from config.properties.
        SpeechUtility.createUtility(SpeechConstant.APPID+"="+ConfigUtil.getValue("XunFeiAppID"));
    }

    /**
     * Recognizes the speech in an audio file.
     *
     * @param fileName path of the audio file
     * @return the concatenated result strings reported by the recognizer
     * @throws InterruptedException if the thread is interrupted while waiting
     * @throws IOException if the file cannot be read
     */
    public String voice2words(String fileName) throws InterruptedException, IOException {
        return to(fileName);
    }

    /**
     * Reads the whole file, streams it to the recognizer in 4800-byte chunks
     * and waits until the recognizer reports that it is no longer listening.
     *
     * @param fileName path of the audio file
     * @return the concatenated result strings reported by the recognizer
     * @throws RuntimeException if the file does not exist
     * @throws InterruptedException if the thread is interrupted while waiting
     * @throws IOException if the file cannot be read
     */
    public String to(String fileName) throws InterruptedException, IOException {

        File file = new File(fileName);
        if(!file.exists()){
            throw new RuntimeException("要读取的文件不存在");
        }
        byte[] buf = readFully(file);

        // Discard results of an earlier call so a reused instance does not
        // return stale text in front of the new result.
        mResult.setLength(0);

        // 1. Create the SpeechRecognizer.
        SpeechRecognizer mIat = SpeechRecognizer.createRecognizer();
        // 2. Set the dictation parameters (see the SpeechConstant class in the
        //    MSC Reference Manual).
        mIat.setParameter(SpeechConstant.DOMAIN, "iat");
        mIat.setParameter(SpeechConstant.LANGUAGE, "zh_cn");
        // NOTE(review): the value has a trailing space; left as is because it
        // may be what the SDK sample uses - confirm against the SDK manual.
        mIat.setParameter(SpeechConstant.ACCENT, "mandarin ");
        // Presumably "-1" selects audio supplied through writeAudio instead
        // of a recording device - confirm against the SDK manual.
        mIat.setParameter(SpeechConstant.AUDIO_SOURCE, "-1");
        // 3. Start the session.
        mIat.startListening(mRecoListener);

        // Feed the audio in 4800-byte chunks (about 150 ms of 16 kHz 16-bit
        // mono audio per chunk).
        ArrayList<byte[]> buffers = splitBuffer(buf, buf.length, 4800);
        for (byte[] chunk : buffers) {
            mIat.writeAudio(chunk, 0, chunk.length);
        }
        mIat.stopListening();

        while (mIat.isListening()) {
            Thread.sleep(perWaitTime);
        }
        return mResult.toString();
    }

    /**
     * Reads a file completely into memory. A single {@code read} call may
     * return fewer bytes than requested, so reading loops until the buffer
     * is full; the stream is closed even when reading fails.
     */
    private static byte[] readFully(File file) throws IOException {
        long size = file.length();
        if (size > Integer.MAX_VALUE) {
            throw new IOException("File too large to load into memory: " + file);
        }
        byte[] data = new byte[(int) size];
        FileInputStream in = new FileInputStream(file);
        try {
            int offset = 0;
            while (offset < data.length) {
                int read = in.read(data, offset, data.length - offset);
                if (read < 0) {
                    throw new IOException("Unexpected end of file: " + file);
                }
                offset += read;
            }
        } finally {
            in.close();
        }
        return data;
    }

    /**
     * Splits a byte buffer into consecutive chunks of a fixed size; the last
     * chunk holds the remainder and may be shorter.
     *
     * @param buffer source bytes
     * @param length number of bytes of {@code buffer} to split
     * @param spsize chunk size in bytes
     * @return the chunks in order; empty when an argument is invalid
     */
    private ArrayList<byte[]> splitBuffer(byte[] buffer, int length, int spsize) {
        ArrayList<byte[]> array = new ArrayList<byte[]>();
        if (spsize <= 0 || length <= 0 || buffer == null
                || buffer.length < length) {
            return array;
        }
        for (int offset = 0; offset < length; offset += spsize) {
            int chunkSize = Math.min(spsize, length - offset);
            byte[] sdata = new byte[chunkSize];
            System.arraycopy(buffer, offset, sdata, 0, chunkSize);
            array.add(sdata);
        }
        return array;
    }

    /** Listener that collects result strings and prints errors. */
    private final RecognizerListener mRecoListener = new RecognizerListener() {
        public void onResult(RecognizerResult results, boolean isLast) {
            System.out.println("Result:" + results.getResultString());
            mResult.append(results.getResultString());
        }

        // Called when the session reports an error.
        public void onError(SpeechError error) {
            System.out.println(error.getErrorCode()+"=========="+error.getErrorDesc());
            System.out.println(error);
        }

        // Start of speech; nothing to do.
        public void onBeginOfSpeech() {
        }

        // Volume callback (0~30 according to the original note); nothing to do.
        public void onVolumeChange(int volume) {
        }

        @Override
        public void onVolumeChanged(int i) {

        }

        @Override
        public void onEndOfSpeech() {

        }

        @Override
        public void onEvent(int i, int i1, int i2, String s) {

        }
    };
}

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

/**
 * Read-only access to the entries of {@code config.properties}, which is
 * loaded once from the classpath when this class is initialised. When the
 * file is missing or unreadable the class still initialises and behaves as
 * if the file were empty.
 */
public class ConfigUtil {
   private static final Properties prop = new Properties();
   
   static {
      InputStream is = ConfigUtil.class.getClassLoader().getResourceAsStream("config.properties");
      if (is == null) {
         // getResourceAsStream returns null for a missing resource; loading
         // from null would throw and make the whole class unusable.
         System.err.println("config.properties not found on the classpath; no settings loaded");
      } else {
         try {
            prop.load(is);
         } catch (IOException e) {
            e.printStackTrace();
         } finally {
            try {
               is.close();
            } catch (IOException e) {
               e.printStackTrace();
            }
         }
      }
   }
   
   /**
    * Returns the value configured for a key.
    *
    * @param key property name
    * @return the value, or {@code null} when the key is not configured
    */
   public static String getValue(String key){
      return prop.getProperty(key);
   }
   
   /**
    * Returns the value configured for a key, or a default.
    *
    * @param key property name
    * @param defaultValue value returned when the key is not configured
    * @return the configured value or {@code defaultValue}
    */
   public static String getValue(String key,String defaultValue){
      return prop.getProperty(key,defaultValue);
   }
   
   /**
    * Returns the value configured for a key as an integer. Whitespace around
    * the value is ignored.
    *
    * @param key property name
    * @return the parsed value
    * @throws NumberFormatException when the key is not configured or its
    *         value is not a valid integer
    */
   public static Integer getInteger(String key){
      String raw = prop.getProperty(key);
      if (raw == null) {
         throw new NumberFormatException("No value configured for key: " + key);
      }
      return Integer.valueOf(raw.trim());
   }
   
}

config.properties

#linux系统下silk转wav的命令路径
linux_silk_convert_path=/usr/silk-v3-decoder-master/converter.sh
#windows系统下silk转pcm的解码程序路径
windows_silk_convert_path=D:/silk-v3-decoder-master/windows/silk_v3_decoder.exe

微信小程序语音识别（调用讯飞语音听写接口）

猜你喜欢