Android语音转文字一使用AudioRecord录音

  参考:https://www.cnblogs.com/sowhat4999/p/4439837.html

 为什么不直接使用谷歌封装好的MediaRecorder,而要使用AudioRecord录音呢?因为项目中需要将语音转成文字,而讯飞语音听写只支持PCM和WAV格式的音频;把录音转成WAV格式,也是为了方便直接播放语音。

下面开始上代码吧;

public class AudioRecorder {
  // Sample rate: 8000 Hz (telephone quality), which is sufficient for human speech.
  public static final int LONG_SAMPLE_RATE = 8000;
  // Sample format: 16-bit PCM, which devices are guaranteed to support
  // (8-bit PCM is not necessarily supported).
  public static final int ENCODING_PCM_16_BIT = AudioFormat.ENCODING_PCM_16BIT;
  // Recording channel configuration: CHANNEL_IN_STEREO records two channels;
  // CHANNEL_IN_MONO would record a single channel.
  public static final int CHANNEL_IN_STEREO = AudioFormat.CHANNEL_IN_STEREO;

  // Size in bytes of the capture buffer, as returned by AudioRecord.getMinBufferSize().
  private int bufferSizeInBytes;
  // Path of the temporary file holding the raw PCM data.
  private String mRawFilePath;

  // Path of the resulting WAV file.
  private String mWavFilePath;

  private AudioRecord mAudioRecord;
  // Whether the recording has been cancelled. Written by the thread that calls
  // start()/cancel() and polled by the pool thread that saves the audio, so it must be
  // volatile for the change to become visible to that thread.
  private volatile boolean mCancel;
  // Whether a recording is in progress. Volatile for the same cross-thread visibility reason.
  private volatile boolean mRunning;
  private Context mContext;
  // The single shared instance. Volatile so that the unsynchronized read in getInstance()
  // can never observe a partially constructed object.
  private static volatile AudioRecorder mAudioRecorder;
  private final ThreadPoolProxy mThreadPoolProxy;
  private OnRecordCompleteListener mCompleteListener;
  // System.currentTimeMillis() value captured when the current recording was started.
  private long mStartTimes;
  // Length of the last recording in whole seconds, computed in stop().
  private int mLen;

  /**
   * Creates the recorder and computes the minimum capture buffer size for the configured
   * sample rate, channel configuration and sample format.
   *
   * <p>This object is kept in a static field, so the application context is stored instead
   * of the context passed in; otherwise an Activity handed to {@code getInstance} would be
   * retained for the lifetime of the process.
   *
   * @param context any context; its application context is retained when one is available
   */
  private AudioRecorder(Context context) {
    Context appContext = context != null ? context.getApplicationContext() : null;
    // Fall back to the given context if no application context is available.
    mContext = appContext != null ? appContext : context;
    // Use the shared thread pool rather than spawning a new thread per recording.
    mThreadPoolProxy = ThreadPoolProxy.getInstance();
    // Smallest buffer size the platform accepts for this audio configuration.
    bufferSizeInBytes =
        AudioRecord.getMinBufferSize(LONG_SAMPLE_RATE, CHANNEL_IN_STEREO,
            ENCODING_PCM_16_BIT);
  }

  /**
   * Starts capturing audio from the microphone and schedules the background task that
   * saves the captured data.
   *
   * <p>Does nothing when a recording is already running. If the underlying
   * {@code AudioRecord} cannot be initialized (for example when the microphone is
   * unavailable or the record permission is missing), the instance is released and the
   * method returns without recording; {@link #isRunning()} then stays {@code false}.
   */
  public void start() {
    if (mRunning) {
      return;
    }

    if (mAudioRecord == null) {
      mAudioRecord = new AudioRecord(MediaRecorder.AudioSource.MIC, LONG_SAMPLE_RATE,
          CHANNEL_IN_STEREO, ENCODING_PCM_16_BIT, bufferSizeInBytes);
    }
    // startRecording() throws IllegalStateException on an uninitialized instance, so
    // check the state first and drop the unusable object instead of caching it.
    if (mAudioRecord.getState() != AudioRecord.STATE_INITIALIZED) {
      mAudioRecord.release();
      mAudioRecord = null;
      return;
    }
    mAudioRecord.startRecording();
    mStartTimes = System.currentTimeMillis();
    mRunning = true;
    mCancel = false;
    mThreadPoolProxy.excute(mRunnable);
  }



  // Background task: saves the captured PCM data to the raw file, converts it to WAV and
  // then reports the WAV path to the listener.
  private Runnable mRunnable = () -> {
    writeDateTOFile();
    rawToWav(mRawFilePath, mWavFilePath);
    if (mCancel) {
      // A cancelled recording has no result: rawToWav() deletes the WAV output when the
      // cancel flag is set, so handing its path to the listener would point at a missing file.
      return;
    }
    if (mCompleteListener != null) {
      mCompleteListener.onRecordComplete(mWavFilePath);
    }
  };
  
  /**
   * Stops the current recording, reports its length in whole seconds to the listener (if
   * one is set) and releases the underlying {@code AudioRecord}.
   *
   * <p>Does nothing when no recorder exists. The recorder is released even if the
   * listener or {@code AudioRecord.stop()} throws.
   */
  public void stop() {
    if (mAudioRecord != null) {
      // Divide in long arithmetic first, then narrow the number of seconds to int.
      mLen = (int) ((System.currentTimeMillis() - mStartTimes) / 1000);
      try {
        // The listener is optional; the background task null-checks it as well.
        if (mCompleteListener != null) {
          mCompleteListener.onRecordComplete(mLen);
        }
      } finally {
        mRunning = false;
        try {
          mAudioRecord.stop();
        } finally {
          // Always free the native recorder, even when stop() fails.
          mAudioRecord.release();
          mAudioRecord = null;
        }
      }
    }
  }

保存音频数据

 /**
  * Reads PCM data from the recorder and appends it to a new raw file until the recording
  * is cancelled or the recorder reports an error.
  *
  * <p>The raw and WAV output paths are derived from the current time and stored in
  * {@code mRawFilePath} and {@code mWavFilePath}, inside an "audio" sub-directory of the
  * application cache directory.
  */
 private void writeDateTOFile() {
    byte[] audiodata = new byte[bufferSizeInBytes];
    // Work on a local reference: stop() sets the field to null from another thread, which
    // would otherwise end this loop with a NullPointerException.
    final android.media.AudioRecord recorder = mAudioRecord;

    final File dir = new File(FileUtils.getAppCacheDir(mContext) + "/audio");
    if (!dir.exists()) {
      // mkdirs() also creates missing parent directories.
      dir.mkdirs();
    }

    final String cacheDir = dir + File.separator + System.currentTimeMillis();

    mRawFilePath = cacheDir + "R.raw";

    mWavFilePath = cacheDir + "W.wav";
    try (FileOutputStream fos = new FileOutputStream(mRawFilePath)) {
      while (recorder != null && !mCancel) {
        int readsize = recorder.read(audiodata, 0, audiodata.length);
        if (readsize < 0) {
          // Every negative value is an error code (e.g. the recorder has been released),
          // so no further data will arrive.
          break;
        }
        if (readsize > 0) {
          // Write only the bytes actually read; the rest of the buffer is stale data.
          fos.write(audiodata, 0, readsize);
        }
      }
    } catch (IOException | RuntimeException e) {
      e.printStackTrace();
    }
  }

  /**
   * Converts a raw PCM file into a WAV file by writing a 44-byte WAV header followed by
   * the PCM data, then deletes the raw file.
   *
   * <p>If the recording has been cancelled, no WAV file is kept: conversion is skipped
   * when the cancel flag is already set, and a partially written WAV file is deleted when
   * the flag is raised during conversion.
   *
   * @param rawPath path of the unprocessed PCM audio file
   * @param wavPath path of the WAV file to create
   */
  private void rawToWav(String rawPath, String wavPath) {
    // Bytes of audio per second: 16 bits per sample * sample rate * 2 channels / 8.
    long byteRate = 16 * LONG_SAMPLE_RATE * 2 / 8;
    boolean cancelled = mCancel;
    boolean wavCreated = false;

    if (!cancelled) {
      try (FileInputStream fis = new FileInputStream(rawPath);
          FileOutputStream fos = new FileOutputStream(wavPath)) {
        wavCreated = true;
        // Guard against a non-positive buffer size, which would make read() return 0 forever.
        byte[] audiodata = new byte[bufferSizeInBytes > 0 ? bufferSizeInBytes : 4096];
        long totalAudioLen = fis.getChannel().size();
        // The RIFF chunk size covers the data plus the 36 header bytes that follow it.
        long totalDataLen = totalAudioLen + 36;

        writeWavFileHeader(fos, totalAudioLen, totalDataLen, byteRate);
        int count;
        while ((count = fis.read(audiodata)) != -1) {
          if (mCancel) {
            cancelled = true;
            break;
          }
          // Copy only the bytes read; the final chunk is usually shorter than the buffer.
          fos.write(audiodata, 0, count);
        }
      } catch (IOException | RuntimeException e) {
        e.printStackTrace();
      }
    }

    // Delete the incomplete WAV file only after its stream has been closed.
    if (cancelled && wavCreated) {
      FileUtils.deleteFile(wavPath);
    }

    File pcm = new File(rawPath);
    if (pcm.exists()) {
      pcm.delete();
    }
  }

  /**
   * Writes the 44-byte WAV (RIFF) header for 16-bit, two-channel PCM audio sampled at
   * {@code LONG_SAMPLE_RATE}. Multi-byte numbers are stored least significant byte first.
   *
   * @param fos stream the header is written to
   * @param totalAudioLen length of the audio data in bytes
   * @param totalDataLen length of the audio data plus the header fields that follow the size
   * @param byteRate number of audio bytes per second
   * @throws IOException if writing to the stream fails
   */
  private void writeWavFileHeader(FileOutputStream fos, long totalAudioLen, long totalDataLen,
      long byteRate) throws IOException {
    final int headerSize = 44;
    final byte[] wavHeader = new byte[headerSize];
    int cursor = 0;

    // RIFF chunk descriptor.
    cursor = putAscii(wavHeader, cursor, "RIFF");
    cursor = putLittleEndian(wavHeader, cursor, totalDataLen, 4);
    cursor = putAscii(wavHeader, cursor, "WAVE");

    // "fmt " sub-chunk.
    cursor = putAscii(wavHeader, cursor, "fmt ");
    cursor = putLittleEndian(wavHeader, cursor, 16, 4);               // sub-chunk size
    cursor = putLittleEndian(wavHeader, cursor, 1, 2);                // audio format 1
    cursor = putLittleEndian(wavHeader, cursor, 2, 2);                // channel count
    cursor = putLittleEndian(wavHeader, cursor, LONG_SAMPLE_RATE, 4); // sample rate
    cursor = putLittleEndian(wavHeader, cursor, byteRate, 4);         // bytes per second
    cursor = putLittleEndian(wavHeader, cursor, 2 * 16 / 8, 2);       // bytes per sample frame
    cursor = putLittleEndian(wavHeader, cursor, 16, 2);               // bits per sample

    // "data" sub-chunk header.
    cursor = putAscii(wavHeader, cursor, "data");
    putLittleEndian(wavHeader, cursor, totalAudioLen, 4);

    fos.write(wavHeader, 0, headerSize);
  }

  /** Copies the characters of {@code text} into {@code dst} as single bytes; returns the next offset. */
  private static int putAscii(byte[] dst, int offset, String text) {
    for (int i = 0; i < text.length(); i++) {
      dst[offset + i] = (byte) text.charAt(i);
    }
    return offset + text.length();
  }

  /** Stores the low {@code width} bytes of {@code value}, lowest byte first; returns the next offset. */
  private static int putLittleEndian(byte[] dst, int offset, long value, int width) {
    for (int i = 0; i < width; i++) {
      dst[offset + i] = (byte) ((value >> (8 * i)) & 0xff);
    }
    return offset + width;
  }

  /**
   * Cancels the current recording: raises the cancel flag first, then stops the recorder
   * through {@link #stop()}.
   */
  public void cancel() {
    this.mCancel = true;
    this.stop();
  }

  /**
   * Registers the listener that receives recording results, replacing any previous one.
   *
   * @param completeListener listener to store
   */
  public void setOnRecordCompleteListener(OnRecordCompleteListener completeListener) {
    this.mCompleteListener = completeListener;
  }

  /**
   * Closes the given resource, printing the stack trace of any {@link IOException} instead
   * of propagating it. A {@code null} argument is ignored.
   *
   * @param fos resource to close, may be {@code null}
   */
  void ioClose(Closeable fos) {
    if (fos == null) {
      return;
    }
    try {
      fos.close();
    } catch (IOException closeFailure) {
      closeFailure.printStackTrace();
    }
  }

  /**
   * Returns the shared recorder, creating it on the first call.
   *
   * <p>The check and the creation both happen while holding the class lock, so every
   * caller sees a fully constructed instance regardless of how the backing field is
   * declared.
   *
   * @param context context passed to the constructor when the instance is first created;
   *     ignored on later calls
   * @return the single {@code AudioRecorder} instance
   */
  public static AudioRecorder getInstance(Context context) {
    synchronized (AudioRecorder.class) {
      if (mAudioRecorder == null) {
        mAudioRecorder = new AudioRecorder(context);
      }
      return mAudioRecorder;
    }
  }

  /**
   * Reports the running flag, which {@code start()} sets and {@code stop()} clears.
   *
   * @return {@code true} while a recording is marked as in progress
   */
  public boolean isRunning() {
    final boolean recording = this.mRunning;
    return recording;
  }
}

线程池管理

/**
 * Gives access to one shared {@link ThreadPoolExecutor}.
 *
 * <p>The executor has 3 core threads, at most 6 threads, a keep-alive time of 0
 * milliseconds and a bounded queue of 1024 pending tasks. Tasks that cannot be accepted
 * are rejected with {@link ThreadPoolExecutor.AbortPolicy}. Threads are named "pool-%d".
 */
public class ThreadPoolProxy {

  /** Holds the single instance in a static final field. */
  static class InstanceHolder {
    private static final ThreadPoolProxy threadPoolProxy = new ThreadPoolProxy();
  }

  private final ThreadPoolExecutor mSingleThreadPool;

  private ThreadPoolProxy() {
    final int coreThreads = 3;
    final int maxThreads = 6;
    final long keepAliveMillis = 0L;
    final int queueCapacity = 1024;

    final ThreadFactory workerFactory =
        new ThreadFactoryBuilder().setNameFormat("pool-%d").build();
    final LinkedBlockingQueue<Runnable> pendingTasks =
        new LinkedBlockingQueue<Runnable>(queueCapacity);
    final ThreadPoolExecutor.AbortPolicy rejectionPolicy = new ThreadPoolExecutor.AbortPolicy();

    mSingleThreadPool = new ThreadPoolExecutor(coreThreads, maxThreads,
        keepAliveMillis, TimeUnit.MILLISECONDS,
        pendingTasks, workerFactory, rejectionPolicy);
  }

  /**
   * Returns the shared instance.
   *
   * @return the single {@code ThreadPoolProxy}
   */
  public static ThreadPoolProxy getInstance() {
    return InstanceHolder.threadPoolProxy;
  }

  /**
   * Submits a task to the pool.
   *
   * @param runnable task to run
   */
  public void excute(Runnable runnable) {
    this.mSingleThreadPool.execute(runnable);
  }

  /**
   * Removes a task from the pool's queue if it is present there.
   *
   * @param runnable task to remove
   */
  public void remove(Runnable runnable) {
    this.mSingleThreadPool.remove(runnable);
  }
}

猜你喜欢

转载自blog.csdn.net/zhanlv/article/details/83544838