iOS语音书写功能(语音转文本)

Demo下载地址

最近在项目开发中,需要将语音识别转换成文本的功能。研究了下科大讯飞,附上Demo分享给大家。

研发前先得做一些准备。

1、注册科大讯飞开发者帐号(http://www.xfyun.cn

2、下载开发平台(iOS、或android,或其他)所需要的SDK(SDK包含:说明文档、SDK即iflyMSC.framework、Demo)

3、项目中添加SDK(添加时,先将SDK复制粘贴到项目文件,再通过addframe的方法添加到项目引用),及相关联的framework

添加方法:TARGETS-Build Phases-Link Binary With Libraries-"+"-Choose frameworks and libraries to add-add other,或选择对应的framework-add

4、使用时要添加对应的头文件


特别说明:

1、使用SDK关联的APPID存在于下载的Demo中,如果SDK有替换的话APPID应该跟着一起替换。

扫描二维码关注公众号,回复: 174735 查看本文章

2、添加其他framework  

 libz.tbd

 libc++.tbd

 CoreGraphics.framework

 QuartzCore.framework 

 AddressBook.framework 

 CoreLocation.framework 

 UIKit.framework 

 AudioToolbox.framework 

 Foundation.framework 

 SystemConfiguration.framework

 AVFoundation.framework

 CoreTelephoney.framework

3Bitcode属性设置为NO(TARGETS-Build Settings-Build Options-Enable Bitcode-NO)

4在使用前,务必在AppDelegate的方法中"

- (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {}"进行初始化操作。

5、需要有网络的情况下才能使用。


如图

下载的科大讯飞SDK文件


Demo中的APPID


添加SDK,及添加关联framework



设置Bitcode属性为 NO




语音转文件实现代码

.h文件

#import <Foundation/Foundation.h>

// 导入头文件
#import "iflyMSC.framework/Headers/IFlyMSC.h"
#import "iflyMSC.framework/Headers/IFlySpeechUtility.h"
#import "iflyMSC/IFlySpeechConstant.h"


#pragma mark - 初始化参数类

/**************************************************************************/

@interface IATConfig : NSObject

+ (IATConfig *)sharedInstance;

+ (NSString *)mandarin;
+ (NSString *)cantonese;
+ (NSString *)henanese;
+ (NSString *)chinese;
+ (NSString *)english;
+ (NSString *)lowSampleRate;
+ (NSString *)highSampleRate;
+ (NSString *)isDot;
+ (NSString *)noDot;


/**
 以下参数,需要通过 iFlySpeechRecgonizer 进行设置
 */
@property (nonatomic, strong) NSString *speechTimeout;
@property (nonatomic, strong) NSString *vadEos;
@property (nonatomic, strong) NSString *vadBos;

@property (nonatomic, strong) NSString *language;
@property (nonatomic, strong) NSString *accent;

@property (nonatomic, strong) NSString *dot;
@property (nonatomic, strong) NSString *sampleRate;


/**
 以下参数无需设置 不必关
 */
@property (nonatomic, assign) BOOL haveView;
@property (nonatomic, strong) NSArray *accentIdentifer;
@property (nonatomic, strong) NSArray *accentNickName;

@end

/**************************************************************************/


#pragma mark - 语音听写类

@interface VoiceConversion : NSObject

/// 启动初始化语音程序
+ (void)VoiceInitialize;


/// 开始录音
- (void)voiceStart:(void (^)(BOOL isStart))startListening 
       speechBegin:(void (^)(void))begin 
         speechEnd:(void (^)(void))end 
       speechError:(void (^)(BOOL isSuccess))error 
      speechResult:(void (^)(NSString *text))result 
      speechVolume:(void (^)(int volume))volume;

/// 取消录音
- (void)voiceCancel;

/// 停止录音
- (void)voiceStop;

@end

.m文件

#import "VoiceConversion.h"

#pragma mark - 初始化参数类

/**************************************************************************/

static NSString *const PUTONGHUA = @"mandarin";
static NSString *const YUEYU     = @"cantonese";
static NSString *const HENANHUA  = @"henanese";
static NSString *const ENGLISH   = @"en_us";
static NSString *const CHINESE   = @"zh_cn";

@implementation IATConfig

- (id)init
{
    self  = [super init];
    if (self)
    {
        [self defaultSetting];
        return  self;
    }
    return nil;
}

+ (IATConfig *)sharedInstance
{
    static IATConfig  * instance = nil;
    static dispatch_once_t predict;
    dispatch_once(&predict, ^{
        instance = [[IATConfig alloc] init];
    });
    return instance;
}

- (void)defaultSetting
{
    _speechTimeout = @"30000";
    _vadEos = @"3000";
    _vadBos = @"3000";
    _dot = @"1";
    _sampleRate = @"16000";
    _language = CHINESE;
    _accent = PUTONGHUA;
    _haveView = NO;//默认是不dai界面的
    _accentNickName = [[NSArray alloc] initWithObjects:@"粤语", @"普通话", @"河南话", @"英文", nil];
}

+ (NSString *)mandarin
{
    return PUTONGHUA;
}

+ (NSString *)cantonese
{
    return YUEYU;
}

+ (NSString *)henanese
{
    return HENANHUA;
}

+ (NSString *)chinese
{
    return CHINESE;
}

+ (NSString *)english
{
    return ENGLISH;
}

+ (NSString *)lowSampleRate
{
    return @"8000";
}

+ (NSString *)highSampleRate
{
    return @"16000";
}

+ (NSString *)isDot
{
    return @"1";
}

+ (NSString *)noDot
{
    return @"0";
}

@end

/**************************************************************************/

#pragma mark - 语音听写类

static NSString *const VoiceAPPID   = @"572016e4";
static NSString *const VoiceTimeOut = @"20000";

@interface VoiceConversion () <IFlySpeechRecognizerDelegate>

@property (nonatomic, strong) NSMutableString *resultText;
@property (nonatomic, strong) IFlySpeechRecognizer *iFlySpeechRecognizer;

@property (nonatomic, copy) void (^beginSpeech)(void);
@property (nonatomic, copy) void (^endSpeech)(void);
@property (nonatomic, copy) void (^errorSpeech)(BOOL isSuccess);
@property (nonatomic, copy) void (^resultSpeech)(NSString *text);
@property (nonatomic, copy) void (^volumeSpeech)(int volume);

@end

@implementation VoiceConversion

#pragma mark 初始化------------

/// 启动初始化语音程序
+ (void)VoiceInitialize
{
    // 设置sdk的log等级,log保存在下面设置的工作路径中
    [IFlySetting setLogFile:LVL_ALL];
    
    // 打开输出在console的log开关
    [IFlySetting showLogcat:YES];
    
    // 设置sdk的工作路径
    NSArray *paths = NSSearchPathForDirectoriesInDomains(NSCachesDirectory, NSUserDomainMask, YES);
    NSString *cachePath = [paths objectAtIndex:0];
    [IFlySetting setLogFilePath:cachePath];
    
    // Appid是应用的身份信息,具有唯一性,初始化时必须要传入Appid。初始化是一个异步过程,可放在 App 启动时执行初始化,具体代码可以参 照 Demo 的 MSCAppDelegate.m。未初始化时使用服务,一般会返回错误码 10111.
    NSString *initString = [[NSString alloc] initWithFormat:@"appid=%@", VoiceAPPID];
    [IFlySpeechUtility createUtility:initString];
}

#pragma mark 实例化------------

- (void)dealloc
{
    [self voiceCancel];
}

- (NSMutableString *)resultText
{
    if (!_resultText)
    {
        _resultText = [[NSMutableString alloc] init];
    }
    
    return _resultText;
}

- (IFlySpeechRecognizer *)iFlySpeechRecognizer
{
    if (_iFlySpeechRecognizer == nil)
    {
        _iFlySpeechRecognizer = [IFlySpeechRecognizer sharedInstance];
        
        [_iFlySpeechRecognizer setParameter:@"" forKey:[IFlySpeechConstant PARAMS]];
        // 设置听写模式
        [_iFlySpeechRecognizer setParameter:@"iat" forKey:[IFlySpeechConstant IFLY_DOMAIN]];
    }
    
    return _iFlySpeechRecognizer;
}

- (void)initializeVoice
{
    self.iFlySpeechRecognizer.delegate = self;

    IATConfig *instance = [IATConfig sharedInstance];
        
    // 设置最长录音时间
    [self.iFlySpeechRecognizer setParameter:instance.speechTimeout forKey:[IFlySpeechConstant SPEECH_TIMEOUT]];
    // 设置后端点
    [self.iFlySpeechRecognizer setParameter:instance.vadEos forKey:[IFlySpeechConstant VAD_EOS]];
    // 设置前端点
    [self.iFlySpeechRecognizer setParameter:instance.vadBos forKey:[IFlySpeechConstant VAD_BOS]];
    // 网络等待时间
    [self.iFlySpeechRecognizer setParameter:@"20000" forKey:[IFlySpeechConstant NET_TIMEOUT]];
    
    // 设置采样率,推荐使用16K
    [self.iFlySpeechRecognizer setParameter:instance.sampleRate forKey:[IFlySpeechConstant SAMPLE_RATE]];
    
    if ([instance.language isEqualToString:[IATConfig chinese]])
    {
        // 设置语言
        [self.iFlySpeechRecognizer setParameter:instance.language forKey:[IFlySpeechConstant LANGUAGE]];
        // 设置方言
        [self.iFlySpeechRecognizer setParameter:instance.accent forKey:[IFlySpeechConstant ACCENT]];
    }
    else if ([instance.language isEqualToString:[IATConfig english]])
    {
        [self.iFlySpeechRecognizer setParameter:instance.language forKey:[IFlySpeechConstant LANGUAGE]];
    }
    
    // 设置是否返回标点符号
    [self.iFlySpeechRecognizer setParameter:instance.dot forKey:[IFlySpeechConstant ASR_PTT]];
}

#pragma mark 语音听写方法------------

/// 开始录音
- (void)voiceStart:(void (^)(BOOL isStart))startListening 
       speechBegin:(void (^)(void))begin 
         speechEnd:(void (^)(void))end 
       speechError:(void (^)(BOOL isSuccess))error 
      speechResult:(void (^)(NSString *text))result 
      speechVolume:(void (^)(int volume))volume
{
    [self.resultText setString:@""];
    
    // 回调设置
    self.beginSpeech = [begin copy];
    self.endSpeech = [end copy];
    self.errorSpeech = [error copy];
    self.resultSpeech = [result copy];
    self.volumeSpeech = [volume copy];
    
    
    // 初始化设置
    [self initializeVoice];
    
    [self.iFlySpeechRecognizer cancel];
    
    // 设置音频来源为麦克风
    [self.iFlySpeechRecognizer setParameter:IFLY_AUDIO_SOURCE_MIC forKey:@"audio_source"];
    
    // 设置听写结果格式为json
    [self.iFlySpeechRecognizer setParameter:@"json" forKey:[IFlySpeechConstant RESULT_TYPE]];
    
    // 保存录音文件,保存在sdk工作路径中,如未设置工作路径,则默认保存在library/cache下
    [self.iFlySpeechRecognizer setParameter:@"asr.pcm" forKey:[IFlySpeechConstant ASR_AUDIO_PATH]];
    
    BOOL isStart = [self.iFlySpeechRecognizer startListening];
    if (startListening)
    {
        // 如果开始录音失败,可能是上次请求未结束,暂不支持多路并发
        startListening(isStart);
    }
}

/// 取消听写
- (void)voiceCancel
{
    [self.iFlySpeechRecognizer cancel];
}

/// 停止录音
- (void)voiceStop
{
    [self.iFlySpeechRecognizer stopListening];
}

#pragma mark IFlySpeechRecognizerDelegate------------

/**
 识别结果返回代理
 @param :results识别结果
 @ param :isLast 表示是否最后一次结果
 */
- (void)onResults:(NSArray *)results isLast:(BOOL)isLast
{
    NSMutableString *resultString = [[NSMutableString alloc] init];
    NSDictionary *dic = results[0];
    for (NSString *key in dic)
    {
        [resultString appendFormat:@"%@",key];
    }
    NSString *resultFromJson = [[self class] stringFromJson:resultString];
    NSString *resultTextTemp = [NSString stringWithFormat:@"%@%@", self.resultText, resultFromJson];
    [self.resultText setString:resultTextTemp];
    if (isLast)
    {
        if (self.resultSpeech)
        {
            // 去掉最后一个句号
            NSRange range = [self.resultText rangeOfString:@"。" options:NSBackwardsSearch];
            if (range.location != NSNotFound)
            {
                resultTextTemp = [self.resultText substringToIndex:range.location];
                [self.resultText setString:resultTextTemp];
            }
            self.resultSpeech(self.resultText);
        }
    }
    
    [self voiceCancel];
}

/**
 识别会话结束返回代理
 @ param error 错误码,error.errorCode=0表示正常结束,非0表示发生错误。 
 */
- (void)onError:(IFlySpeechError *)error
{
    if (self.errorSpeech)
    {
        BOOL isSuccess = (0 == error.errorCode);
        self.errorSpeech(isSuccess);
    }
}

/**
 停止录音回调
 */
- (void)onEndOfSpeech
{
    if (self.endSpeech)
    {
        self.endSpeech();
    }
}

/**
 开始识别回调
 */
- (void)onBeginOfSpeech
{
    if (self.beginSpeech)
    {
        self.beginSpeech();
    }
}

/**
 音量回调函数 volume 0-30
 */
- (void)onVolumeChanged:(int)volume
{
    if (self.volumeSpeech)
    {
        self.volumeSpeech(volume);
    }
}


#pragma mark 解析方法------------

/**************************************************************************/

/**
 解析命令词返回的结果
 */
+ (NSString *)stringFromAsr:(NSString *)params;
{
    NSMutableString * resultString = [[NSMutableString alloc] init];
    NSString *inputString = nil;
    
    NSArray *array = [params componentsSeparatedByString:@"\n"];
    
    for (int index = 0; index < array.count; index++)
    {
        NSRange range;
        NSString *line = [array objectAtIndex:index];
        
        NSRange idRange = [line rangeOfString:@"id="];
        NSRange nameRange = [line rangeOfString:@"name="];
        NSRange confidenceRange = [line rangeOfString:@"confidence="];
        NSRange grammarRange = [line rangeOfString:@" grammar="];
        
        NSRange inputRange = [line rangeOfString:@"input="];
        
        if (confidenceRange.length == 0 || grammarRange.length == 0 || inputRange.length == 0 )
        {
            continue;
        }
        
        // check nomatch
        if (idRange.length != 0)
        {
            NSUInteger idPosX = idRange.location + idRange.length;
            NSUInteger idLength = nameRange.location - idPosX;
            range = NSMakeRange(idPosX, idLength);
            
            NSString *subString = [line substringWithRange:range];
            NSCharacterSet *subSet = [NSCharacterSet whitespaceAndNewlineCharacterSet];
            NSString *idValue = [subString stringByTrimmingCharactersInSet:subSet];
            if ([idValue isEqualToString:@"nomatch"])
            {
                return @"";
            }
        }
        
        // Get Confidence Value
        NSUInteger confidencePosX = confidenceRange.location + confidenceRange.length;
        NSUInteger confidenceLength = grammarRange.location - confidencePosX;
        range = NSMakeRange(confidencePosX,confidenceLength);
        
        NSString *score = [line substringWithRange:range];
        
        NSUInteger inputStringPosX = inputRange.location + inputRange.length;
        NSUInteger inputStringLength = line.length - inputStringPosX;
        
        range = NSMakeRange(inputStringPosX , inputStringLength);
        inputString = [line substringWithRange:range];
        
        [resultString appendFormat:@"%@ 置信度%@\n",inputString, score];
    }
    
    return resultString;
}

/**
 解析听写json格式的数据
 params例如:
 {"sn":1,"ls":true,"bg":0,"ed":0,"ws":[{"bg":0,"cw":[{"w":"白日","sc":0}]},{"bg":0,"cw":[{"w":"依山","sc":0}]},{"bg":0,"cw":[{"w":"尽","sc":0}]},{"bg":0,"cw":[{"w":"黄河入海流","sc":0}]},{"bg":0,"cw":[{"w":"。","sc":0}]}]}
 */
+ (NSString *)stringFromJson:(NSString *)params
{
    if (params == NULL)
    {
        return nil;
    }
    
    NSMutableString *tempStr = [[NSMutableString alloc] init];
    // 返回的格式必须为utf8的,否则发生未知错误
    NSData *dataJSON = [params dataUsingEncoding:NSUTF8StringEncoding];
    NSDictionary *resultDic  = [NSJSONSerialization JSONObjectWithData:dataJSON options:kNilOptions error:nil];
    
    if (resultDic != nil)
    {
        NSArray *wordArray = [resultDic objectForKey:@"ws"];
        
        for (int i = 0; i < [wordArray count]; i++)
        {
            NSDictionary *wsDic = [wordArray objectAtIndex:i];
            NSArray *cwArray = [wsDic objectForKey:@"cw"];
            
            for (int j = 0; j < [cwArray count]; j++)
            {
                NSDictionary *wDic = [cwArray objectAtIndex:j];
                NSString *str = [wDic objectForKey:@"w"];
                [tempStr appendString: str];
            }
        }
    }
    
    return tempStr;
}


/**
 解析语法识别返回的结果
 */
+ (NSString *)stringFromABNFJson:(NSString *)params
{
    if (params == NULL)
    {
        return nil;
    }
    NSMutableString *tempStr = [[NSMutableString alloc] init];
    NSData *dataJSON = [params dataUsingEncoding:NSUTF8StringEncoding];
    NSDictionary *resultDic  = [NSJSONSerialization JSONObjectWithData:dataJSON options:kNilOptions error:nil];
    
    NSArray *wordArray = [resultDic objectForKey:@"ws"];
    for (int i = 0; i < [wordArray count]; i++)
    {
        NSDictionary *wsDic = [wordArray objectAtIndex:i];
        NSArray *cwArray = [wsDic objectForKey:@"cw"];
        
        for (int j = 0; j < [cwArray count]; j++)
        {
            NSDictionary *wDic = [cwArray objectAtIndex:j];
            NSString *str = [wDic objectForKey:@"w"];
            NSString *score = [wDic objectForKey:@"sc"];
            [tempStr appendString: str];
            [tempStr appendFormat:@" 置信度:%@",score];
            [tempStr appendString: @"\n"];
        }
    }
    
    return tempStr;
}

/**************************************************************************/

@end


使用

初始化方法
/// 启动初始化语音程序
+ (void)VoiceInitialize
{
    // 设置sdk的log等级,log保存在下面设置的工作路径中
    [IFlySetting setLogFile:LVL_ALL];
    
    // 打开输出在console的log开关
    [IFlySetting showLogcat:YES];
    
    // 设置sdk的工作路径
    NSArray *paths = NSSearchPathForDirectoriesInDomains(NSCachesDirectory, NSUserDomainMask, YES);
    NSString *cachePath = [paths objectAtIndex:0];
    [IFlySetting setLogFilePath:cachePath];
    
    // Appid是应用的身份信息,具有唯一性,初始化时必须要传入Appid。初始化是一个异步过程,可放在 App 启动时执行初始化,具体代码可以参 照 Demo 的 MSCAppDelegate.m。未初始化时使用服务,一般会返回错误码 10111.
    NSString *initString = [[NSString alloc] initWithFormat:@"appid=%@", VoiceAPPID];
    [IFlySpeechUtility createUtility:initString];
}

初始化调用
- (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions 
{
    // Override point for customization after application launch.
    
    [VoiceConversion VoiceInitialize];
    
    return YES;
}


#import "VoiceConversion.h"

@interface ViewController ()

@property (nonatomic, strong) VoiceConversion *voiceConversion;
@property (nonatomic, strong) UILabel *messageLabel;

@end

@implementation ViewController

- (void)viewDidLoad {
    [super viewDidLoad];
    // Do any additional setup after loading the view, typically from a nib.
    
    UIBarButtonItem *startItem = [[UIBarButtonItem alloc] initWithTitle:@"start" style:UIBarButtonItemStyleDone target:self action:@selector(startItemClick:)];
    UIBarButtonItem *stopItem = [[UIBarButtonItem alloc] initWithTitle:@"stop" style:UIBarButtonItemStyleDone target:self action:@selector(stopItemClick:)];
    UIBarButtonItem *cancelItem = [[UIBarButtonItem alloc] initWithTitle:@"cancel" style:UIBarButtonItemStyleDone target:self action:@selector(cancelItemClick:)];
    self.navigationItem.rightBarButtonItems = @[startItem, stopItem, cancelItem];
    
    self.title = @"科大讯飞语音";
    
    [self setUI];
}

- (void)didReceiveMemoryWarning {
    [super didReceiveMemoryWarning];
    // Dispose of any resources that can be recreated.
}

#pragma mark - 视图

- (void)setUI
{
    if ([self respondsToSelector:@selector(setEdgesForExtendedLayout:)])
    {
        [self setEdgesForExtendedLayout:UIRectEdgeNone];
    }
    
    self.messageLabel = [[UILabel alloc] initWithFrame:CGRectMake(10.0, 10.0, CGRectGetWidth(self.view.bounds) - 10.0 * 2, 40.0)];
    [self.view addSubview:self.messageLabel];
    self.messageLabel.backgroundColor = [UIColor colorWithWhite:0.5 alpha:0.3];
    self.messageLabel.textAlignment = NSTextAlignmentCenter;
}

#pragma mark - 响应

- (void)startItemClick:(UIBarButtonItem *)item
{
    ViewController __weak *weakSelf = self;
    [self.voiceConversion voiceStart:^(BOOL isStart) {
        
        NSLog(@"1 start");
        
        if (isStart)
        {
            weakSelf.messageLabel.text = @"正在录音";
        }
        else
        {
            weakSelf.messageLabel.text = @"启动识别服务失败,请稍后重试";
        }
    } speechBegin:^{
        NSLog(@"2 begin");
    } speechEnd:^{
        NSLog(@"3 end");
    } speechError:^(BOOL isSuccess) {
        NSLog(@"4 error");
    } speechResult:^(NSString *text) {
        NSLog(@"5 result");
        weakSelf.messageLabel.text = text;
    } speechVolume:^(int volume) {
        NSLog(@"6 volume");
        NSString *volumeString = [NSString stringWithFormat:@"音量:%d", volume];
        weakSelf.messageLabel.text = volumeString;
    }];
}

- (void)stopItemClick:(UIBarButtonItem *)item
{
    [self.voiceConversion voiceStop];
    
    self.messageLabel.text = @"停止录音";
}

- (void)cancelItemClick:(UIBarButtonItem *)item
{
    [self.voiceConversion voiceCancel];
    
    self.messageLabel.text = @"取消识别";
}

#pragma mark - getter

- (VoiceConversion *)voiceConversion
{
    if (!_voiceConversion)
    {
        _voiceConversion = [[VoiceConversion alloc] init];
    }
    
    return _voiceConversion;
}

@end







猜你喜欢

转载自blog.csdn.net/potato512/article/details/51276208