一、进入科大讯飞官网,下载自己所需要的SDK(本文以离线命令词识别为例)。
二、分析代码:主要有以下四个函数:
const char *get_audio_file(void); // choose the audio file to run offline grammar recognition on
int build_grammar(UserData *udata); // build the offline recognition grammar network
int update_lexicon(UserData *udata); // update the offline recognition grammar lexicon
int run_asr(UserData *udata); // run offline grammar recognition
2.1选择进行离线语法识别的语音文件
// Interactive console menu for picking the recording to recognise.
//
// Prints the menu, reads one key with _getch(), and repeats until the
// key is '1' or '2'.  Returns the path of the selected PCM file as a
// string literal (the caller must not free it).  Pressing Esc (key
// code 27) terminates the whole process via exit(0), so the function
// never actually returns NULL.
const char* get_audio_file(void)
{
    char choice = 0;

    do {
        printf("请选择音频文件:\n");
        printf("1.开始\n");
        printf("2.测试\n");
        choice = _getch();

        if ('1' == choice) {
            printf("\n1.开始\n");
            return "wav/start.pcm";
        }
        if ('2' == choice) {
            printf("\n2.测试\n");
            return "wav/test.pcm";
        }
        // Any other key: show the menu again, unless it was Esc.
    } while (27 != choice);

    exit(0);
    return NULL;
}
2.2构建离线识别语法网络
// Completion callback handed to QISRBuildGrammar() by build_grammar().
//
// It reports the outcome of the grammar build and records it in the
// UserData passed through as `udata`:
//   - errcode    receives `ecode`;
//   - grammar_id receives `info` (the grammar ID) when the build succeeded;
//   - build_fini is set to 1 so that the polling loop in main() can stop.
//
// ecode: result code of the build (MSP_SUCCESS on success).
// info:  grammar ID string on success; may be NULL.
// udata: the UserData pointer given to QISRBuildGrammar(); may be NULL.
// Always returns 0.
int build_grm_cb(int ecode, const char *info, void *udata)
{
    UserData *grm_data = (UserData *)udata;

    if (NULL != grm_data)
        grm_data->errcode = ecode;

    if (MSP_SUCCESS == ecode && NULL != info) {
        printf("构建语法成功! 语法ID:%s\n", info);
        if (NULL != grm_data) {
            // Copy through "%s": `info` is data, never a format string.
            _snprintf(grm_data->grammar_id, MAX_GRAMMARID_LEN - 1, "%s", info);
            // _snprintf leaves the buffer unterminated when it truncates.
            // NOTE(review): assumes grammar_id holds MAX_GRAMMARID_LEN bytes -
            // confirm against the UserData declaration.
            grm_data->grammar_id[MAX_GRAMMARID_LEN - 1] = '\0';
        }
    }
    else
        printf("构建语法失败!%d\n", ecode);

    // Raise the completion flag last, so a caller that polls build_fini
    // does not read errcode/grammar_id before they have been written.
    if (NULL != grm_data)
        grm_data->build_fini = 1;

    return 0;
}
// Load the BNF grammar file GRM_FILE into memory and submit it to
// QISRBuildGrammar() for an offline ("engine_type = local") build.
//
// udata: passed through unchanged to build_grm_cb, which records the
//        result of the build in it.
// Returns -1 when the grammar file cannot be opened, measured, read or
// buffered; otherwise returns whatever QISRBuildGrammar() returned.
// The build outcome itself is delivered through build_grm_cb.
int build_grammar(UserData *udata)
{
    FILE *grm_file = NULL;
    char *grm_content = NULL;
    long grm_file_size = -1;
    unsigned int grm_cnt_len = 0;
    char grm_build_params[MAX_PARAMS_LEN] = {0};
    int ret = 0;

    grm_file = fopen(GRM_FILE, "rb");
    if (NULL == grm_file) {
        printf("打开\"%s\"文件失败![%s]\n", GRM_FILE, strerror(errno));
        return -1;
    }

    // Measure the file.  ftell() returns -1 on failure, which must not be
    // turned into a huge unsigned length.
    if (0 == fseek(grm_file, 0, SEEK_END))
        grm_file_size = ftell(grm_file);
    if (grm_file_size < 0 || 0 != fseek(grm_file, 0, SEEK_SET)) {
        printf("读取\"%s\"文件失败![%s]\n", GRM_FILE, strerror(errno));
        fclose(grm_file);
        return -1;
    }
    grm_cnt_len = (unsigned int)grm_file_size;

    // One extra byte so the content can be NUL-terminated.
    grm_content = (char *)malloc((size_t)grm_cnt_len + 1);
    if (NULL == grm_content) {
        printf("内存分配失败!\n");
        fclose(grm_file);
        return -1;
    }

    // A short read would leave uninitialised bytes in the grammar text.
    if (grm_cnt_len != fread((void *)grm_content, 1, grm_cnt_len, grm_file)) {
        printf("读取\"%s\"文件失败!\n", GRM_FILE);
        free(grm_content);
        fclose(grm_file);
        return -1;
    }
    grm_content[grm_cnt_len] = '\0';
    fclose(grm_file);
    grm_file = NULL;

    // The buffer is zero-filled and at most MAX_PARAMS_LEN - 1 bytes are
    // written, so the parameter string is always terminated.
    _snprintf(grm_build_params, MAX_PARAMS_LEN - 1,
              "engine_type = local, "
              "asr_res_path = %s, sample_rate = %d, "
              "grm_build_path = %s, ",
              ASR_RES_PATH,
              SAMPLE_RATE_16K,
              GRM_BUILD_PATH);

    // build_grm_cb is called with the result of the build.
    ret = QISRBuildGrammar("bnf", grm_content, grm_cnt_len, grm_build_params, build_grm_cb, udata);

    free(grm_content);
    grm_content = NULL;
    return ret;
}
2.3更新离线识别语法词典
// Completion callback handed to QISRUpdateLexicon() by update_lexicon().
//
// Marks the lexicon update as finished in the UserData passed through
// as `udata` (update_fini = 1, errcode = ecode) and prints the outcome.
//
// ecode: result code of the update (MSP_SUCCESS on success).
// info:  not used by this callback.
// udata: the UserData pointer given to QISRUpdateLexicon(); may be NULL.
// Always returns 0.
int update_lex_cb(int ecode, const char *info, void *udata)
{
    UserData *state = (UserData *)udata;

    if (state != NULL) {
        state->update_fini = 1;
        state->errcode = ecode;
    }

    if (ecode != MSP_SUCCESS) {
        printf("更新词典失败!%d\n", ecode);
    } else {
        printf("更新词典成功!\n");
    }

    return 0;
}
// Demo of updating a lexicon slot of an already built grammar: sends a
// fixed, newline-separated word list for the slot LEX_NAME to
// QISRUpdateLexicon().  Keep or drop this step as your application needs.
//
// udata: must hold the grammar_id of the grammar to update; it is also
//        passed through to update_lex_cb, which records the result in it.
// Returns -1 when udata is NULL; otherwise returns whatever
// QISRUpdateLexicon() returned.  The update outcome itself is delivered
// through update_lex_cb.
int update_lexicon(UserData *udata)
{
    const char *lex_content = "更新前命令词\n更新后命令词";
    unsigned int lex_cnt_len = (unsigned int)strlen(lex_content);
    char update_lex_params[MAX_PARAMS_LEN] = {0};

    // grammar_id is read below, so a NULL udata cannot be used.
    if (NULL == udata)
        return -1;

    // The buffer is zero-filled and at most MAX_PARAMS_LEN - 1 bytes are
    // written, so the parameter string is always terminated.
    _snprintf(update_lex_params, MAX_PARAMS_LEN - 1,
              "engine_type = local, text_encoding = GB2312, "
              "asr_res_path = %s, sample_rate = %d, "
              "grm_build_path = %s, grammar_list = %s, ",
              ASR_RES_PATH,
              SAMPLE_RATE_16K,
              GRM_BUILD_PATH,
              udata->grammar_id);

    return QISRUpdateLexicon(LEX_NAME, lex_content, lex_cnt_len, update_lex_params, update_lex_cb, udata);
}
2.4进行离线语法识别
// Run one offline grammar recognition pass.
//
// Asks the user for a PCM file (get_audio_file), loads it into memory,
// opens a recognition session against the grammar named by
// udata->grammar_id, feeds the audio to the engine in chunks, then polls
// for the result and prints it.
//
// udata: must hold the grammar_id produced by the grammar build.
// Returns MSP_SUCCESS when recognition ran to completion.  Returns -1
// when udata is NULL or when the audio file cannot be opened, measured,
// read or buffered.  For SDK failures it returns the error code the SDK
// reported.
int run_asr(UserData *udata)
{
    char asr_params[MAX_PARAMS_LEN] = {0};
    const char *rec_rslt = NULL;
    const char *session_id = NULL;
    const char *asr_audiof = NULL;
    FILE *f_pcm = NULL;
    char *pcm_data = NULL;
    long pcm_count = 0;   // bytes already written to the engine
    long pcm_size = 0;    // bytes still to be written
    int aud_stat = MSP_AUDIO_SAMPLE_CONTINUE;
    int ep_status = MSP_EP_LOOKING_FOR_SPEECH;
    int rec_status = MSP_REC_STATUS_INCOMPLETE;
    int rss_status = MSP_REC_STATUS_INCOMPLETE;
    int errcode = -1;

    // grammar_id is read below, so a NULL udata cannot be used.
    if (NULL == udata)
        return errcode;

    asr_audiof = get_audio_file();
    if (NULL == asr_audiof)
        goto run_error;

    f_pcm = fopen(asr_audiof, "rb");
    if (NULL == f_pcm) {
        // Report the file name; f_pcm is NULL here and must not be printed.
        printf("打开\"%s\"失败![%s]\n", asr_audiof, strerror(errno));
        goto run_error;
    }

    // Measure the file.  ftell() returns -1 on failure.
    pcm_size = -1;
    if (0 == fseek(f_pcm, 0, SEEK_END))
        pcm_size = ftell(f_pcm);
    if (pcm_size < 0 || 0 != fseek(f_pcm, 0, SEEK_SET)) {
        printf("读取\"%s\"失败![%s]\n", asr_audiof, strerror(errno));
        goto run_error;
    }

    // Allocate at least one byte: malloc(0) may legitimately return NULL.
    pcm_data = (char *)malloc(pcm_size > 0 ? (size_t)pcm_size : 1);
    if (NULL == pcm_data)
        goto run_error;

    // A short read would leave uninitialised bytes in the audio buffer.
    if ((size_t)pcm_size != fread((void *)pcm_data, 1, (size_t)pcm_size, f_pcm)) {
        printf("读取\"%s\"失败!\n", asr_audiof);
        goto run_error;
    }
    fclose(f_pcm);
    f_pcm = NULL;

    // Offline grammar recognition parameters.  The buffer is zero-filled
    // and at most MAX_PARAMS_LEN - 1 bytes are written, so the string is
    // always terminated.
    _snprintf(asr_params, MAX_PARAMS_LEN - 1,
              "engine_type = local, "
              "asr_res_path = %s, sample_rate = %d, "
              "grm_build_path = %s, local_grammar = %s, "
              "result_type = xml, result_encoding = GB2312, ",
              ASR_RES_PATH,
              SAMPLE_RATE_16K,
              GRM_BUILD_PATH,
              udata->grammar_id);

    session_id = QISRSessionBegin(NULL, asr_params, &errcode);
    if (NULL == session_id)
        goto run_error;

    printf("开始识别...\n");
    while (1) {
        // Write 6400 bytes per call; once fewer than 12800 bytes remain,
        // send the whole remainder in a single write.
        unsigned int len = 6400;
        if (pcm_size < 12800)
            len = (unsigned int)pcm_size;

        aud_stat = (0 == pcm_count) ? MSP_AUDIO_SAMPLE_FIRST
                                    : MSP_AUDIO_SAMPLE_CONTINUE;
        if (0 == len)
            break;

        printf(">");
        errcode = QISRAudioWrite(session_id, (const void *)&pcm_data[pcm_count], len, aud_stat, &ep_status, &rec_status);
        if (MSP_SUCCESS != errcode)
            goto run_error;

        pcm_count += (long)len;
        pcm_size -= (long)len;

        // The engine detected the end of speech.
        if (MSP_EP_AFTER_SPEECH == ep_status)
            break;
        _sleep(150); // simulate the pauses of a person speaking
    }

    // Tell the engine explicitly that the audio is finished.  As in the
    // original code, the result of this write is not folded into errcode.
    QISRAudioWrite(session_id, (const void *)NULL, 0, MSP_AUDIO_SAMPLE_LAST, &ep_status, &rec_status);

    free(pcm_data);
    pcm_data = NULL;

    // Poll until the engine reports a complete result or an error.
    while (MSP_REC_STATUS_COMPLETE != rss_status && MSP_SUCCESS == errcode) {
        rec_rslt = QISRGetResult(session_id, &rss_status, 0, &errcode);
        _sleep(150);
    }

    printf("\n识别结束:\n");
    printf("=============================================================\n");
    if (NULL != rec_rslt)
        printf("%s\n", rec_rslt);
    else
        printf("没有识别结果!");
    printf("=============================================================\n");
    goto run_exit;

run_error:
    free(pcm_data); // free(NULL) is a no-op
    pcm_data = NULL;
    if (NULL != f_pcm) {
        fclose(f_pcm);
        f_pcm = NULL;
    }

run_exit:
    // Only end a session that was actually opened.
    if (NULL != session_id)
        QISRSessionEnd(session_id, NULL);
    return errcode;
}
2.5程序主函数
// Demo driver: log in, build the grammar, recognise once, update the
// lexicon, recognise again, then log out.
//
// Any failing step jumps straight to the logout/exit prompt.  The
// process exit status is always 0.
int main(int argc, char* argv[])
{
    const char *login_params = "appid = 自己的appid"; // login parameters (put your own appid here)
    UserData asr_state;
    int status = 0;

    // First argument: user name, second: password (NULL is fine for both),
    // third: login parameters.
    status = MSPLogin(NULL, NULL, login_params);
    if (status != MSP_SUCCESS) {
        printf("登录失败:%d\n", status);
        goto finish;
    }

    memset(&asr_state, 0, sizeof(UserData));

    // A grammar has to be built once to obtain its grammar ID; later
    // recognitions with the same grammar reuse that ID.
    printf("构建离线识别语法网络...\n");
    status = build_grammar(&asr_state);
    if (status != MSP_SUCCESS) {
        printf("构建语法调用失败!\n");
        goto finish;
    }
    // Wait for build_grm_cb to report completion.
    // NOTE(review): build_fini is a plain field polled without any
    // synchronisation; presumably the SDK calls back on another thread -
    // confirm.
    while (asr_state.build_fini != 1) {
        _sleep(300);
    }
    if (asr_state.errcode != MSP_SUCCESS) {
        goto finish;
    }

    printf("离线识别语法网络构建完成,开始识别...\n");
    status = run_asr(&asr_state);
    if (status != MSP_SUCCESS) {
        printf("离线语法识别出错: %d \n", status);
        goto finish;
    }

    printf("请按任意键继续\n");
    _getch();

    // Entries in a grammar's lexicon slot are refreshed through
    // QISRUpdateLexicon (wrapped by update_lexicon).
    printf("更新离线语法词典...\n");
    status = update_lexicon(&asr_state);
    if (status != MSP_SUCCESS) {
        printf("更新词典调用失败!\n");
        goto finish;
    }
    // Wait for update_lex_cb to report completion.
    while (asr_state.update_fini != 1) {
        _sleep(300);
    }
    if (asr_state.errcode != MSP_SUCCESS) {
        goto finish;
    }

    printf("更新离线语法词典完成,开始识别...\n");
    status = run_asr(&asr_state);
    if (status != MSP_SUCCESS) {
        printf("离线语法识别出错: %d \n", status);
        goto finish;
    }

finish:
    MSPLogout();
    printf("请按任意键退出...\n");
    _getch();
    return 0;
}
至此,几大函数的分析基本完成,细节还需要深挖。本人也是刚接触语音识别,理解还不深入,还请各路大神不吝赐教。
三、将上述代码做成DLL供其他程序调用:改写上述函数以供调用时,需特别注意函数声明,切勿重复声明。
3.1创建dll项目
3.2更改我们的头文件和源文件
3.3生成DLL文件,找到我们的DLL文件。
参考:
注:科大讯飞的官方demo在VS2015版本以上不支持64位,仅支持32位。