文章目录

  • 1. 前言
  • 2. 源码分析
  • 2.1 unimrcp 模块的加载
  • 2.2 tts 功能的实现


1. 前言

MRCP(Media Resource Control Protocol, 媒体资源控制协议) 是一种通讯协议,用于语音服务器向客户端提供各种语音服务,例如 语音识别(ASR)和语音合成(TTS)。FreeSWITCH 中的 unimrcp模块 就是对接 MRCP 协议栈的中间层,提供了集成使用 ASR、TTS 的能力。下图是 FreeSWITCH 中 unimrcp模块 的源码时序,下文将对源码进行分析。

freeswitch 启动模块 freeswitch 开源协议_freeswitch 启动模块

2. 源码分析

2.1 unimrcp 模块的加载

  1. 在 FreeSWITCH 1.10 源码阅读(1)-服务启动及 Event Socket 模块工作原理 中笔者分析了 FreeSWITCH 加载模块的主流程,unimrcp 模块被加载时将触发 mod_unimrcp.c#SWITCH_MODULE_LOAD_FUNCTION(mod_unimrcp_load) 执行。这个函数比较简练,大致有以下几个关键点:
  1. 调用 mod_unimrcp.c#mod_unimrcp_do_config() 函数获取 XML 配置中指定的 unimrcp.conf 名称下的配置内容,这部分不做赘述
  2. 调用 mod_unimrcp.c#mod_unimrcp_client_create() 函数创建 FreeSWITCH 本地的 MRCP 客户端,用于后续与 MRCP 服务器交互
  3. 调用 mod_unimrcp.c#synth_load() 函数加载创建 TTS 应用
  4. 调用 mod_unimrcp.c#recog_load() 函数加载创建 ASR 应用,与 TTS 应用加载类似,不做赘述
  5. 调用库函数 mrcp_client.c#mrcp_client_start() 新开线程启动 MRCP 客户端,涉及库函数不做赘述
/**
 * Module load entry point for mod_unimrcp.
 *
 * Reserves the module's custom event subclasses, resets the module globals,
 * reads the module configuration, hooks UniMRCP logging into FreeSWITCH,
 * creates the MRCP client, registers the synthesizer and recognizer
 * interfaces and finally starts the client stack.
 *
 * Returns SWITCH_STATUS_TERM when an event subclass cannot be reserved,
 * SWITCH_STATUS_FALSE on any later failure, SWITCH_STATUS_SUCCESS otherwise.
 */
SWITCH_MODULE_LOAD_FUNCTION(mod_unimrcp_load)
{
	/* event subclasses to reserve, in the order they must be attempted */
	const char *event_subclasses[] = { MY_EVENT_PROFILE_CREATE, MY_EVENT_PROFILE_CLOSE, MY_EVENT_PROFILE_OPEN };
	unsigned int idx;

	for (idx = 0; idx < sizeof(event_subclasses) / sizeof(event_subclasses[0]); idx++) {
		if (switch_event_reserve_subclass(event_subclasses[idx]) == SWITCH_STATUS_SUCCESS) {
			continue;
		}
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't register subclass %s!\n", event_subclasses[idx]);
		return SWITCH_STATUS_TERM;
	}

	/* hand the freshly created module interface back through the caller's pointer */
	*module_interface = switch_loadable_module_create_module_interface(pool, modname);

	/* start from a clean global state */
	memset(&globals, 0, sizeof(globals));
	switch_mutex_init(&globals.mutex, SWITCH_MUTEX_UNNESTED, pool);
	globals.speech_channel_number = 0;
	switch_core_hash_init_nocase(&globals.profiles);

	/* read the module configuration; both default profiles are mandatory */
	mod_unimrcp_do_config();
	if (zstr(globals.unimrcp_default_synth_profile)) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing default-tts-profile\n");
		return SWITCH_STATUS_FALSE;
	}
	if (zstr(globals.unimrcp_default_recog_profile)) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing default-asr-profile\n");
		return SWITCH_STATUS_FALSE;
	}

	/* route UniMRCP logging through FreeSWITCH */
	switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "UniMRCP log level = %s\n", globals.unimrcp_log_level);
	if (!apt_log_instance_create(APT_LOG_OUTPUT_NONE, str_to_log_level(globals.unimrcp_log_level), pool)) {
		/* a log instance already exists: only adjust its priority */
		apt_log_priority_set(str_to_log_level(globals.unimrcp_log_level));
	}
	apt_log_ext_handler_set(unimrcp_log);

	/* build the MRCP client */
	globals.mrcp_client = mod_unimrcp_client_create(pool);
	if (globals.mrcp_client == NULL) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Failed to create mrcp client\n");
		return SWITCH_STATUS_FALSE;
	}

	/* register the synthesizer first, then the recognizer; stop at the first failure */
	if (synth_load(*module_interface, pool) != SWITCH_STATUS_SUCCESS ||
		recog_load(*module_interface, pool) != SWITCH_STATUS_SUCCESS) {
		return SWITCH_STATUS_FALSE;
	}

	/* everything is registered: start the client stack */
	mrcp_client_start(globals.mrcp_client);

	/* tell the core to keep the module loaded */
	return SWITCH_STATUS_SUCCESS;
}
  1. mod_unimrcp.c#mod_unimrcp_client_create() 函数的关键点在于与底层 mrcp 库的交互,由于底层库已经不属于 FreeSWITCH 源码,本文不会再深入:
  1. 调用库函数 mrcp_client.c#mrcp_client_create() 创建 FreeSWITCH 作为客户端连接 MRCP 服务器的 mrcp_client_t 对象,并设置该对象上回调函数表中处理消息的函数为 mrcp_client.c#mrcp_client_msg_process()
  2. 调用库函数 mrcp_client_connection.c#mrcp_client_connection_agent_create() 创建 MRCP 连接端点对象 mrcp_connection_agent_t,用于管理底层 socket 数据读写
  3. 调用 mrcp_client.c#mrcp_client_connection_agent_register() 将 MRCP 连接端点注册到 FreeSWITCH 客户端对象中,并设置底层连接收到数据时的回调函数表为 mrcp_client.connection_method_vtable
  4. 解析 unimrcp 配置文件属性,创建对应的 profile,据此可以将多个 MRCP 服务器的连接信息隔离。如果是 v2 版本的 MRCP 协议,在 FreeSWITCH 和 MRCP 服务器之间还需要 SIP 信令交互,所以也会调用 mrcp_sofiasip_client_agent.c#mrcp_sofiasip_client_agent_create() 函数创建一个 SIP 交互的端点对象
/**
 * Create and configure the MRCP client used by mod_unimrcp.
 *
 * The function creates the client, registers the synthesizer/recognizer
 * resources, the codec manager, one connection agent and one media engine
 * (both shared by all profiles), then walks the <profiles> section of
 * CONFIG_FILE and registers one MRCP profile per <profile> element
 * (version "1" uses an RTSP agent, version "2" a SIP agent plus the shared
 * connection agent).
 *
 * \param mod_pool the module memory pool; used for the directory layout and
 *                 for mod_unimrcp's own profile_t objects
 * \return the configured client, or NULL on any failure
 *
 * NOTE(review): the failure paths set the local pointer to NULL without
 * destroying the already created client; whether that is reclaimed elsewhere
 * is not visible from this function.
 */
static mrcp_client_t *mod_unimrcp_client_create(switch_memory_pool_t *mod_pool)
{
	switch_xml_t cfg = NULL, xml = NULL, profiles = NULL, profile = NULL;
	mrcp_client_t *client = NULL;
	apr_pool_t *pool = NULL;
	mrcp_resource_loader_t *resource_loader = NULL;
	mrcp_resource_factory_t *resource_factory = NULL;
	mpf_codec_manager_t *codec_manager = NULL;
	apr_size_t max_connection_count = 0;
	apt_bool_t offer_new_connection = FALSE;
	mrcp_connection_agent_t *connection_agent;
	mpf_engine_t *media_engine;
	apt_dir_layout_t *dir_layout;

	/* create the client */
	if ((dir_layout = apt_default_dir_layout_create("../", mod_pool)) == NULL) {
		goto done;
	}
	client = mrcp_client_create(dir_layout);
	if (!client) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create MRCP client\n");
		goto done;
	}

	/* everything below is allocated from the client's own pool */
	pool = mrcp_client_memory_pool_get(client);
	if (!pool) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to get MRCP client memory pool\n");
		client = NULL;
		goto done;
	}

	/* load the synthesizer and recognizer resources */
	resource_loader = mrcp_resource_loader_create(FALSE, pool);
	if (!resource_loader) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create MRCP resource loader\n");
		client = NULL;
		goto done;
	} else {
		apt_str_t synth_resource;
		apt_str_t recog_resource;
		apt_string_set(&synth_resource, "speechsynth");
		mrcp_resource_load(resource_loader, &synth_resource);
		apt_string_set(&recog_resource, "speechrecog");
		mrcp_resource_load(resource_loader, &recog_resource);
		resource_factory = mrcp_resource_factory_get(resource_loader);
		mrcp_client_resource_factory_register(client, resource_factory);
	}

	codec_manager = mpf_engine_codec_manager_create(pool);
	if (!codec_manager) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create MPF codec manager\n");
		client = NULL;
		goto done;
	}
	if (!mrcp_client_codec_manager_register(client, codec_manager)) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create register MRCP client codec manager\n");
		client = NULL;
		goto done;
	}

	/* set up MRCP connection agent that will be shared with all profiles */
	if (!zstr(globals.unimrcp_max_connection_count)) {
		max_connection_count = atoi(globals.unimrcp_max_connection_count);
	}
	if (max_connection_count <= 0) {
		/* unset or zero: fall back to 100 connections */
		max_connection_count = 100;
	}
	if (!zstr(globals.unimrcp_offer_new_connection)) {
		/* strcasecmp() returns 0 on a match, so negate it to get TRUE for "true" */
		offer_new_connection = !strcasecmp("true", globals.unimrcp_offer_new_connection);
	}
	connection_agent = mrcp_client_connection_agent_create("MRCPv2ConnectionAgent", max_connection_count, offer_new_connection, pool);
	if (!connection_agent) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create MRCP connection agent\n");
		client = NULL;
		goto done;
	}
	/* optional tuning: each value is applied only when configured and > 0 */
	if (!zstr(globals.unimrcp_rx_buffer_size)) {
		apr_size_t rx_buffer_size = (apr_size_t)atol(globals.unimrcp_rx_buffer_size);
		if (rx_buffer_size > 0) {
			mrcp_client_connection_rx_size_set(connection_agent, rx_buffer_size);
		}
	}
	if (!zstr(globals.unimrcp_tx_buffer_size)) {
		apr_size_t tx_buffer_size = (apr_size_t)atol(globals.unimrcp_tx_buffer_size);
		if (tx_buffer_size > 0) {
			mrcp_client_connection_tx_size_set(connection_agent, tx_buffer_size);
		}
	}
	if (!zstr(globals.unimrcp_request_timeout)) {
		apr_size_t request_timeout = (apr_size_t)atol(globals.unimrcp_request_timeout);
		if (request_timeout > 0) {
			mrcp_client_connection_timeout_set(connection_agent, request_timeout);
		}
	}
	if (!mrcp_client_connection_agent_register(client, connection_agent)) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create register MRCP connection agent\n");
		client = NULL;
		goto done;
	}

	/* Set up the media engine that will be shared with all profiles */
	media_engine = mpf_engine_create("MediaEngine", pool);
	if (!media_engine) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to create MPF media engine\n");
		client = NULL;
		goto done;
	}
	if (!mpf_engine_scheduler_rate_set(media_engine, 1)) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to set MPF engine scheduler rate\n");
		client = NULL;
		goto done;
	}
	if (!mrcp_client_media_engine_register(client, media_engine)) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed to register MPF media engine\n");
		client = NULL;
		goto done;
	}

	/* configure the client profiles */
	if (!(xml = switch_xml_open_cfg(CONFIG_FILE, &cfg, NULL))) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Could not open %s\n", CONFIG_FILE);
		client = NULL;
		goto done;
	}
	if ((profiles = switch_xml_child(cfg, "profiles"))) {
		for (profile = switch_xml_child(profiles, "profile"); profile; profile = switch_xml_next(profile)) {
			/* a profile is a signaling agent + termination factory + media engine + connection agent (MRCPv2 only) */
			mrcp_sig_agent_t *agent = NULL;
			mpf_termination_factory_t *termination_factory = NULL;
			mrcp_profile_t *mprofile = NULL;
			mpf_rtp_config_t *rtp_config = NULL;
			mpf_rtp_settings_t *rtp_settings = mpf_rtp_settings_alloc(pool);
			mrcp_sig_settings_t *sig_settings = mrcp_signaling_settings_alloc(pool);
			profile_t *mod_profile = NULL;
			switch_xml_t default_params = NULL;
			mrcp_connection_agent_t *v2_profile_connection_agent = NULL;

			/* get profile attributes */
			const char *name = apr_pstrdup(pool, switch_xml_attr(profile, "name"));
			const char *version = switch_xml_attr(profile, "version");
			if (zstr(name) || zstr(version)) {
				switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "<profile> missing name or version attribute\n");
				client = NULL;
				goto done;
			}

			/* prepare mod_unimrcp's profile for configuration */
			profile_create(&mod_profile, name, mod_pool);
			if (mod_profile) {
				switch_core_hash_insert(globals.profiles, mod_profile->name, mod_profile);
			} else {
				client = NULL;
				goto done;
			}

			/* pull in any default SPEAK params */
			default_params = switch_xml_child(profile, "synthparams");
			if (default_params) {
				switch_xml_t param = NULL;
				switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading SPEAK params\n");
				for (param = switch_xml_child(default_params, "param"); param; param = switch_xml_next(param)) {
					const char *param_name = switch_xml_attr(param, "name");
					const char *param_value = switch_xml_attr(param, "value");
					if (zstr(param_name)) {
						switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing SPEAK param name\n");
						client = NULL;
						goto done;
					}
					if (zstr(param_value)) {
						switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing SPEAK param value\n");
						client = NULL;
						goto done;
					}
					switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading SPEAK Param %s:%s\n", param_name, param_value);
					switch_core_hash_insert(mod_profile->default_synth_params, switch_core_strdup(pool, param_name), switch_core_strdup(pool, param_value));
				}
			}

			/* pull in any default RECOGNIZE params */
			default_params = switch_xml_child(profile, "recogparams");
			if (default_params) {
				switch_xml_t param = NULL;
				switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading RECOGNIZE params\n");
				for (param = switch_xml_child(default_params, "param"); param; param = switch_xml_next(param)) {
					const char *param_name = switch_xml_attr(param, "name");
					const char *param_value = switch_xml_attr(param, "value");
					if (zstr(param_name)) {
						switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing RECOGNIZE param name\n");
						client = NULL;
						goto done;
					}
					if (zstr(param_value)) {
						switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing RECOGNIZE param value\n");
						client = NULL;
						goto done;
					}
					switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading RECOGNIZE Param %s:%s\n", param_name, param_value);
					switch_core_hash_insert(mod_profile->default_recog_params, switch_core_strdup(pool, param_name), switch_core_strdup(pool, param_value));
				}
			}

			/* create RTP config, common to MRCPv1 and MRCPv2 */
			rtp_config = mpf_rtp_config_alloc(pool);
			rtp_config->rtp_port_min = DEFAULT_RTP_PORT_MIN;
			rtp_config->rtp_port_max = DEFAULT_RTP_PORT_MAX;
			apt_string_set(&rtp_config->ip, DEFAULT_LOCAL_IP_ADDRESS);

			if (strcmp("1", version) == 0) {
				/* MRCPv1 configuration */
				switch_xml_t param = NULL;
				rtsp_client_config_t *config = mrcp_unirtsp_client_config_alloc(pool);
				config->origin = DEFAULT_SDP_ORIGIN;
				sig_settings->resource_location = DEFAULT_RESOURCE_LOCATION;
				/* MRCPv1 profiles do not use the shared connection agent */
				v2_profile_connection_agent = NULL;

				if (!zstr(globals.unimrcp_request_timeout)) {
					apr_size_t request_timeout = (apr_size_t)atol(globals.unimrcp_request_timeout);
					if (request_timeout > 0) {
						config->request_timeout = request_timeout;
					}
				}
				switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading MRCPv1 profile: %s\n", name);
				for (param = switch_xml_child(profile, "param"); param; param = switch_xml_next(param)) {
					const char *param_name = switch_xml_attr(param, "name");
					const char *param_value = switch_xml_attr(param, "value");
					if (zstr(param_name)) {
						switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing param name\n");
						client = NULL;
						goto done;
					}
					switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading Param %s:%s\n", param_name, param_value);
					/* offer the param to each config handler in turn; warn when none accepts it */
					if (!process_mrcpv1_config(config, sig_settings, param_name, param_value, pool) &&
						!process_rtp_config(client, rtp_config, rtp_settings, param_name, param_value, pool) &&
						!process_profile_config(mod_profile, param_name, param_value, mod_pool)) {
						switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Ignoring unknown param %s\n", param_name);
					}
				}
				agent = mrcp_unirtsp_client_agent_create(name, config, pool);
				if (!agent) {
					switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Failed to create MRCP RTSP client agent\n");
					client = NULL;
					goto done;
				}
			} else if (strcmp("2", version) == 0) {
				/* MRCPv2 configuration */
				mrcp_sofia_client_config_t *config = mrcp_sofiasip_client_config_alloc(pool);
				switch_xml_t param = NULL;
				config->local_ip = DEFAULT_LOCAL_IP_ADDRESS;
				config->local_port = DEFAULT_SIP_LOCAL_PORT;
				sig_settings->server_ip = DEFAULT_REMOTE_IP_ADDRESS;
				sig_settings->server_port = DEFAULT_SIP_REMOTE_PORT;
				config->ext_ip = NULL;
				config->user_agent_name = DEFAULT_SOFIASIP_UA_NAME;
				config->origin = DEFAULT_SDP_ORIGIN;
				/* MRCPv2 profiles share the connection agent created above */
				v2_profile_connection_agent = connection_agent;
				switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading MRCPv2 profile: %s\n", name);
				for (param = switch_xml_child(profile, "param"); param; param = switch_xml_next(param)) {
					const char *param_name = switch_xml_attr(param, "name");
					const char *param_value = switch_xml_attr(param, "value");
					if (zstr(param_name)) {
						switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing param name\n");
						client = NULL;
						goto done;
					}
					switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Loading Param %s:%s\n", param_name, param_value);
					/* offer the param to each config handler in turn; warn when none accepts it */
					if (!process_mrcpv2_config(config, sig_settings, param_name, param_value, pool) &&
						!process_rtp_config(client, rtp_config, rtp_settings, param_name, param_value, pool) &&
						!process_profile_config(mod_profile, param_name, param_value, mod_pool)) {
						switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Ignoring unknown param %s\n", param_name);
					}
				}
				agent = mrcp_sofiasip_client_agent_create(name, config, pool);
				if (!agent) {
					switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Failed to create MRCP SIP client agent\n");
					client = NULL;
					goto done;
				}
			} else {
				switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "version must be either \"1\" or \"2\"\n");
				client = NULL;
				goto done;
			}

			termination_factory = mpf_rtp_termination_factory_create(rtp_config, pool);
			if (!termination_factory) {
				switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Failed to create RTP termination factory\n");
				client = NULL;
				goto done;
			}
			mrcp_client_rtp_factory_register(client, termination_factory, name);
			mrcp_client_rtp_settings_register(client, rtp_settings, "RTP-Settings");
			mrcp_client_signaling_settings_register(client, sig_settings, "Signaling-Settings");
			mrcp_client_signaling_agent_register(client, agent);

			/* create the profile and register it */
			mprofile = mrcp_client_profile_create(NULL, agent, v2_profile_connection_agent, media_engine, termination_factory, rtp_settings, sig_settings, pool);
			if (!mprofile) {
				switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Failed to create MRCP client profile\n");
				client = NULL;
				goto done;
			}
			mrcp_client_profile_register(client, mprofile, name);
		}
	}

done:

	/* the parsed configuration is only needed while building the profiles */
	if (xml) {
		switch_xml_free(xml);
	}

	return client;
}
  1. mod_unimrcp.c#synth_load() 函数加载创建 TTS 功能应用的处理主要分为两个部分:
  1. 创建 SWITCH_SPEECH_INTERFACE 接口,将 TTS 相关功能封装到 FreeSWITCH 标准模块结构中,供上层使用
  2. 调用库函数 mrcp_application.c#mrcp_application_create() 创建 unimrcp 模块的 TTS 应用,这个部分主要是将 unimrcp 模块的处理逻辑嵌入到底层 MRCP 客户端,供底层回调通知上层
/**
 * Register the TTS side of mod_unimrcp.
 *
 * Creates the speech interface on the given module interface and points its
 * callbacks at the synth_speech_* functions, creates the synthesizer MRCP
 * application and registers it with the MRCP client under the name "synth",
 * then fills the two lookup tables in globals.synth.
 *
 * \param module_interface the module interface to attach the speech interface to
 * \param pool             memory pool passed to the application and param-id constructors
 * \return SWITCH_STATUS_SUCCESS, or SWITCH_STATUS_FALSE when the speech
 *         interface or the MRCP application cannot be created
 */
static switch_status_t synth_load(switch_loadable_module_interface_t *module_interface, switch_memory_pool_t *pool)
{
	/* MRCP synthesizer header names and their UniMRCP header ids, in insertion order */
	const struct {
		const char *header_name;
		int header_id;
	} synth_headers[] = {
		{ "jump-size", SYNTHESIZER_HEADER_JUMP_SIZE },
		{ "kill-on-barge-in", SYNTHESIZER_HEADER_KILL_ON_BARGE_IN },
		{ "speaker-profile", SYNTHESIZER_HEADER_SPEAKER_PROFILE },
		{ "completion-cause", SYNTHESIZER_HEADER_COMPLETION_CAUSE },
		{ "completion-reason", SYNTHESIZER_HEADER_COMPLETION_REASON },
		{ "voice-gender", SYNTHESIZER_HEADER_VOICE_GENDER },
		{ "voice-age", SYNTHESIZER_HEADER_VOICE_AGE },
		{ "voice-variant", SYNTHESIZER_HEADER_VOICE_VARIANT },
		{ "voice-name", SYNTHESIZER_HEADER_VOICE_NAME },
		{ "prosody-volume", SYNTHESIZER_HEADER_PROSODY_VOLUME },
		{ "prosody-rate", SYNTHESIZER_HEADER_PROSODY_RATE },
		{ "speech-marker", SYNTHESIZER_HEADER_SPEECH_MARKER },
		{ "speech-language", SYNTHESIZER_HEADER_SPEECH_LANGUAGE },
		{ "fetch-hint", SYNTHESIZER_HEADER_FETCH_HINT },
		{ "audio-fetch-hint", SYNTHESIZER_HEADER_AUDIO_FETCH_HINT },
		{ "failed-uri", SYNTHESIZER_HEADER_FAILED_URI },
		{ "failed-uri-cause", SYNTHESIZER_HEADER_FAILED_URI_CAUSE },
		{ "speak-restart", SYNTHESIZER_HEADER_SPEAK_RESTART },
		{ "speak-length", SYNTHESIZER_HEADER_SPEAK_LENGTH },
		{ "load-lexicon", SYNTHESIZER_HEADER_LOAD_LEXICON },
		{ "lexicon-search-order", SYNTHESIZER_HEADER_LEXICON_SEARCH_ORDER }
	};
	unsigned int idx;
	switch_speech_interface_t *tts_interface;

	/* expose the synthesizer through the FreeSWITCH speech interface */
	tts_interface = (switch_speech_interface_t *) switch_loadable_module_create_interface(module_interface, SWITCH_SPEECH_INTERFACE);
	if (tts_interface == NULL) {
		return SWITCH_STATUS_FALSE;
	}
	tts_interface->interface_name = MOD_UNIMRCP;
	tts_interface->speech_open = synth_speech_open;
	tts_interface->speech_close = synth_speech_close;
	tts_interface->speech_feed_tts = synth_speech_feed_tts;
	tts_interface->speech_read_tts = synth_speech_read_tts;
	tts_interface->speech_flush_tts = synth_speech_flush_tts;
	tts_interface->speech_text_param_tts = synth_speech_text_param_tts;
	tts_interface->speech_numeric_param_tts = synth_speech_numeric_param_tts;
	tts_interface->speech_float_param_tts = synth_speech_float_param_tts;

	/* create the synthesizer application whose messages go to synth_message_handler */
	globals.synth.app = mrcp_application_create(synth_message_handler, (void *) 0, pool);
	if (globals.synth.app == NULL) {
		return SWITCH_STATUS_FALSE;
	}

	/* message dispatcher callbacks; session updates are not handled */
	globals.synth.dispatcher.on_session_update = NULL;
	globals.synth.dispatcher.on_session_terminate = speech_on_session_terminate;
	globals.synth.dispatcher.on_channel_add = speech_on_channel_add;
	globals.synth.dispatcher.on_channel_remove = speech_on_channel_remove;
	globals.synth.dispatcher.on_message_receive = synth_on_message_receive;

	/* audio stream callbacks; only write_frame is provided */
	globals.synth.audio_stream_vtable.destroy = NULL;
	globals.synth.audio_stream_vtable.open_rx = NULL;
	globals.synth.audio_stream_vtable.close_rx = NULL;
	globals.synth.audio_stream_vtable.read_frame = NULL;
	globals.synth.audio_stream_vtable.open_tx = NULL;
	globals.synth.audio_stream_vtable.close_tx = NULL;
	globals.synth.audio_stream_vtable.write_frame = synth_stream_write;

	mrcp_client_application_register(globals.mrcp_client, globals.synth.app, "synth");

	/* FreeSWITCH param name -> MRCP param name */
	switch_core_hash_init_nocase(&globals.synth.fs_param_map);
	switch_core_hash_insert(globals.synth.fs_param_map, "voice", "voice-name");

	/* MRCP param name -> UniMRCP header id */
	switch_core_hash_init_nocase(&globals.synth.param_id_map);
	for (idx = 0; idx < sizeof(synth_headers) / sizeof(synth_headers[0]); idx++) {
		switch_core_hash_insert(globals.synth.param_id_map, synth_headers[idx].header_name,
								unimrcp_param_id_create(synth_headers[idx].header_id, pool));
	}

	return SWITCH_STATUS_SUCCESS;
}

2.2 tts 功能的实现

  1. 以 speak 放音 APP 为例,当上层执行这个 APP 时实际调用到 mod_dptools.c#speak_function() 函数,可以看到该函数主要处理是校验参数合法性,然后调用 switch_ivr_play_say.c#switch_ivr_speak_text() 函数
/**
 * "speak" dialplan application.
 *
 * The application data has the form "engine|voice|text", "voice|text" or
 * just "text". A missing engine or voice is taken from the channel variables
 * "tts_engine" and "tts_voice". When engine, voice or text is still missing
 * the error is logged, the channel is hung up and the function returns
 * without attempting to speak.
 */
SWITCH_STANDARD_APP(speak_function)
{
	switch_channel_t *channel = switch_core_session_get_channel(session);
	char buf[10];
	char *argv[3] = { 0 };
	int argc;
	const char *engine = NULL;
	const char *voice = NULL;
	char *text = NULL;
	char *mydata = NULL;
	switch_input_args_t args = { 0 };

	if (zstr(data) || !(mydata = switch_core_session_strdup(session, data))) {
		switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Invalid Params!\n");
		return;
	}

	/* split a private copy on '|'; the text itself is re-copied from the original data below */
	argc = switch_separate_string(mydata, '|', argv, sizeof(argv) / sizeof(argv[0]));

	if (argc == 0) {
		switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Invalid Params!\n");
		return;
	} else if (argc == 1) {
		text = switch_core_session_strdup(session, data); /* unstripped text */
	} else if (argc == 2) {
		voice = argv[0];
		/* same offset into the original data as argv[1] has in the split copy */
		text = switch_core_session_strdup(session, data + (argv[1] - argv[0])); /* unstripped text */
	} else {
		engine = argv[0];
		voice = argv[1];
		text = switch_core_session_strdup(session, data + (argv[2] - argv[0])); /* unstripped text */
	}

	/* fall back to channel variables for anything not given inline */
	if (!engine) {
		engine = switch_channel_get_variable(channel, "tts_engine");
	}

	if (!voice) {
		voice = switch_channel_get_variable(channel, "tts_voice");
	}

	if (!(engine && voice && text)) {
		/* substitute printable placeholders for the log line only */
		if (!engine) {
			engine = "NULL";
		}
		if (!voice) {
			voice = "NULL";
		}
		if (!text) {
			text = "NULL";
		}
		switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Invalid Params! [%s][%s][%s]\n", engine, voice, text);
		switch_channel_hangup(channel, SWITCH_CAUSE_DESTINATION_OUT_OF_ORDER);
		/* do not go on to speak with the "NULL" placeholders on a channel that was just hung up */
		return;
	}

	args.input_callback = on_dtmf;
	args.buf = buf;
	args.buflen = sizeof(buf);

	switch_channel_set_variable(channel, SWITCH_PLAYBACK_TERMINATOR_USED, "");

	switch_ivr_speak_text(session, engine, voice, text, &args);
}
  1. switch_ivr_play_say.c#switch_ivr_speak_text() 函数核心处理为以下几步:
  1. 调用函数 switch_core_speech.c#switch_core_speech_open() 使用本地 MRCP 客户端请求 MRCP 服务器新建会话
  2. 调用函数 switch_ivr_play_say.c#switch_ivr_speak_text_handle() 处理语音合成
/**
 * Speak a text on a session using the named TTS engine and voice.
 *
 * Opens the speech handle, a raw "L16" codec and (when the channel variable
 * "timer_name" is set) a timer, hands the actual synthesis to
 * switch_ivr_speak_text_handle() and tears the resources down again.
 * When the channel variable named by SWITCH_CACHE_SPEECH_HANDLES_VARIABLE is
 * true, the handle, codec and timer live in a cached_speech_handle_t stored
 * as channel private data and are reused by later calls instead of being
 * created and destroyed each time.
 *
 * \param session    the session to speak on
 * \param tts_name   TTS engine name
 * \param voice_name voice to use
 * \param text       text to synthesize
 * \param args       input arguments passed through to the speak handler
 * \return the status of switch_ivr_speak_text_handle(), or an error status
 *         when pre-answer, speech open, codec init or timer init fails
 */
SWITCH_DECLARE(switch_status_t) switch_ivr_speak_text(switch_core_session_t *session,
													  const char *tts_name, const char *voice_name, const char *text, switch_input_args_t *args)
{
	switch_channel_t *channel = switch_core_session_get_channel(session);
	uint32_t rate = 0;
	int interval = 0;
	uint32_t channels;
	switch_frame_t write_frame = { 0 };
	switch_timer_t ltimer, *timer;
	switch_codec_t lcodec, *codec;
	switch_memory_pool_t *pool = switch_core_session_get_pool(session);
	char *codec_name;
	switch_status_t status = SWITCH_STATUS_SUCCESS;
	switch_speech_handle_t lsh, *sh;
	switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_NONE;
	const char *timer_name, *var;
	cached_speech_handle_t *cache_obj = NULL;
	int need_create = 1, need_alloc = 1;
	switch_codec_implementation_t read_impl = { 0 };
	switch_core_session_get_read_impl(session, &read_impl);

	if (switch_channel_pre_answer(channel) != SWITCH_STATUS_SUCCESS) {
		return SWITCH_STATUS_FALSE;
	}

	arg_recursion_check_start(args);

	/* default to the stack-local handle, codec and timer; the cache branch below may redirect them */
	sh = &lsh;
	codec = &lcodec;
	timer = &ltimer;

	if ((var = switch_channel_get_variable(channel, SWITCH_CACHE_SPEECH_HANDLES_VARIABLE)) && switch_true(var)) {
		switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "cache enabled");
		if ((cache_obj = (cached_speech_handle_t *) switch_channel_get_private(channel, SWITCH_CACHE_SPEECH_HANDLES_OBJ_NAME))) {
			need_create = 0;
			if (!strcasecmp(cache_obj->tts_name, tts_name)) {
				/* same engine as the cached one: reuse it as is */
				need_alloc = 0;
			} else {
				switch_ivr_clear_speech_cache(session);
			}
		}

		if (!cache_obj) {
			cache_obj = (cached_speech_handle_t *) switch_core_session_alloc(session, sizeof(*cache_obj));
		}
		if (need_alloc) {
			switch_copy_string(cache_obj->tts_name, tts_name, sizeof(cache_obj->tts_name));
			switch_copy_string(cache_obj->voice_name, voice_name, sizeof(cache_obj->voice_name));
			switch_channel_set_private(channel, SWITCH_CACHE_SPEECH_HANDLES_OBJ_NAME, cache_obj);
		}
		sh = &cache_obj->sh;
		codec = &cache_obj->codec;
		timer = &cache_obj->timer;
	}

	timer_name = switch_channel_get_variable(channel, "timer_name");

	switch_core_session_reset(session, SWITCH_FALSE, SWITCH_FALSE);

	/* synthesize in the format of the session's read codec */
	rate = read_impl.actual_samples_per_second;
	interval = read_impl.microseconds_per_packet / 1000;
	channels = read_impl.number_of_channels;

	if (need_create) {
		memset(sh, 0, sizeof(*sh));
		if ((status = switch_core_speech_open(sh, tts_name, voice_name, (uint32_t) rate, interval, read_impl.number_of_channels, &flags, NULL)) != SWITCH_STATUS_SUCCESS) {
			switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Invalid TTS module %s[%s]!\n", tts_name, voice_name);
			switch_core_session_reset(session, SWITCH_TRUE, SWITCH_TRUE);
			switch_ivr_clear_speech_cache(session);
			arg_recursion_check_stop(args);
			return status;
		}
	} else if (cache_obj && strcasecmp(cache_obj->voice_name, voice_name)) {
		/* cached handle with a different voice: switch the voice on the open handle */
		switch_copy_string(cache_obj->voice_name, voice_name, sizeof(cache_obj->voice_name));
		switch_core_speech_text_param_tts(sh, "voice", voice_name);
	}

	if (switch_channel_pre_answer(channel) != SWITCH_STATUS_SUCCESS) {
		flags = 0;
		switch_core_speech_close(sh, &flags);
		arg_recursion_check_stop(args);
		return SWITCH_STATUS_FALSE;
	}
	switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "OPEN TTS %s\n", tts_name);

	codec_name = "L16";

	if (need_create) {
		if (switch_core_codec_init(codec,
								   codec_name,
								   NULL,
								   NULL, (int) rate, interval, channels, SWITCH_CODEC_FLAG_ENCODE | SWITCH_CODEC_FLAG_DECODE, NULL,
								   pool) == SWITCH_STATUS_SUCCESS) {
			switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Raw Codec Activated\n");
		} else {
			switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Raw Codec Activation Failed %s@%uhz 1 channel %dms\n", codec_name,
							  rate, interval);
			flags = 0;
			switch_core_speech_close(sh, &flags);
			switch_core_session_reset(session, SWITCH_TRUE, SWITCH_TRUE);
			switch_ivr_clear_speech_cache(session);
			arg_recursion_check_stop(args);
			return SWITCH_STATUS_GENERR;
		}
	}

	write_frame.codec = codec;

	if (timer_name) {
		if (need_create) {
			if (switch_core_timer_init(timer, timer_name, interval, (int) sh->samples, pool) != SWITCH_STATUS_SUCCESS) {
				switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Setup timer failed!\n");
				switch_core_codec_destroy(write_frame.codec);
				flags = 0;
				switch_core_speech_close(sh, &flags);
				switch_core_session_reset(session, SWITCH_TRUE, SWITCH_TRUE);
				switch_ivr_clear_speech_cache(session);
				arg_recursion_check_stop(args);
				return SWITCH_STATUS_GENERR;
			}
			switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Setup timer success %u bytes per %d ms!\n", sh->samples * 2,
							  interval);
		}
		switch_core_timer_sync(timer); // Sync timer

		/* start a thread to absorb incoming audio */
		switch_core_service_session(session);

	}

	/* the timer is only passed on when one was configured */
	status = switch_ivr_speak_text_handle(session, sh, write_frame.codec, timer_name ? timer : NULL, text, args);
	flags = 0;

	/* without a cache object the handle and codec are released right away */
	if (!cache_obj) {
		switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "no cache_obj");
		switch_core_speech_close(sh, &flags);
		switch_core_codec_destroy(codec);
	}

	if (timer_name) {
		/* End the audio absorbing thread */
		switch_core_thread_session_end(session);
		if (!cache_obj) {
			switch_core_timer_destroy(timer);
		}
	}

	switch_core_session_reset(session, SWITCH_FALSE, SWITCH_TRUE);
	arg_recursion_check_stop(args);

	return status;
}
  1. switch_core_speech.c#switch_core_speech_open() 函数实际只是通过核心注册的接口调用到 unimrcp 模块的 mod_unimrcp.c#synth_speech_open() 函数
/**
 * Open a speech (TTS) handle on the named speech module.
 *
 * module_name may carry a parameter after a colon ("engine:param"); the part
 * before the colon selects the speech interface and the part after it is
 * stored in sh->param. The handle uses the supplied pool, or a newly created
 * one (flagged SWITCH_SPEECH_FLAG_FREE_POOL) when pool is NULL.
 *
 * \return SWITCH_STATUS_FALSE on missing arguments, SWITCH_STATUS_GENERR when
 *         the speech interface is not found, the pool creation status when
 *         that fails, otherwise the status of the module's speech_open callback
 */
SWITCH_DECLARE(switch_status_t) switch_core_speech_open(switch_speech_handle_t *sh,
														const char *module_name,
														const char *voice_name,
														unsigned int rate, unsigned int interval, unsigned int channels,
														switch_speech_flag_t *flags, switch_memory_pool_t *pool)
{
	char engine_buf[256] = "";
	char *engine_param = NULL;
	switch_status_t status;

	if (!sh || !flags || zstr(module_name)) {
		return SWITCH_STATUS_FALSE;
	}

	/* split "engine:param" in a local copy so the caller's string stays untouched */
	if (strchr(module_name, ':')) {
		switch_set_string(engine_buf, module_name);
		engine_param = strchr(engine_buf, ':');
		if (engine_param) {
			*engine_param = '\0';
			engine_param++;
			module_name = engine_buf;
		}
	}

	sh->speech_interface = switch_loadable_module_get_speech_interface(module_name);
	if (sh->speech_interface == 0) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Invalid speech module [%s]!\n", module_name);
		return SWITCH_STATUS_GENERR;
	}

	sh->flags = *flags;

	if (pool) {
		sh->memory_pool = pool;
	} else {
		/* no pool supplied: create one and mark it on the handle */
		status = switch_core_new_memory_pool(&sh->memory_pool);
		if (status != SWITCH_STATUS_SUCCESS) {
			UNPROTECT_INTERFACE(sh->speech_interface);
			return status;
		}
		switch_set_flag(sh, SWITCH_SPEECH_FLAG_FREE_POOL);
	}

	/* keep pool-owned copies of the engine name and the optional parameter */
	sh->engine = switch_core_strdup(sh->memory_pool, module_name);
	if (engine_param) {
		sh->param = switch_core_strdup(sh->memory_pool, engine_param);
	}

	sh->rate = rate;
	sh->name = switch_core_strdup(sh->memory_pool, module_name);
	sh->samples = switch_samples_per_packet(rate, interval);
	sh->samplerate = rate;
	sh->native_rate = rate;
	sh->channels = channels;
	sh->real_channels = 1;

	/* let the speech module open its side of the handle */
	status = sh->speech_interface->speech_open(sh, voice_name, rate, channels, flags);
	if (status != SWITCH_STATUS_SUCCESS) {
		UNPROTECT_INTERFACE(sh->speech_interface);
		return status;
	}

	switch_set_flag(sh, SWITCH_SPEECH_FLAG_OPEN);
	return status;
}
  1. mod_unimrcp.c#synth_speech_open() 函数的核心处理是创建一个 FreeSWITCH 层面的 speech_channel_t 对象,并调用 mod_unimrcp.c#speech_channel_open() 函数通过底层 MRCP 客户端建立与远程 MRCP 服务端之间的连接
/**
 * speech_open callback of the unimrcp TTS interface.
 *
 * Creates a speech_channel_t for the handle, resolves the MRCP profile named
 * in sh->param (or the module default when that is empty), opens the channel
 * against that profile and applies the voice name plus the profile's default
 * synthesizer parameters.
 *
 * sh->param may be "profile:session_uuid"; in that case the uuid part is used
 * for logging and channel creation. Otherwise the uuid is taken from the
 * session stored on the memory pool under "__session", if any.
 *
 * @return SWITCH_STATUS_SUCCESS when the channel is open and configured,
 *         SWITCH_STATUS_FALSE when the channel cannot be created or the
 *         profile is unknown, otherwise the status of speech_channel_open().
 */
static switch_status_t synth_speech_open(switch_speech_handle_t *sh, const char *voice_name, int rate, int channels, switch_speech_flag_t *flags)
{
	switch_status_t status = SWITCH_STATUS_SUCCESS;
	speech_channel_t *schannel = NULL;
	profile_t *profile = NULL;
	const char *profile_name = sh->param;
	int speech_channel_number = get_next_speech_channel_number();
	char *session_uuid = NULL;
	char *name = NULL;
	switch_hash_index_t *hi = NULL;

	/* Work out the profile name and the session uuid */
	if (profile_name && strchr(profile_name, ':')) {
		/* "profile:uuid" - cut a private copy at the colon */
		char *profile_copy = switch_core_strdup(sh->memory_pool, profile_name);
		char *separator = strchr(profile_copy, ':');

		*separator = '\0';
		profile_name = profile_copy;
		session_uuid = switch_core_strdup(sh->memory_pool, separator + 1);
	} else {
		/* fall back to the session attached to this memory pool, if there is one */
		switch_core_session_t *session = switch_core_memory_pool_get_data(sh->memory_pool, "__session");

		if (session) {
			session_uuid = switch_core_session_get_uuid(session);
		}
	}

	/* Name the channel */
	name = switch_core_sprintf(sh->memory_pool, "TTS-%d", speech_channel_number);

	switch_log_printf(SWITCH_CHANNEL_UUID_LOG(session_uuid), SWITCH_LOG_INFO,
					  "speech_handle: name = %s, rate = %d, speed = %d, samples = %d, voice = %s, engine = %s, param = %s\n", sh->name, sh->rate,
					  sh->speed, sh->samples, sh->voice, sh->engine, sh->param);
	switch_log_printf(SWITCH_CHANNEL_UUID_LOG(session_uuid), SWITCH_LOG_INFO, "voice = %s, rate = %d\n", voice_name, rate);

	/* Allocate the channel and tie it to the speech handle */
	if (speech_channel_create(&schannel, name, session_uuid, SPEECH_CHANNEL_SYNTHESIZER, &globals.synth, (uint16_t) rate, sh->memory_pool) != SWITCH_STATUS_SUCCESS) {
		return SWITCH_STATUS_FALSE;
	}
	sh->private_info = schannel;
	schannel->fsh = sh;

	/* Resolve the profile, using the configured default when none was given */
	if (zstr(profile_name)) {
		profile_name = globals.unimrcp_default_synth_profile;
	}
	profile = (profile_t *) switch_core_hash_find(globals.profiles, profile_name);
	if (!profile) {
		switch_log_printf(SWITCH_CHANNEL_UUID_LOG(session_uuid), SWITCH_LOG_ERROR, "(%s) Can't find profile, %s\n", name, profile_name);
		return SWITCH_STATUS_FALSE;
	}

	/* Open the channel */
	status = speech_channel_open(schannel, profile);
	if (status != SWITCH_STATUS_SUCCESS) {
		return status;
	}

	/* Session-level TTS parameter */
	if (!zstr(voice_name)) {
		speech_channel_set_param(schannel, "Voice-Name", voice_name);
	}

	/* Profile-level default TTS parameters */
	for (hi = switch_core_hash_first(profile->default_synth_params); hi; hi = switch_core_hash_next(&hi)) {
		const void *key = NULL;
		void *val = NULL;

		switch_core_hash_this(hi, &key, NULL, &val);
		speech_channel_set_param(schannel, (char *) key, (char *) val);
	}

	return status;
}
  1. mod_unimrcp.c#speech_channel_open() 函数主要逻辑是调用底层库函数创建 MRCP 会话,并建立连接
  1. 调用库函数 mrcp_application.c#mrcp_application_session_create() 创建 MRCP 会话
  2. 调用库函数 mrcp_application.c#mrcp_application_channel_create() 创建 MRCP 会话下的 channel
  3. 调用库函数 mrcp_application.c#mrcp_application_channel_add() 请求远程 MRCP 服务器创建新会话
/**
 * Open a speech channel against an MRCP profile.
 *
 * Holds schannel->mutex for the whole call. Creates the MRCP session, an
 * audio termination and an MRCP channel, adds the channel to the session and
 * then waits on schannel->cond until schannel->state leaves
 * SPEECH_CHANNEL_CLOSED.
 *
 * @param schannel the channel to open; must be in SPEECH_CHANNEL_CLOSED state
 * @param profile  the profile to use; stored in schannel->profile
 * @return SWITCH_STATUS_SUCCESS when the channel reached SPEECH_CHANNEL_READY,
 *         SWITCH_STATUS_RESTART when the session could not be created or the
 *         channel went to SPEECH_CHANNEL_ERROR and was cleaned up back to
 *         SPEECH_CHANNEL_CLOSED (retry allowed),
 *         SWITCH_STATUS_FALSE for every other failure.
 */
static switch_status_t speech_channel_open(speech_channel_t *schannel, profile_t *profile)
{
 switch_status_t status = SWITCH_STATUS_SUCCESS;
 mpf_termination_t *termination = NULL;
 mrcp_resource_type_e resource_type;
 int warned = 0;

 switch_mutex_lock(schannel->mutex);

 /* make sure we can open channel */
 if (schannel->state != SPEECH_CHANNEL_CLOSED) {
 	status = SWITCH_STATUS_FALSE;
 	goto done;
 }

 schannel->profile = profile;

 /* create MRCP session */
 if ((schannel->unimrcp_session = mrcp_application_session_create(schannel->application->app, profile->name, schannel)) == NULL) {
 	/* profile doesn't exist? */
 	switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_ERROR, "(%s) Unable to create session with %s\n", schannel->name, profile->name);
 	status = SWITCH_STATUS_RESTART;
 	goto done;
 }
 mrcp_application_session_name_set(schannel->unimrcp_session, schannel->name);

 /* create audio termination and add to channel */
 if ((termination = speech_channel_create_mpf_termination(schannel)) == NULL) {
 	switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_ERROR, "(%s) Unable to create termination with %s\n", schannel->name, profile->name);
 	mrcp_application_session_destroy(schannel->unimrcp_session);
 	status = SWITCH_STATUS_FALSE;
 	goto done;
 }
 /* the MRCP resource type follows the channel type: synthesizer, otherwise recognizer */
 if (schannel->type == SPEECH_CHANNEL_SYNTHESIZER) {
 	resource_type = MRCP_SYNTHESIZER_RESOURCE;
 } else {
 	resource_type = MRCP_RECOGNIZER_RESOURCE;
 }
 if ((schannel->unimrcp_channel = mrcp_application_channel_create(schannel->unimrcp_session, resource_type, termination, NULL, schannel)) == NULL) {
 	switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_ERROR, "(%s) Unable to create channel with %s\n", schannel->name, profile->name);
 	mrcp_application_session_destroy(schannel->unimrcp_session);
 	status = SWITCH_STATUS_FALSE;
 	goto done;
 }

 /* add channel to session... this establishes the connection to the MRCP server */
 if (mrcp_application_channel_add(schannel->unimrcp_session, schannel->unimrcp_channel) != TRUE) {
 	switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_ERROR, "(%s) Unable to add channel to session with %s\n", schannel->name, profile->name);
 	mrcp_application_session_destroy(schannel->unimrcp_session);
 	status = SWITCH_STATUS_FALSE;
 	goto done;
 }

 /* wait for channel to be ready */
 /* the loop keeps waiting for as long as the state is CLOSED; a timeout only produces a single warning */
 warned = 0;
 while (schannel->state == SPEECH_CHANNEL_CLOSED) {
 	if (switch_thread_cond_timedwait(schannel->cond, schannel->mutex, SPEECH_CHANNEL_TIMEOUT_USEC) == SWITCH_STATUS_TIMEOUT && !warned) {
 		warned = 1;
 		switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_WARNING, "(%s) MRCP session has not opened after %d ms\n", schannel->name, SPEECH_CHANNEL_TIMEOUT_USEC / (1000));
 	}
 }
 if (schannel->state == SPEECH_CHANNEL_READY) {
 	switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_DEBUG, "(%s) channel is ready\n", schannel->name);
 } else if (schannel->state == SPEECH_CHANNEL_CLOSED) {
 	/* NOTE(review): the wait loop above only exits once state != CLOSED, so this branch looks unreachable unless the state is changed without holding the mutex - confirm */
 	switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_ERROR, "(%s) Timed out waiting for channel to be ready\n", schannel->name);
 	/* can't retry */
 	status = SWITCH_STATUS_FALSE;
 } else if (schannel->state == SPEECH_CHANNEL_ERROR) {
 	switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_DEBUG, "(%s) Terminating MRCP session\n", schannel->name);
 	if (!mrcp_application_session_terminate(schannel->unimrcp_session)) {
 		switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_WARNING, "(%s) Unable to terminate application session\n", schannel->name);
 		status = SWITCH_STATUS_FALSE;
 		goto done;
 	}

 	/* Wait for session to be cleaned up */
 	/* same pattern as above: wait until the state leaves ERROR, warning once on timeout */
 	warned = 0;
 	while (schannel->state == SPEECH_CHANNEL_ERROR) {
 		if (switch_thread_cond_timedwait(schannel->cond, schannel->mutex, SPEECH_CHANNEL_TIMEOUT_USEC) == SWITCH_STATUS_TIMEOUT && !warned) {
 			warned = 1;
 			switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_WARNING, "(%s) MRCP session has not cleaned up after %d ms\n", schannel->name, SPEECH_CHANNEL_TIMEOUT_USEC / (1000));
 		}
 	}
 	if (schannel->state != SPEECH_CHANNEL_CLOSED) {
 		/* major issue... can't retry */
 		status = SWITCH_STATUS_FALSE;
 	} else {
 		/* failed to open profile, retry is allowed */
 		status = SWITCH_STATUS_RESTART;
 	}
 }

done:

 /* single exit point so the mutex is always released */
 switch_mutex_unlock(schannel->mutex);
 return status;
}
  1. 此时回到本节步骤2第2步,switch_ivr_play_say.c#switch_ivr_speak_text_handle() 函数是 tts 处理的功能主体,关键处理如下:
  1. 通过核心函数 switch_core.c#switch_core_speech_feed_tts() 调用到 mod_unimrcp.c#synth_speech_feed_tts() 函数发起 MRCP 语音合成请求
  2. 在 for 空循环中不断执行核心函数 switch_core.c#switch_core_speech_read_tts() 调用到 mod_unimrcp.c#synth_speech_read_tts() 函数尝试获取合成的语音
  3. 通过核心函数 switch_core.c#switch_core_session_write_frame() 将 MRCP 服务器返回的语音流写到当前会话,通过 RTP 传输到 SIP 终端播放
/**
 * Speak a text on a session through an already opened speech handle.
 *
 * Every '*' and '#' in the text is replaced by the channel variables
 * "star_replace" / "pound_replace" (defaulting to "star" / "pound"). The
 * text is then fed to the TTS engine and the function loops, reading
 * synthesized audio from the handle and writing it to the session, until the
 * engine stops delivering audio, the channel is no longer ready, a break is
 * requested, or input handling in args asks to stop.
 *
 * @param session session to play the audio on
 * @param sh      opened speech handle; NULL yields SWITCH_STATUS_FALSE
 * @param codec   codec attached to the written frames; must be ready
 * @param timer   optional timer pacing the loop; when NULL the loop is paced
 *                by reading frames from the session
 * @param text    text to speak
 * @param args    optional input handling (DTMF/event callback, buffer,
 *                digit machine, read frame callback)
 * @return SWITCH_STATUS_SUCCESS when playback ran to the end,
 *         SWITCH_STATUS_MEMERR when the replacement buffer cannot be allocated,
 *         SWITCH_STATUS_BREAK / SWITCH_STATUS_FALSE or a callback supplied
 *         status when playback was stopped early.
 */
SWITCH_DECLARE(switch_status_t) switch_ivr_speak_text_handle(switch_core_session_t *session,
														 switch_speech_handle_t *sh,
														 switch_codec_t *codec, switch_timer_t *timer, const char *text, switch_input_args_t *args)
{
	switch_channel_t *channel = switch_core_session_get_channel(session);
	short abuf[SWITCH_RECOMMENDED_BUFFER_SIZE];
	switch_dtmf_t dtmf = { 0 };
	uint32_t len = 0;
	switch_size_t ilen = 0;
	switch_frame_t write_frame = { 0 };
	switch_status_t status = SWITCH_STATUS_SUCCESS;
	switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_NONE;
	switch_size_t extra = 0;
	char *tmp = NULL;
	const char *star, *pound, *p;
	switch_size_t starlen, poundlen;

	if (!sh) {
		return SWITCH_STATUS_FALSE;
	}

	if (switch_channel_pre_answer(channel) != SWITCH_STATUS_SUCCESS) {
		return SWITCH_STATUS_FALSE;
	}

	if (!switch_core_codec_ready(codec)) {
		return SWITCH_STATUS_FALSE;
	}

	arg_recursion_check_start(args);

	write_frame.data = abuf;
	write_frame.buflen = sizeof(abuf);

	/* bytes per frame: 2 bytes per sample for every channel */
	len = sh->samples * 2 * sh->channels;

	flags = 0;

	if (!(star = switch_channel_get_variable(channel, "star_replace"))) {
		star = "star";
	}
	if (!(pound = switch_channel_get_variable(channel, "pound_replace"))) {
		pound = "pound";
	}
	starlen = strlen(star);
	poundlen = strlen(pound);


	/* First pass: count how many extra bytes the replacements need */
	for (p = text; p && *p; p++) {
		if (*p == '*') {
			extra += starlen;
		} else if (*p == '#') {
			extra += poundlen;
		}
	}

	if (extra) {
		char *tp;
		switch_size_t mylen = strlen(text) + extra + 1;
		tmp = malloc(mylen);
		if (!tmp) {
			arg_recursion_check_stop(args);
			return SWITCH_STATUS_MEMERR;
		}
		memset(tmp, 0, mylen);
		tp = tmp;
		/* Second pass: copy the text, expanding '*' and '#'.
		 * The write is bounded by the space left in tmp. Using sizeof(tp) here
		 * would only give the size of the pointer and truncate any replacement
		 * of that many characters or more, leaving a NUL inside the text.
		 */
		for (p = text; p && *p; p++) {
			if (*p == '*' ) {
				snprintf(tp, mylen - (switch_size_t) (tp - tmp), "%s", star);
				tp += starlen;
			} else if (*p == '#') {
				snprintf(tp, mylen - (switch_size_t) (tp - tmp), "%s", pound);
				tp += poundlen;
			} else {
				*tp++ = *p;
			}
		}

		text = tmp;
	}

	/* NOTE(review): the status of the feed call is not checked here - confirm that is intended */
	switch_core_speech_feed_tts(sh, text, &flags);
	switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Speaking text: %s\n", text);
	switch_safe_free(tmp);
	text = NULL;

	write_frame.rate = sh->rate;
	memset(write_frame.data, 0, len);
	write_frame.datalen = len;
	write_frame.samples = len / 2;
	write_frame.codec = codec;

	switch_assert(codec->implementation != NULL);

	switch_channel_audio_sync(channel);


	for (;;) {
		switch_event_t *event;

		ilen = len;

		if (!switch_channel_ready(channel)) {
			status = SWITCH_STATUS_FALSE;
			break;
		}

		if (switch_channel_test_flag(channel, CF_BREAK)) {
			switch_channel_clear_flag(channel, CF_BREAK);
			status = SWITCH_STATUS_BREAK;
			break;
		}

		switch_ivr_parse_all_events(session);

		if (args) {
			/* dtmf handler function you can hook up to be executed when a digit is dialed during playback
			 * if you return anything but SWITCH_STATUS_SUCCESS the playback will stop.
			 */
			if (switch_channel_has_dtmf(channel)) {
				if (!args->input_callback && !args->buf && !args->dmachine) {
					status = SWITCH_STATUS_BREAK;
					break;
				}
				if (args->buf && !strcasecmp(args->buf, "_break_")) {
					status = SWITCH_STATUS_BREAK;
				} else {
					switch_channel_dequeue_dtmf(channel, &dtmf);

					if (args->dmachine) {
						char ds[2] = {dtmf.digit, '\0'};
						if ((status = switch_ivr_dmachine_feed(args->dmachine, ds, NULL)) != SWITCH_STATUS_SUCCESS) {
							break;
						}
					}

					if (args->input_callback) {
						status = args->input_callback(session, (void *) &dtmf, SWITCH_INPUT_TYPE_DTMF, args->buf, args->buflen);
					} else if (args->buf) {
						*((char *) args->buf) = dtmf.digit;
						status = SWITCH_STATUS_BREAK;
					}
				}
			}

			if (args->input_callback) {
				if (switch_core_session_dequeue_event(session, &event, SWITCH_FALSE) == SWITCH_STATUS_SUCCESS) {
					switch_status_t ostatus = args->input_callback(session, event, SWITCH_INPUT_TYPE_EVENT, args->buf, args->buflen);
					if (ostatus != SWITCH_STATUS_SUCCESS) {
						status = ostatus;
					}
					switch_event_destroy(&event);
				}
			}

			if (status != SWITCH_STATUS_SUCCESS) {
				break;
			}
		}

		/* While the handle is paused no audio is read; just keep the loop paced */
		if (switch_test_flag(sh, SWITCH_SPEECH_FLAG_PAUSE)) {
			if (timer) {
				if (switch_core_timer_next(timer) != SWITCH_STATUS_SUCCESS) {
					break;
				}
			} else {
				switch_frame_t *read_frame;
				switch_status_t tstatus = switch_core_session_read_frame(session, &read_frame, SWITCH_IO_FLAG_NONE, 0);

				while (switch_channel_ready(channel) && switch_channel_test_flag(channel, CF_HOLD)) {
					switch_ivr_parse_all_messages(session);
					switch_yield(10000);
				}

				if (!SWITCH_READ_ACCEPTABLE(tstatus)) {
					break;
				}

				if (args && args->dmachine) {
					if ((status = switch_ivr_dmachine_ping(args->dmachine, NULL)) != SWITCH_STATUS_SUCCESS) {
						goto done;
					}
				}

				if (args && (args->read_frame_callback)) {
					if ((status = args->read_frame_callback(session, read_frame, args->user_data)) != SWITCH_STATUS_SUCCESS) {
						goto done;
					}
				}
			}
			continue;
		}


		flags = SWITCH_SPEECH_FLAG_BLOCKING;
		status = switch_core_speech_read_tts(sh, abuf, &ilen, &flags);

		if (status != SWITCH_STATUS_SUCCESS) {
			/* BREAK from the engine ends the loop and is reported as success */
			if (status == SWITCH_STATUS_BREAK) {
				status = SWITCH_STATUS_SUCCESS;
			}
			break;
		}

		write_frame.datalen = (uint32_t) ilen;
		write_frame.samples = (uint32_t) (ilen / 2 / sh->channels);
		if (timer) {
			write_frame.timestamp = timer->samplecount;
		}
		if (switch_core_session_write_frame(session, &write_frame, SWITCH_IO_FLAG_NONE, 0) != SWITCH_STATUS_SUCCESS) {
			break;
		}

		if (timer) {
			if (switch_core_timer_next(timer) != SWITCH_STATUS_SUCCESS) {
				break;
			}
		} else {				/* time off the channel (if you must) */
			switch_frame_t *read_frame;
			switch_status_t tstatus = switch_core_session_read_frame(session, &read_frame, SWITCH_IO_FLAG_NONE, 0);

			while (switch_channel_ready(channel) && switch_channel_test_flag(channel, CF_HOLD)) {
				switch_ivr_parse_all_messages(session);
				switch_yield(10000);
			}

			if (!SWITCH_READ_ACCEPTABLE(tstatus)) {
				break;
			}

			if (args && args->dmachine) {
				if ((status = switch_ivr_dmachine_ping(args->dmachine, NULL)) != SWITCH_STATUS_SUCCESS) {
					goto done;
				}
			}

			if (args && (args->read_frame_callback)) {
				if ((status = args->read_frame_callback(session, read_frame, args->user_data)) != SWITCH_STATUS_SUCCESS) {
					goto done;
				}
			}
		}
	}

 done:

	switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "done speaking text\n");
	flags = 0;
	switch_core_speech_flush_tts(sh);

	arg_recursion_check_stop(args);
	return status;
}
  1. mod_unimrcp.c#synth_speech_feed_tts() 函数的核心其实是执行 mod_unimrcp.c#synth_channel_speak() 函数,mod_unimrcp.c#synth_channel_speak()函数的核心处理如下:
  1. 调用底层库函数 mrcp_application.c#mrcp_application_message_create() 创建 SYNTHESIZER_SPEAK tts 请求的消息结构
  2. 调用底层库函数 mrcp_application.c#mrcp_application_message_send() 触发执行向 MRCP 服务器发送语音合成请求
  3. 等待 MRCP 服务器返回,将当前 tts 的 channel 状态流转为 SPEECH_CHANNEL_PROCESSING。这个部分主要依靠 unimrcp 模块加载时嵌入到底层 MRCP 客户端的回调 mod_unimrcp.c#synth_on_message_receive() 函数完成
/**
 * feed_tts callback of the unimrcp TTS interface.
 *
 * Hands the text to synth_channel_speak() on the speech channel stored in
 * sh->private_info.
 *
 * @param sh    speech handle whose private_info is the speech channel
 * @param text  text to synthesize
 * @param flags not used by this function
 * @return SWITCH_STATUS_FALSE for an empty text, otherwise the status of
 *         synth_channel_speak().
 */
static switch_status_t synth_speech_feed_tts(switch_speech_handle_t *sh, char *text, switch_speech_flag_t *flags)
{
	speech_channel_t *schannel = (speech_channel_t *) sh->private_info;

	/* nothing to speak */
	if (zstr(text)) {
		return SWITCH_STATUS_FALSE;
	}

	return synth_channel_speak(schannel, text);
}

/**
 * Send a SPEAK request for the given text on a synthesizer channel.
 *
 * Runs with schannel->mutex held. Builds a SYNTHESIZER_SPEAK message with a
 * content type chosen from the start of the text, the channel parameters and
 * the text as body, clears the audio queue, sends the message and then waits
 * on schannel->cond until the state leaves SPEECH_CHANNEL_READY.
 *
 * @param schannel the synthesizer channel; must be in SPEECH_CHANNEL_READY
 * @param text     plain text or XML/SSML document to speak
 * @return SWITCH_STATUS_SUCCESS when the channel ended up in
 *         SPEECH_CHANNEL_PROCESSING, SWITCH_STATUS_FALSE otherwise.
 */
static switch_status_t synth_channel_speak(speech_channel_t *schannel, const char *text)
{
	/* assume failure; only the very last check flips this to success */
	switch_status_t result = SWITCH_STATUS_FALSE;
	mrcp_message_t *speak_msg = NULL;
	mrcp_generic_header_t *generic_hdr = NULL;
	mrcp_synth_header_t *synth_hdr = NULL;
	const char *content_type = NULL;
	int timeout_logged = 0;

	switch_mutex_lock(schannel->mutex);

	if (schannel->state != SPEECH_CHANNEL_READY) {
		goto done;
	}

	speak_msg = mrcp_application_message_create(schannel->unimrcp_session, schannel->unimrcp_channel, SYNTHESIZER_SPEAK);
	if (speak_msg == NULL) {
		switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_ERROR, "(%s) Failed to create SPEAK message\n", schannel->name);
		goto done;
	}

	/* generic header carries the content type */
	generic_hdr = (mrcp_generic_header_t *) mrcp_generic_header_prepare(speak_msg);
	if (generic_hdr == NULL) {
		goto done;
	}

	/* a text starting with the XML or SSML marker is sent with the profile's SSML mime type, anything else as plain text */
	if (text_starts_with(text, XML_ID) || text_starts_with(text, SSML_ID)) {
		content_type = schannel->profile->ssml_mime_type;
	} else {
		content_type = MIME_TYPE_PLAIN_TEXT;
	}
	apt_string_assign(&generic_hdr->content_type, content_type, speak_msg->pool);
	mrcp_generic_header_property_add(speak_msg, GENERIC_HEADER_CONTENT_TYPE);

	/* synthesizer header carries voice, rate, etc. */
	synth_hdr = (mrcp_synth_header_t *) mrcp_resource_header_prepare(speak_msg);
	if (synth_hdr == NULL) {
		goto done;
	}

	/* copy the channel parameters into the message headers */
	synth_channel_set_params(schannel, speak_msg, generic_hdr, synth_hdr);

	/* body is the text itself (plain text or SSML) */
	apt_string_assign(&speak_msg->body, text, schannel->memory_pool);

	/* drop any queued audio, then send SPEAK to the MRCP server */
	audio_queue_clear(schannel->audio_queue);
	if (mrcp_application_message_send(schannel->unimrcp_session, schannel->unimrcp_channel, speak_msg) == FALSE) {
		goto done;
	}

	/* wait for IN-PROGRESS; a timeout is only logged once and the wait goes on */
	while (schannel->state == SPEECH_CHANNEL_READY) {
		if (switch_thread_cond_timedwait(schannel->cond, schannel->mutex, SPEECH_CHANNEL_TIMEOUT_USEC) == SWITCH_STATUS_TIMEOUT && !timeout_logged) {
			timeout_logged = 1;
			switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_WARNING, "(%s) SPEAK IN-PROGRESS not received after %d ms\n", schannel->name, SPEECH_CHANNEL_TIMEOUT_USEC / (1000));
		}
	}

	if (schannel->state == SPEECH_CHANNEL_PROCESSING) {
		result = SWITCH_STATUS_SUCCESS;
	}

done:

	switch_mutex_unlock(schannel->mutex);
	return result;
}
  1. mod_unimrcp.c#synth_speech_read_tts() 函数的核心是执行 mod_unimrcp.c#speech_channel_read()。mod_unimrcp.c#speech_channel_read() 函数的关键则是检查 tts 的 channel 状态,当其状态符合要求的时候从 channel 的语音流缓冲队列中读取数据。此时回到本节步骤6第3步,switch_core.c#switch_core_session_write_frame() 函数会将从 MRCP 服务器传输过来的语音流数据写入到当前会话缓冲,经过编码转化,最终将通过 RTP 发送到终端播放,至此 tts 语音合成处理流程基本结束
/**
 * read_tts callback of the unimrcp TTS interface.
 *
 * Reads up to *datalen bytes of synthesized audio from the speech channel.
 * When fewer bytes are available the rest of the buffer is filled with the
 * channel's silence byte. When the read fails the channel is put back into
 * SPEECH_CHANNEL_READY and *datalen is set to 0.
 *
 * @param sh      speech handle whose private_info is the speech channel
 * @param data    destination buffer
 * @param datalen in: bytes requested; set to 0 when no audio is returned
 * @param flags   SWITCH_SPEECH_FLAG_BLOCKING selects a blocking read
 * @return SWITCH_STATUS_SUCCESS when data holds audio,
 *         SWITCH_STATUS_BREAK when the read failed.
 */
static switch_status_t synth_speech_read_tts(switch_speech_handle_t *sh, void *data, switch_size_t *datalen, switch_speech_flag_t *flags)
{
	speech_channel_t *schannel = (speech_channel_t *) sh->private_info;
	switch_size_t got = *datalen;
	switch_status_t result = SWITCH_STATUS_SUCCESS;

	if (speech_channel_read(schannel, data, &got, (*flags & SWITCH_SPEECH_FLAG_BLOCKING)) != SWITCH_STATUS_SUCCESS) {
		/* ready for next speak request */
		speech_channel_set_state(schannel, SPEECH_CHANNEL_READY);
		*datalen = 0;
		result = SWITCH_STATUS_BREAK;
	} else if (got < *datalen) {
		/* short read: pad the tail of the buffer with silence */
#ifdef MOD_UNIMRCP_DEBUG_AUDIO_QUEUE
		switch_log_printf(SWITCH_CHANNEL_UUID_LOG(schannel->session_uuid), SWITCH_LOG_DEBUG, "(%s) adding %ld bytes of padding\n", schannel->name, *datalen - got);
#endif
		memset((uint8_t *) data + got, schannel->silence, *datalen - got);
	}

	/* report negotiated sample rate back to FreeSWITCH */
	sh->native_rate = schannel->rate;

	return result;
}

/**
 * Read audio from a speech channel's audio queue, depending on its state.
 *
 * - SPEECH_CHANNEL_PROCESSING: reads from the queue using the caller's block
 *   setting; the result of the queue read is not inspected.
 * - SPEECH_CHANNEL_DONE: reads whatever is left without blocking.
 * - any other state: nothing is read.
 *
 * @param schannel the channel to read from
 * @param data     destination buffer
 * @param len      passed through to audio_queue_read()
 * @param block    non-zero to request a blocking read while processing
 * @return SWITCH_STATUS_FALSE when the channel, its mutex or its audio queue
 *         is missing, SWITCH_STATUS_BREAK when the channel is done and the
 *         queue read reports SWITCH_STATUS_FALSE or the channel is in any
 *         other state, SWITCH_STATUS_SUCCESS otherwise.
 */
static switch_status_t speech_channel_read(speech_channel_t *schannel, void *data, switch_size_t *len, int block)
{
	if (!schannel || !schannel->mutex || !schannel->audio_queue) {
		return SWITCH_STATUS_FALSE;
	}

	switch (schannel->state) {
	case SPEECH_CHANNEL_PROCESSING:
		/* IN-PROGRESS */
		audio_queue_read(schannel->audio_queue, data, len, block);
		return SWITCH_STATUS_SUCCESS;
	case SPEECH_CHANNEL_DONE:
		/* pull any remaining audio - never blocking */
		if (audio_queue_read(schannel->audio_queue, data, len, 0) == SWITCH_STATUS_FALSE) {
			/* all frames read */
			return SWITCH_STATUS_BREAK;
		}
		return SWITCH_STATUS_SUCCESS;
	default:
		break;
	}

	return SWITCH_STATUS_BREAK;
}