Added mod_ssml (speech synthesis markup language) ssml:// and tts:// file formats.

author Chris Rienzo <chris.rienzo@grasshopper.com>

Thu, 6 Jun 2013 19:34:49 +0000 (15:34 -0400)

committer Travis Cross <tc@traviscross.com>

Fri, 7 Jun 2013 01:08:08 +0000 (01:08 +0000)
author Chris Rienzo <chris.rienzo@grasshopper.com>
Thu, 6 Jun 2013 19:34:49 +0000 (15:34 -0400)
committer Travis Cross <tc@traviscross.com>
Fri, 7 Jun 2013 01:08:08 +0000 (01:08 +0000)
diff --git a/build/modules.conf.in b/build/modules.conf.in

index 487ff4007af5112e2d017af1ed4847a6a78e8724..35ad6b5909826e379bc047dd0afa16be613a9eed 100644 (file)
--- a/build/modules.conf.in
+++ b/build/modules.conf.in
@@ -100,6 +100,7 @@ formats/mod_native_file
  #formats/mod_shell_stream
  #formats/mod_shout
  formats/mod_sndfile
+#formats/mod_ssml
  formats/mod_tone_stream
  #formats/mod_vlc
  #languages/mod_java
diff --git a/src/mod/formats/mod_ssml/Makefile b/src/mod/formats/mod_ssml/Makefile

new file mode 100644 (file)

index 0000000..c18e12f
--- /dev/null
+++ b/src/mod/formats/mod_ssml/Makefile
@@ -0,0 +1,11 @@
+# Path to the top of the source tree, relative to this module directory
+BASE=../../../..
+
+# mod_ssml.c includes <iksemel.h>; build against the copy under libs/iksemel
+IKS_DIR=$(BASE)/libs/iksemel
+IKS_LA=$(IKS_DIR)/src/libiksemel.la
+LOCAL_CFLAGS += -I$(BASE)/libs/iksemel/include
+LOCAL_OBJS= $(IKS_LA)
+include $(BASE)/build/modmake.rules
+
+# Build libiksemel by running make inside its own directory
+$(IKS_LA): $(IKS_DIR) $(IKS_DIR)/.update
+       @cd $(IKS_DIR) && $(MAKE)
+       @$(TOUCH_TARGET)
diff --git a/src/mod/formats/mod_ssml/conf/autoload_configs/ssml.conf.xml b/src/mod/formats/mod_ssml/conf/autoload_configs/ssml.conf.xml

new file mode 100644 (file)

index 0000000..0a18d6b
--- /dev/null
+++ b/src/mod/formats/mod_ssml/conf/autoload_configs/ssml.conf.xml
@@ -0,0 +1,26 @@
+<configuration name="ssml.conf" description="SSML parser config">
+
+       <!-- voices in order of preference -->
+       <tts-voices>
+               <voice name="slt" language="en-US" gender="female" prefix="tts://flite|slt|"/>
+               <voice name="kal" language="en-US" gender="male" prefix="tts://flite|kal|"/>
+               <voice name="rms" language="en-US" gender="male" prefix="tts://flite|rms|"/>
+               <voice name="awb" language="en-US" gender="male" prefix="tts://flite|awb|"/>
+       </tts-voices>
+
+       <!-- maps ISO language to say module -->
+       <language-map>
+               <language iso="en-US" say-module="en" language="en"/>
+       </language-map>
+
+       <!-- say voices in order of preference -->
+       <say-voices>
+               <voice name="callie" language="en-US" gender="female" prefix="$${sounds_dir}/en/us/callie/"/>
+       </say-voices>
+
+       <!-- map interpret-as to say macros -->
+       <macros>
+               <macro name="cardinal" method="pronounced" type="number"/>
+       </macros>
+
+</configuration>
diff --git a/src/mod/formats/mod_ssml/mod_ssml.c b/src/mod/formats/mod_ssml/mod_ssml.c

new file mode 100644 (file)

index 0000000..ce2e205
--- /dev/null
+++ b/src/mod/formats/mod_ssml/mod_ssml.c
@@ -0,0 +1,1139 @@
+/*
+ * mod_ssml for FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application
+ * Copyright (C) 2013, Grasshopper
+ *
+ * Version: MPL 1.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is mod_ssml for FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application
+ *
+ * The Initial Developer of the Original Code is Grasshopper
+ * Portions created by the Initial Developer are Copyright (C)
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ * Chris Rienzo <chris.rienzo@grasshopper.com>
+ *
+ * mod_ssml.c -- SSML audio rendering format
+ *
+ */
+#include <switch.h>
+#include <iksemel.h>
+
+SWITCH_MODULE_LOAD_FUNCTION(mod_ssml_load);
+SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_ssml_shutdown);
+SWITCH_MODULE_DEFINITION(mod_ssml, mod_ssml_load, mod_ssml_shutdown, NULL);
+
+/** number of entries allocated for the file list of one SSML document (see ssml_file_open) */
+#define MAX_VOICE_FILES 256
+/** NOTE(review): not referenced in this part of the file; presumably the upper bound of a configured voice priority - confirm against the config loader */
+#define MAX_VOICE_PRIORITY 999
+/** score bonus added by score_voice() when the requested voice name matches */
+#define VOICE_NAME_PRIORITY 1000
+/** score bonus added by score_voice() when the requested gender matches */
+#define VOICE_GENDER_PRIORITY 1000
+/** score bonus added by score_voice() when the requested language matches */
+#define VOICE_LANG_PRIORITY 1000000
+
+struct ssml_parser;
+
+/** function to handle tag attributes: receives the parser and the attribute array walked as name/value pairs up to a NULL name; the handlers in this file return IKS_OK or IKS_BADXML */
+typedef int (* tag_attribs_fn)(struct ssml_parser *, char **);
+/** function to handle tag CDATA: receives the parser, the character data and its length; the handlers in this file return IKS_OK or IKS_BADXML */
+typedef int (* tag_cdata_fn)(struct ssml_parser *, char *, size_t);
+
+/**
+ * Tag definition: how one SSML element is validated and handled
+ */
+struct tag_def {
+       /** called by process_tag() with the attributes of the tag */
+       tag_attribs_fn attribs_fn;
+       /** called by cdata_hook() with the character data found inside the tag */
+       tag_cdata_fn cdata_fn;
+       /** SWITCH_TRUE if the tag is only accepted when it has no parent node */
+       switch_bool_t is_root;
+       /** set of tag names accepted as children; the key "ANY" accepts every non-root tag */
+       switch_hash_t *children_tags;
+};
+
+/**
+ * Module configuration
+ */
+static struct {
+       /** Cache of resolved voices, keyed by "<type>-<language>-<name>-<gender>" (see find_voice) */
+       switch_hash_t *voice_cache;
+       /** Voices searched by find_say_voice() */
+       switch_hash_t *say_voice_map;
+       /** Voices searched by find_tts_voice() */
+       switch_hash_t *tts_voice_map;
+       /** Mapping of interpret-as value to macro */
+       switch_hash_t *interpret_as_map;
+       /** Mapping of ISO language code to say-module (values are struct language) */
+       switch_hash_t *language_map;
+       /** Mapping of tag name to definition */
+       switch_hash_t *tag_defs;
+       /** module memory pool */
+       switch_memory_pool_t *pool;
+} globals;
+
+/**
+ * A say language: an entry of globals.language_map
+ */
+struct language {
+       /** The ISO language code */
+       char *iso;
+       /** The FreeSWITCH language code */
+       char *language;
+       /** The say module name, passed to switch_loadable_module_get_say_interface() */
+       char *say_module;
+};
+
+/**
+ * A say macro: an entry of globals.interpret_as_map, selected by <say-as interpret-as="...">
+ */
+struct macro {
+       /** interpret-as name (cardinal...) */
+       char *name;
+       /** language (en-US, en-UK, ...) */
+       char *language;
+       /** type (number, items, persons, messages...), resolved with switch_ivr_get_say_type_by_name() */
+       char *type;
+       /** method (pronounced, counted, iterated...), resolved with switch_ivr_get_say_method_by_name() */
+       char *method;
+};
+
+/**
+ * A TTS voice (the same struct is also used for say voices)
+ */
+struct voice {
+       /** higher priority = more likely to pick; this is the base score in score_voice() */
+       int priority;
+       /** voice gender */
+       char *gender;
+       /** voice name / macro */
+       char *name;
+       /** voice language */
+       char *language;
+       /** internal file prefix: prepended to the text for TTS voices, used as the file handle prefix for say voices */
+       char *prefix;
+};
+
+/** size of ssml_node.tag_name including the terminating NUL; longer tag names are truncated */
+#define TAG_LEN 32
+/** size of ssml_node.name including the terminating NUL; longer voice names are truncated */
+#define NAME_LEN 128
+/** size of ssml_node.language including the terminating NUL; fits "en-US", longer language codes are truncated */
+#define LANGUAGE_LEN 6
+/** size of ssml_node.gender including the terminating NUL; longer values are truncated */
+#define GENDER_LEN 8
+
+/**
+ * SSML voice state: one node per currently open tag.  Nodes are allocated
+ * with malloc() in tag_hook() and linked to their parent, forming a stack.
+ */
+struct ssml_node {
+       /** tag name (truncated to TAG_LEN - 1 characters) */
+       char tag_name[TAG_LEN];
+       /** requested name; starts as a copy of the parent's value */
+       char name[NAME_LEN];
+       /** requested language; starts as a copy of the parent's value */
+       char language[LANGUAGE_LEN];
+       /** requested gender; starts as a copy of the parent's value */
+       char gender[GENDER_LEN];
+       /** voice to use; reset to NULL for every new node and filled in by the attribute handlers */
+       struct voice *tts_voice;
+       /** say macro to use; reset to NULL for every new node, set by process_say_as() */
+       struct macro *say_macro;
+       /** tag handling data, set by process_tag() */
+       struct tag_def *tag_def;
+       /** previous node, NULL for the outermost tag */
+       struct ssml_node *parent_node;
+};
+
+/**
+ * A file to play: one entry of the list produced while parsing
+ */
+struct ssml_file {
+       /** prefix to add to file handle, or NULL; next_file() copies it to the inner handle before opening */
+       char *prefix;
+       /** the file to play (a path or URL such as silence_stream://...) */
+       const char *name;
+};
+
+/**
+ * SSML parser state, passed to the iksemel hooks as user data
+ */
+struct ssml_parser {
+       /** current attribs: innermost open tag, NULL outside of any tag */
+       struct ssml_node *cur_node;
+       /** files to play, an array of max_files entries */
+       struct ssml_file *files;
+       /** number of files added so far */
+       int num_files;
+       /** max files to play (capacity of files) */
+       int max_files;
+       /** memory pool to use for file names and prefixes */
+       switch_memory_pool_t *pool;
+       /** desired sample rate, copied from the file handle in ssml_file_open() */
+       int sample_rate;
+};
+
+/**
+ * SSML playback state, stored in the private_info of the ssml file handle
+ */
+struct ssml_context {
+       /** handle to current file */
+       switch_file_handle_t fh;
+       /** files to play, taken over from the parser */
+       struct ssml_file *files;
+       /** number of files */
+       int num_files;
+       /** current file being played; -1 before the first call to next_file() */
+       int index;
+};
+
+/**
+ * Register the definition of a (non-root) tag in globals.tag_defs.
+ * The definition and its child-name set are allocated from globals.pool.
+ * @param tag the tag name used as lookup key
+ * @param attribs_fn handler invoked with the tag attributes
+ * @param cdata_fn handler invoked with the tag CDATA
+ * @param children_tags comma-separated list of valid child tag names (at most 32 are split out); may be empty
+ * @return the new definition
+ */
+static struct tag_def *add_tag_def(const char *tag, tag_attribs_fn attribs_fn, tag_cdata_fn cdata_fn, const char *children_tags)
+{
+       struct tag_def *tag_def = switch_core_alloc(globals.pool, sizeof(*tag_def));
+
+       switch_core_hash_init(&tag_def->children_tags, globals.pool);
+       tag_def->attribs_fn = attribs_fn;
+       tag_def->cdata_fn = cdata_fn;
+       tag_def->is_root = SWITCH_FALSE;
+
+       if (!zstr(children_tags)) {
+               /* split a private copy; each name is both key and value of the set */
+               char *names[32] = { 0 };
+               char *list = switch_core_strdup(globals.pool, children_tags);
+               int count = switch_separate_string(list, ',', names, sizeof(names) / sizeof(names[0]));
+               int n;
+               for (n = 0; n < count; n++) {
+                       switch_core_hash_insert(tag_def->children_tags, names[n], names[n]);
+               }
+       }
+
+       switch_core_hash_insert(globals.tag_defs, tag, tag_def);
+       return tag_def;
+}
+
+/**
+ * Register the definition of a root tag: same as add_tag_def(), but the
+ * resulting definition is flagged as root.
+ * @param tag the tag name used as lookup key
+ * @param attribs_fn handler invoked with the tag attributes
+ * @param cdata_fn handler invoked with the tag CDATA
+ * @param children_tags comma-separated list of valid child tag names
+ * @return the new definition
+ */
+static struct tag_def *add_root_tag_def(const char *tag, tag_attribs_fn attribs_fn, tag_cdata_fn cdata_fn, const char *children_tags)
+{
+       struct tag_def *root_def = add_tag_def(tag, attribs_fn, cdata_fn, children_tags);
+
+       root_def->is_root = SWITCH_TRUE;
+
+       return root_def;
+}
+
+/**
+ * Validate a newly opened tag and dispatch its attributes to the handler
+ * registered for it.  The definition found is stored on the current node.
+ * @param parser the parser; parser->cur_node is the node of the new tag
+ * @param name the tag name
+ * @param atts the attributes
+ * @return result of the attribute handler, or IKS_BADXML if the tag is
+ *         unknown or not allowed at this position
+ */
+static int process_tag(struct ssml_parser *parser, const char *name, char **atts)
+{
+       struct ssml_node *node = parser->cur_node;
+       struct tag_def *def = switch_core_hash_find(globals.tag_defs, name);
+       struct tag_def *parent_def;
+
+       if (!def) {
+               return IKS_BADXML;
+       }
+       node->tag_def = def;
+
+       if (def->is_root) {
+               /* ROOT tags are only valid without a parent */
+               if (node->parent_node) {
+                       return IKS_BADXML;
+               }
+               return def->attribs_fn(parser, atts);
+       }
+
+       /* every other tag needs a parent... */
+       if (!node->parent_node) {
+               return IKS_BADXML;
+       }
+
+       /* ...that accepts it as a child */
+       parent_def = node->parent_node->tag_def;
+       if (!switch_core_hash_find(parent_def->children_tags, "ANY") &&
+               !switch_core_hash_find(parent_def->children_tags, name)) {
+               return IKS_BADXML;
+       }
+       return def->attribs_fn(parser, atts);
+}
+
+/**
+ * Attribute handler for tags whose attributes carry no meaning here
+ * @param parser the parser (unused)
+ * @param atts the attributes (unused)
+ * @return IKS_OK always
+ */
+static int process_attribs_ignore(struct ssml_parser *parser, char **atts)
+{
+       (void)parser;
+       (void)atts;
+       return IKS_OK;
+}
+
+/**
+ * CDATA handler for tags whose character data is silently dropped
+ * @param parser the parser (unused)
+ * @param data the CDATA (unused)
+ * @param len the CDATA length (unused)
+ * @return IKS_OK always
+ */
+static int process_cdata_ignore(struct ssml_parser *parser, char *data, size_t len)
+{
+       (void)parser;
+       (void)data;
+       (void)len;
+       return IKS_OK;
+}
+
+/**
+ * Handle CDATA inside a tag that must not contain text.  Data without any
+ * printable (graphic) character, such as whitespace between child tags, is
+ * tolerated.
+ * @param parser the parser
+ * @param data the CDATA
+ * @param len the CDATA length
+ * @return IKS_OK if data holds no graphic character, IKS_BADXML otherwise
+ */
+static int process_cdata_bad(struct ssml_parser *parser, char *data, size_t len)
+{
+       size_t i;
+       for (i = 0; i < len; i++) {
+               /* <ctype.h> functions require a value representable as unsigned char */
+               if (isgraph((unsigned char)data[i])) {
+                       switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "Unexpected CDATA for <%s>\n", parser->cur_node->tag_name);
+                       return IKS_BADXML;
+               }
+       }
+       return IKS_OK;
+}
+
+/**
+ * Rate how well a voice fits the requested language, name, and gender.
+ * Ranking: language > gender,name > configured priority.
+ * NOTE(review): assumes voice->gender, voice->name and voice->language are
+ * never NULL - confirm against the code that loads the voices.
+ * @param voice the voice to score
+ * @param cur_node the desired voice attributes (empty fields never match)
+ * @param lang_required if true, a voice in another language scores 0
+ * @return the score
+ */
+static int score_voice(struct voice *voice, struct ssml_node *cur_node, int lang_required)
+{
+       int total = voice->priority;
+       int gender_matches = !zstr_buf(cur_node->gender) && !strcmp(cur_node->gender, voice->gender);
+       int name_matches = !zstr_buf(cur_node->name) && !strcmp(cur_node->name, voice->name);
+       int language_matches = !zstr_buf(cur_node->language) && !strcmp(cur_node->language, voice->language);
+
+       if (!language_matches && lang_required) {
+               /* wrong language disqualifies the voice */
+               return 0;
+       }
+
+       total += gender_matches ? VOICE_GENDER_PRIORITY : 0;
+       total += name_matches ? VOICE_NAME_PRIORITY : 0;
+       total += language_matches ? VOICE_LANG_PRIORITY : 0;
+
+       return total;
+}
+
+/**
+ * Search for best voice based on attributes.  Results are remembered in
+ * globals.voice_cache under the key "<type>-<language>-<name>-<gender>".
+ * NOTE(review): no locking of globals.voice_cache is visible in this
+ * function - confirm whether concurrent opens are serialized elsewhere.
+ * @param cur_node the desired voice attributes
+ * @param map the map to search
+ * @param type "say" or "tts"
+ * @param lang_required if true, language must match
+ * @return the voice or NULL
+ */
+static struct voice *find_voice(struct ssml_node *cur_node, switch_hash_t *map, char *type, int lang_required)
+{
+       switch_hash_index_t *hi = NULL;
+       struct voice *voice = NULL;
+       char *lang_name_gender = NULL;
+       int best_score = 0;
+
+       /* check cache; if the key cannot be allocated just search without caching */
+       lang_name_gender = switch_mprintf("%s-%s-%s-%s", type, cur_node->language, cur_node->name, cur_node->gender);
+       if (lang_name_gender) {
+               voice = (struct voice *)switch_core_hash_find(globals.voice_cache, lang_name_gender);
+               if (voice) {
+                       /* that was easy! */
+                       goto done;
+               }
+       }
+
+       /* find best language, name, gender match; a score of 0 never wins */
+       for (hi = switch_hash_first(NULL, map); hi; hi = switch_hash_next(hi)) {
+               void *val;
+               struct voice *candidate;
+               int candidate_score = 0;
+               switch_hash_this(hi, NULL, NULL, &val);
+               candidate = (struct voice *)val;
+               candidate_score = score_voice(candidate, cur_node, lang_required);
+               if (candidate_score > best_score) {
+                       voice = candidate;
+                       best_score = candidate_score;
+               }
+       }
+
+       /* remember for next time */
+       if (voice && lang_name_gender) {
+               switch_core_hash_insert(globals.voice_cache, lang_name_gender, voice);
+       }
+
+done:
+       switch_safe_free(lang_name_gender);
+
+       return voice;
+}
+
+/**
+ * Pick the best TTS voice for the node.  The language is a preference
+ * only: a voice in another language may still be returned.
+ * @param cur_node the desired voice attributes
+ * @return the voice or NULL
+ */
+static struct voice *find_tts_voice(struct ssml_node *cur_node)
+{
+       struct voice *tts_voice = find_voice(cur_node, globals.tts_voice_map, "tts", 0);
+
+       return tts_voice;
+}
+
+/**
+ * Pick the best say voice for the node.  Unlike TTS voices, the language
+ * of a say voice must match the requested language.
+ * @param cur_node the desired voice attributes
+ * @return the voice or NULL
+ */
+static struct voice *find_say_voice(struct ssml_node *cur_node)
+{
+       struct voice *say_voice = find_voice(cur_node, globals.say_voice_map, "say", 1);
+
+       return say_voice;
+}
+
+/**
+ * Advance to the next playable file of the SSML document and open it.
+ * Entries that fail to open are skipped.  On success the properties of
+ * the newly opened file are mirrored onto the outer handle.
+ * @param handle the file handle
+ * @return SWITCH_STATUS_SUCCESS if a file was opened, SWITCH_STATUS_FALSE
+ *         if the list is exhausted or the handle was opened for writing
+ */
+static switch_status_t next_file(switch_file_handle_t *handle)
+{
+       struct ssml_context *context = handle->private_info;
+
+       for (;;) {
+               struct ssml_file *entry;
+
+               context->index++;
+
+               /* finish the previous file before moving on */
+               if (switch_test_flag((&context->fh), SWITCH_FILE_OPEN)) {
+                       switch_core_file_close(&context->fh);
+               }
+
+               if (context->index >= context->num_files) {
+                       return SWITCH_STATUS_FALSE;
+               }
+
+               entry = &context->files[context->index];
+               context->fh.prefix = entry->prefix;
+
+               if (switch_test_flag(handle, SWITCH_FILE_FLAG_WRITE)) {
+                       /* unsupported */
+                       return SWITCH_STATUS_FALSE;
+               }
+
+               if (switch_core_file_open(&context->fh, entry->name, handle->channels, handle->samplerate, handle->flags, NULL) == SWITCH_STATUS_SUCCESS) {
+                       break;
+               }
+               /* could not open this one, try the one after it */
+       }
+
+       handle->samples = context->fh.samples;
+       handle->format = context->fh.format;
+       handle->sections = context->fh.sections;
+       handle->seekable = context->fh.seekable;
+       handle->speed = context->fh.speed;
+       handle->interval = context->fh.interval;
+
+       if (switch_test_flag((&context->fh), SWITCH_FILE_NATIVE)) {
+               switch_set_flag(handle, SWITCH_FILE_NATIVE);
+       } else {
+               switch_clear_flag(handle, SWITCH_FILE_NATIVE);
+       }
+
+       return SWITCH_STATUS_SUCCESS;
+}
+
+/**
+ * Process xml:lang attribute: store a non-empty value as the requested
+ * language of the current node (truncated to LANGUAGE_LEN - 1 characters),
+ * then select the TTS voice for the node.
+ * NOTE(review): the original comment says only <speak>, <p> and <s> may
+ * change the language; that depends on which tags register this handler,
+ * which is not visible here.
+ * @param parsed_data the parser
+ * @param atts the attributes as name/value pairs, may be NULL
+ * @return IKS_OK
+ */
+static int process_xml_lang(struct ssml_parser *parsed_data, char **atts)
+{
+       struct ssml_node *cur_node = parsed_data->cur_node;
+
+       if (atts) {
+               int n;
+               for (n = 0; atts[n]; n += 2) {
+                       if (strcmp("xml:lang", atts[n]) != 0) {
+                               continue;
+                       }
+                       if (!zstr(atts[n + 1])) {
+                               strncpy(cur_node->language, atts[n + 1], LANGUAGE_LEN);
+                               cur_node->language[LANGUAGE_LEN - 1] = '\0';
+                       }
+               }
+       }
+
+       cur_node->tts_voice = find_tts_voice(cur_node);
+       return IKS_OK;
+}
+
+/**
+ * Process <voice>: copy non-empty xml:lang, name and gender attributes
+ * into the current node (each truncated to its buffer size), then select
+ * the TTS voice that fits best.
+ * @param parsed_data the parser
+ * @param atts the attributes as name/value pairs, may be NULL
+ * @return IKS_OK
+ */
+static int process_voice(struct ssml_parser *parsed_data, char **atts)
+{
+       struct ssml_node *cur_node = parsed_data->cur_node;
+       int n;
+
+       for (n = 0; atts && atts[n]; n += 2) {
+               const char *attr = atts[n];
+               const char *value = atts[n + 1];
+
+               /* empty values never override what was inherited */
+               if (zstr(value)) {
+                       continue;
+               }
+
+               if (!strcmp("xml:lang", attr)) {
+                       strncpy(cur_node->language, value, LANGUAGE_LEN);
+                       cur_node->language[LANGUAGE_LEN - 1] = '\0';
+               } else if (!strcmp("name", attr)) {
+                       strncpy(cur_node->name, value, NAME_LEN);
+                       cur_node->name[NAME_LEN - 1] = '\0';
+               } else if (!strcmp("gender", attr)) {
+                       strncpy(cur_node->gender, value, GENDER_LEN);
+                       cur_node->gender[GENDER_LEN - 1] = '\0';
+               }
+       }
+
+       cur_node->tts_voice = find_tts_voice(cur_node);
+       return IKS_OK;
+}
+
+/**
+ * Process <say-as>: look up the say macro registered for the first
+ * interpret-as attribute and remember it on the current node, then select
+ * the TTS voice for the node.
+ * @param parsed_data the parser
+ * @param atts the attributes as name/value pairs, may be NULL
+ * @return IKS_OK
+ */
+static int process_say_as(struct ssml_parser *parsed_data, char **atts)
+{
+       struct ssml_node *cur_node = parsed_data->cur_node;
+
+       if (atts) {
+               int n;
+               for (n = 0; atts[n]; n += 2) {
+                       char *interpret_as;
+
+                       if (strcmp("interpret-as", atts[n]) != 0) {
+                               continue;
+                       }
+
+                       /* only the first interpret-as attribute counts */
+                       interpret_as = atts[n + 1];
+                       if (!zstr(interpret_as)) {
+                               switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "interpret-as: %s\n", interpret_as);
+                               cur_node->say_macro = (struct macro *)switch_core_hash_find(globals.interpret_as_map, interpret_as);
+                       }
+                       break;
+               }
+       }
+
+       cur_node->tts_voice = find_tts_voice(cur_node);
+       return IKS_OK;
+}
+
+/**
+ * Process <break>- this is a period of silence
+ *
+ * Only the first "time" attribute is used.  Its value is a number followed
+ * by "ms" or "s"; fractional values such as "0.5s" are honoured (the
+ * result is truncated to whole milliseconds).  A positive duration is
+ * appended to the file list as silence_stream://<ms>.  Anything else,
+ * including a full file list, is silently ignored.
+ * Note that the attribute value is cut at the unit while parsing.
+ * @param parsed_data the parser
+ * @param atts the attributes as name/value pairs, may be NULL
+ * @return IKS_OK
+ */
+static int process_break(struct ssml_parser *parsed_data, char **atts)
+{
+       /* upper bound for the duration so the conversion to int cannot overflow (assumes int is at least 32 bits) */
+       const double max_ms = 2147483647.0;
+
+       if (atts) {
+               int i = 0;
+               while (atts[i]) {
+                       if (!strcmp("time", atts[i])) {
+                               char *t = atts[i + 1];
+                               if (!zstr(t) && parsed_data->num_files < parsed_data->max_files) {
+                                       int timeout_ms = 0;
+                                       double scale = 0.0;
+                                       char *unit;
+                                       /* "ms" must be tested first since it also contains "s" */
+                                       if ((unit = strstr(t, "ms"))) {
+                                               scale = 1.0;
+                                       } else if ((unit = strstr(t, "s"))) {
+                                               scale = 1000.0;
+                                       }
+                                       if (unit) {
+                                               *unit = '\0';
+                                               if (switch_is_number(t)) {
+                                                       /* strtod keeps the fraction that atoi would drop ("0.5s" is 500 ms, not 0) */
+                                                       double ms = strtod(t, NULL) * scale;
+                                                       if (ms >= max_ms) {
+                                                               timeout_ms = (int)max_ms;
+                                                       } else if (ms >= 1.0) {
+                                                               timeout_ms = (int)ms;
+                                                       }
+                                               }
+                                       }
+                                       if (timeout_ms > 0) {
+                                               switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Adding <break>: \"%s\"\n", t);
+                                               parsed_data->files[parsed_data->num_files].name = switch_core_sprintf(parsed_data->pool, "silence_stream://%i", timeout_ms);
+                                               parsed_data->files[parsed_data->num_files++].prefix = NULL;
+                                       }
+                               }
+                               return IKS_OK;
+                       }
+                       i += 2;
+               }
+       }
+       return IKS_OK;
+}
+
+/**
+ * Process <audio>- this is a URL to play
+ *
+ * The value of the first "src" attribute is appended to the file list
+ * unless it is empty or the list is full.
+ * @param parsed_data the parser
+ * @param atts the attributes as name/value pairs, may be NULL
+ * @return IKS_OK
+ */
+static int process_audio(struct ssml_parser *parsed_data, char **atts)
+{
+       int n;
+
+       if (!atts) {
+               return IKS_OK;
+       }
+
+       for (n = 0; atts[n]; n += 2) {
+               char *src;
+
+               if (strcmp("src", atts[n]) != 0) {
+                       continue;
+               }
+
+               src = atts[n + 1];
+               if (!zstr(src) && parsed_data->num_files < parsed_data->max_files) {
+                       struct ssml_file *slot = &parsed_data->files[parsed_data->num_files];
+                       /* get the URI */
+                       switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Adding <audio>: \"%s\"\n", src);
+                       slot->name = switch_core_strdup(parsed_data->pool, src);
+                       slot->prefix = NULL;
+                       parsed_data->num_files++;
+               }
+               return IKS_OK;
+       }
+
+       return IKS_OK;
+}
+
+/**
+ * Process a tag (iksemel SAX tag hook).
+ *
+ * For an opening or self-closing tag a node is pushed that inherits the
+ * requested name, language and gender of its parent, and the tag is handed
+ * to process_tag().  For a closing or self-closing tag the current node is
+ * popped and freed.
+ * @param user_data the struct ssml_parser
+ * @param name the tag name
+ * @param atts the attributes as name/value pairs, may be NULL
+ * @param type IKS_OPEN, IKS_CLOSE or IKS_SINGLE
+ * @return result of process_tag(), IKS_NOMEM if no node could be allocated,
+ *         IKS_OK for a closing tag
+ */
+static int tag_hook(void *user_data, char *name, char **atts, int type)
+{
+       int result = IKS_OK;
+       struct ssml_parser *parsed_data = (struct ssml_parser *)user_data;
+
+       if (type == IKS_OPEN || type == IKS_SINGLE) {
+               /* allocate only when a node is pushed; a closing tag needs none */
+               struct ssml_node *parent_node = parsed_data->cur_node;
+               struct ssml_node *new_node = malloc(sizeof *new_node);
+               if (!new_node) {
+                       return IKS_NOMEM;
+               }
+               if (parent_node) {
+                       /* inherit parent attribs */
+                       *new_node = *parent_node;
+               } else {
+                       /* start from a clean node so that no field is left uninitialized */
+                       memset(new_node, 0, sizeof *new_node);
+               }
+               new_node->parent_node = parent_node;
+               new_node->tts_voice = NULL;
+               new_node->say_macro = NULL;
+               strncpy(new_node->tag_name, name, TAG_LEN);
+               new_node->tag_name[TAG_LEN - 1] = '\0';
+               parsed_data->cur_node = new_node;
+               result = process_tag(parsed_data, name, atts);
+       }
+
+       if (type == IKS_CLOSE || type == IKS_SINGLE) {
+               if (parsed_data->cur_node) {
+                       struct ssml_node *closed_node = parsed_data->cur_node;
+                       parsed_data->cur_node = closed_node->parent_node;
+                       free(closed_node);
+               }
+       }
+
+       return result;
+}
+
+/**
+ * Try to get file(s) from say module
+ *
+ * Requires a say voice matching the node's language and a language_map
+ * entry for that voice.  The say module renders to_say into a file string
+ * which is appended to the file list with the voice prefix.
+ * The caller must make sure that cur_node->say_macro is set and that the
+ * file list has room for one more entry.
+ * @param parsed_data the parser
+ * @param to_say the text to render
+ * @return 1 if successful, 0 if no file was added
+ */
+static int get_file_from_macro(struct ssml_parser *parsed_data, char *to_say)
+{
+       struct ssml_node *cur_node = parsed_data->cur_node;
+       struct macro *say_macro = cur_node->say_macro;
+       struct voice *say_voice = find_say_voice(cur_node);
+       struct language *language;
+       char *file_string = NULL;
+       char *gender = NULL;
+       switch_say_interface_t *si;
+       int added = 0;
+
+       /* voice is required */
+       if (!say_voice) {
+               return 0;
+       }
+
+       language = switch_core_hash_find(globals.language_map, say_voice->language);
+       /* language is required */
+       if (!language) {
+               return 0;
+       }
+
+       /* TODO need to_say gender, not voice gender */
+       gender = "neuter";
+
+       switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Trying macro: %s, %s, %s, %s, %s\n", language->language, to_say, say_macro->type, say_macro->method, gender);
+
+       if ((si = switch_loadable_module_get_say_interface(language->say_module)) && si->say_string_function) {
+               switch_say_args_t say_args = {0};
+               say_args.type = switch_ivr_get_say_type_by_name(say_macro->type);
+               say_args.method = switch_ivr_get_say_method_by_name(say_macro->method);
+               say_args.gender = switch_ivr_get_say_gender_by_name(gender);
+               say_args.ext = "wav";
+               si->say_string_function(NULL, to_say, &say_args, &file_string);
+       }
+       /* file_string stays NULL when no say module produced anything; never hand NULL to %s */
+       switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Adding macro: \"%s\", prefix=\"%s\"\n", file_string ? file_string : "", say_voice->prefix);
+       if (!zstr(file_string)) {
+               parsed_data->files[parsed_data->num_files].name = switch_core_strdup(parsed_data->pool, file_string);
+               parsed_data->files[parsed_data->num_files++].prefix = switch_core_strdup(parsed_data->pool, say_voice->prefix);
+               added = 1;
+       }
+       /* the pool holds its own copy, so the say module's string is released on every path */
+       switch_safe_free(file_string);
+
+       return added;
+}
+
+/**
+ * Get TTS file for voice: append "<voice prefix><text>" to the file list.
+ * The caller must make sure that cur_node->tts_voice is set and that the
+ * file list has room for one more entry.
+ * @param parsed_data the parser
+ * @param to_say the text to speak
+ * @return 1
+ */
+static int get_file_from_voice(struct ssml_parser *parsed_data, char *to_say)
+{
+       struct ssml_node *cur_node = parsed_data->cur_node;
+       struct ssml_file *slot = &parsed_data->files[parsed_data->num_files];
+       char *tts_file = switch_core_sprintf(parsed_data->pool, "%s%s", cur_node->tts_voice->prefix, to_say);
+
+       switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Adding <%s>: \"%s\"\n", cur_node->tag_name, tts_file);
+
+       slot->name = tts_file;
+       slot->prefix = NULL;
+       parsed_data->num_files++;
+
+       return 1;
+}
+
+/**
+ * Get TTS from CDATA
+ *
+ * Text without any graphic character is skipped.  Otherwise the text is
+ * rendered through the node's say macro if there is one and it produces a
+ * file, and through the node's TTS voice if not.
+ * @param parsed_data the parser
+ * @param data the CDATA (not NUL terminated)
+ * @param len the CDATA length
+ * @return IKS_OK on success or when len is 0, IKS_NOMEM if the text could
+ *         not be copied, IKS_BADXML if there is no current node, no TTS
+ *         voice, or the file list is full
+ */
+static int process_cdata_tts(struct ssml_parser *parsed_data, char *data, size_t len)
+{
+       struct ssml_node *cur_node = parsed_data->cur_node;
+       if (!len) {
+               return IKS_OK;
+       }
+       if (cur_node && cur_node->tts_voice && parsed_data->num_files < parsed_data->max_files) {
+               size_t i;
+               int empty = 1;
+               char *to_say;
+
+               /* is CDATA empty? (<ctype.h> functions need an unsigned char value) */
+               for (i = 0; i < len && empty; i++) {
+                       empty = !isgraph((unsigned char)data[i]);
+               }
+               if (empty) {
+                       switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Skipping empty tts\n");
+                       return IKS_OK;
+               }
+
+               /* NUL terminated copy of the text */
+               to_say = malloc(len + 1);
+               if (!to_say) {
+                       return IKS_NOMEM;
+               }
+               strncpy(to_say, data, len);
+               to_say[len] = '\0';
+
+               /* try macro */
+               if (!cur_node->say_macro || !get_file_from_macro(parsed_data, to_say)) {
+                       /* use voice instead */
+                       get_file_from_voice(parsed_data, to_say);
+               }
+               free(to_say);
+               return IKS_OK;
+       }
+       return IKS_BADXML;
+}
+
+/**
+ * Process <sub>- this is an alias for text to speak
+ *
+ * The value of the first "alias" attribute is spoken as if it were CDATA.
+ * @param parsed_data the parser
+ * @param atts the attributes as name/value pairs, may be NULL
+ * @return result of process_cdata_tts() for the alias, IKS_BADXML if the
+ *         alias is empty, IKS_OK if there is no alias attribute
+ */
+static int process_sub(struct ssml_parser *parsed_data, char **atts)
+{
+       int n;
+
+       if (!atts) {
+               return IKS_OK;
+       }
+
+       for (n = 0; atts[n]; n += 2) {
+               char *alias;
+
+               if (strcmp("alias", atts[n]) != 0) {
+                       continue;
+               }
+
+               alias = atts[n + 1];
+               if (zstr(alias)) {
+                       return IKS_BADXML;
+               }
+               return process_cdata_tts(parsed_data, alias, strlen(alias));
+       }
+
+       return IKS_OK;
+}
+
+/**
+ * Process cdata (iksemel SAX cdata hook): forward the character data to
+ * the CDATA handler of the tag that is currently open.
+ * @param user_data the struct ssml_parser
+ * @param data the CDATA
+ * @param len the CDATA length
+ * @return result of the handler; IKS_OK outside of any tag; IKS_BADXML if
+ *         the parser is missing or the tag has no definition
+ */
+static int cdata_hook(void *user_data, char *data, size_t len)
+{
+       struct ssml_parser *parsed_data = (struct ssml_parser *)user_data;
+       struct tag_def *handler;
+
+       if (!parsed_data) {
+               switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "Missing parser\n");
+               return IKS_BADXML;
+       }
+
+       if (!parsed_data->cur_node) {
+               /* text outside of any tag is ignored */
+               return IKS_OK;
+       }
+
+       handler = switch_core_hash_find(globals.tag_defs, parsed_data->cur_node->tag_name);
+       return handler ? handler->cdata_fn(parsed_data, data, len) : IKS_BADXML;
+}
+
+/**
+ * Transforms SSML into file_string format and opens file_string.
+ *
+ * The document is parsed into a list of at most MAX_VOICE_FILES files
+ * allocated from the handle's pool; on success the first playable file is
+ * opened and the playback context is stored in handle->private_info.
+ * @param handle the file handle
+ * @param path the inline SSML
+ * @return SWITCH_STATUS_SUCCESS if opened; SWITCH_STATUS_FALSE on a parse
+ *         error, when the document yields no files, or when no file can
+ *         be opened; SWITCH_STATUS_MEMERR if the XML parser cannot be created
+ */
+static switch_status_t ssml_file_open(switch_file_handle_t *handle, const char *path)
+{
+       switch_status_t status = SWITCH_STATUS_FALSE;
+       struct ssml_context *context = switch_core_alloc(handle->memory_pool, sizeof(*context));
+       struct ssml_parser *parsed_data = switch_core_alloc(handle->memory_pool, sizeof(*parsed_data));
+       iksparser *parser = iks_sax_new(parsed_data, tag_hook, cdata_hook);
+       int parse_result;
+
+       if (!parser) {
+               switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Failed to create SSML parser\n");
+               return SWITCH_STATUS_MEMERR;
+       }
+
+       switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Open: %s\n", path);
+
+       parsed_data->cur_node = NULL;
+       parsed_data->files = switch_core_alloc(handle->memory_pool, sizeof(struct ssml_file) * MAX_VOICE_FILES);
+       parsed_data->max_files = MAX_VOICE_FILES;
+       parsed_data->num_files = 0;
+       parsed_data->pool = handle->memory_pool;
+       parsed_data->sample_rate = handle->samplerate;
+
+       parse_result = iks_parse(parser, path, 0, 1);
+
+       /* tags still open when parsing stopped (bad or truncated XML) left their nodes behind; release them */
+       while (parsed_data->cur_node) {
+               struct ssml_node *parent_node = parsed_data->cur_node->parent_node;
+               free(parsed_data->cur_node);
+               parsed_data->cur_node = parent_node;
+       }
+
+       if (parse_result == IKS_OK) {
+               if (parsed_data->num_files) {
+                       context->files = parsed_data->files;
+                       context->num_files = parsed_data->num_files;
+                       /* next_file() increments before use, so start before the first entry */
+                       context->index = -1;
+                       handle->private_info = context;
+                       status = next_file(handle);
+               } else {
+                       switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "No files to play: %s\n", path);
+               }
+       } else {
+               switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Parse error: %s, num_files = %i\n", path, parsed_data->num_files);
+       }
+
+       iks_parser_delete(parser);
+
+       return status;
+}
+
+/**
+ * Close SSML document: closes the file currently being played, if any.
+ * @param handle the file handle
+ * @return result of closing the current file, SWITCH_STATUS_SUCCESS if
+ *         none is open
+ */
+static switch_status_t ssml_file_close(switch_file_handle_t *handle)
+{
+       struct ssml_context *context = (struct ssml_context *)handle->private_info;
+       switch_status_t status = SWITCH_STATUS_SUCCESS;
+
+       if (switch_test_flag((&context->fh), SWITCH_FILE_OPEN)) {
+               status = switch_core_file_close(&context->fh);
+       }
+
+       return status;
+}
+
+/**
+ * Read audio from the current file of the SSML document. When that read
+ * fails, advance to the next file and retry the read once.
+ * @param handle the SSML file handle
+ * @param data buffer receiving the audio
+ * @param len in: amount requested, out: amount read
+ * @return status of the last read, or of next_file() when no further file could be opened
+ */
+static switch_status_t ssml_file_read(switch_file_handle_t *handle, void *data, size_t *len)
+{
+       struct ssml_context *ssml = handle->private_info;
+       const size_t requested = *len;
+       switch_status_t result;
+
+       result = switch_core_file_read(&ssml->fh, data, len);
+       if (result == SWITCH_STATUS_SUCCESS) {
+               return result;
+       }
+
+       /* current file could not be read: move on to the next one */
+       result = next_file(handle);
+       if (result != SWITCH_STATUS_SUCCESS) {
+               return result;
+       }
+
+       /* the failed read may have changed *len; restore the caller's request */
+       *len = requested;
+       return switch_core_file_read(&ssml->fh, data, len);
+}
+
+/**
+ * Seek within the SSML document. A seek to sample 0 from the start rewinds
+ * to the first file; any other seek is forwarded to the current file when
+ * the handle is seekable.
+ * @param handle the SSML file handle
+ * @param cur_sample passed through to switch_core_file_seek()
+ * @param samples seek offset
+ * @param whence seek origin
+ * @return seek status, or SWITCH_STATUS_NOTIMPL when the handle is not seekable
+ */
+static switch_status_t ssml_file_seek(switch_file_handle_t *handle, unsigned int *cur_sample, int64_t samples, int whence)
+{
+       struct ssml_context *ssml = handle->private_info;
+       const int rewind_requested = (whence == SWITCH_SEEK_SET && samples == 0);
+
+       if (rewind_requested) {
+               /* index -1 makes next_file() reopen the first file */
+               ssml->index = -1;
+               return next_file(handle);
+       }
+
+       if (handle->seekable) {
+               return switch_core_file_seek(&ssml->fh, cur_sample, samples, whence);
+       }
+
+       switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "File is not seekable\n");
+       return SWITCH_STATUS_NOTIMPL;
+}
+
+/**
+ * TTS playback state, stored in the file handle's private_info by tts_file_open()
+ */
+struct tts_context {
+       /** speech handle passed to the switch_core_speech_* open/feed/read/flush/close calls */
+       switch_speech_handle_t sh;
+       /** speech flags passed along with sh; set to SWITCH_SPEECH_FLAG_BLOCKING before each read */
+       switch_speech_flag_t flags;
+       /** cap on samples returned per read; tts_file_open() sets it to samplerate / 1000 * SWITCH_MAX_INTERVAL */
+       int max_frame_size;
+       /** set to 1 once a TTS read returns a non-zero status; later reads yield silence and SWITCH_STATUS_FALSE */
+       int done;
+};
+
+/**
+ * Open a TTS engine as if it were a file and feed it the text to speak.
+ * @param handle the file handle being opened
+ * @param path three '|' separated fields: module|voice|document
+ * @return SWITCH_STATUS_SUCCESS if the engine was opened and accepted the document,
+ *         SWITCH_STATUS_FALSE if path does not have exactly three fields
+ */
+static switch_status_t tts_file_open(switch_file_handle_t *handle, const char *path)
+{
+       switch_status_t status = SWITCH_STATUS_SUCCESS;
+       struct tts_context *tts = switch_core_alloc(handle->memory_pool, sizeof(*tts));
+       char *spec = switch_core_strdup(handle->memory_pool, path);
+       char *fields[3] = { 0 };
+       int num_fields = switch_separate_string(spec, '|', fields, (sizeof(fields) / sizeof(fields[0])));
+
+       /* path is module:(optional)profile|voice|{param1=val1,param2=val2}TTS document */
+       if (num_fields != 3) {
+               return SWITCH_STATUS_FALSE;
+       }
+
+       memset(tts, 0, sizeof(*tts));
+       tts->flags = SWITCH_SPEECH_FLAG_NONE;
+
+       /* fields[0] = module, fields[1] = voice, fields[2] = document */
+       status = switch_core_speech_open(&tts->sh, fields[0], fields[1], handle->samplerate, handle->interval, &tts->flags, NULL);
+       if (status == SWITCH_STATUS_SUCCESS) {
+               status = switch_core_speech_feed_tts(&tts->sh, fields[2], &tts->flags);
+               if (status != SWITCH_STATUS_SUCCESS) {
+                       /* engine rejected the document: do not leave it open */
+                       switch_core_speech_close(&tts->sh, &tts->flags);
+               } else {
+                       handle->channels = 1;
+                       handle->samples = 0;
+                       handle->format = 0;
+                       handle->sections = 0;
+                       handle->seekable = 0;
+                       handle->speed = 0;
+                       tts->max_frame_size = handle->samplerate / 1000 * SWITCH_MAX_INTERVAL;
+               }
+       }
+
+       /* the context is attached whether or not the engine could be opened */
+       handle->private_info = tts;
+       return status;
+}
+
+/**
+ * Pull synthesized audio out of the TTS engine.
+ * @param handle the TTS file handle
+ * @param data buffer receiving the audio
+ * @param len in: samples requested (capped at max_frame_size), out: samples delivered
+ * @return status of the engine read, or SWITCH_STATUS_FALSE once the engine has finished
+ */
+static switch_status_t tts_file_read(switch_file_handle_t *handle, void *data, size_t *len)
+{
+       struct tts_context *tts = (struct tts_context *)handle->private_info;
+       switch_status_t status;
+       switch_size_t nbytes;
+
+       /* never ask for more than one maximum-sized frame */
+       if (*len > tts->max_frame_size) {
+               *len = tts->max_frame_size;
+       }
+       nbytes = *len * 2; /* bytes = samples * 2 */
+
+       if (tts->done) {
+               /* an earlier read already failed: flush the engine and return silence */
+               switch_core_speech_flush_tts(&tts->sh);
+               memset(data, 0, nbytes);
+               status = SWITCH_STATUS_FALSE;
+       } else {
+               tts->flags = SWITCH_SPEECH_FLAG_BLOCKING;
+               status = switch_core_speech_read_tts(&tts->sh, data, &nbytes, &tts->flags);
+               if (status) {
+                       /* any non-zero status ends playback; remember it for the next call */
+                       tts->done = 1;
+               }
+       }
+
+       *len = nbytes / 2; /* samples = bytes / 2 */
+       return status;
+}
+
+/**
+ * Shut down the TTS engine attached to the file handle.
+ * @param handle the TTS file handle being closed
+ * @return SWITCH_STATUS_SUCCESS always; the result of the speech close is not inspected
+ */
+static switch_status_t tts_file_close(switch_file_handle_t *handle)
+{
+       struct tts_context *tts = handle->private_info;
+
+       switch_core_speech_close(&tts->sh, &tts->flags);
+       return SWITCH_STATUS_SUCCESS;
+}
+
+/**
+ * Load every <voice> child of a voices section into a voice map.
+ * Voices are given descending priority in document order, starting at
+ * MAX_VOICE_PRIORITY. Entries lacking a name or a prefix are skipped.
+ * @param pool memory pool to use
+ * @param voices XML element containing the <voice> entries (may be NULL)
+ * @param map voice map to load
+ * @param type type of voices (for logging)
+ */
+static void do_config_voices(switch_memory_pool_t *pool, switch_xml_t voices, switch_hash_t *map, const char *type)
+{
+       int next_priority = MAX_VOICE_PRIORITY;
+       switch_xml_t node;
+
+       if (!voices) {
+               return;
+       }
+
+       for (node = switch_xml_child(voices, "voice"); node; node = node->next) {
+               const char *name = switch_xml_attr_soft(node, "name");
+               const char *language = switch_xml_attr_soft(node, "language");
+               const char *gender = switch_xml_attr_soft(node, "gender");
+               const char *prefix = switch_xml_attr_soft(node, "prefix");
+               struct voice *entry;
+
+               switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "%s map (%s, %s, %s) = %s\n", type, name, language, gender, prefix);
+
+               /* name is the map key and prefix is required; skip incomplete entries */
+               if (zstr(name) || zstr(prefix)) {
+                       continue;
+               }
+
+               entry = switch_core_alloc(pool, sizeof(*entry));
+               entry->name = switch_core_strdup(pool, name);
+               entry->language = switch_core_strdup(pool, language);
+               entry->gender = switch_core_strdup(pool, gender);
+               entry->prefix = switch_core_strdup(pool, prefix);
+               /* only voices that are actually stored consume a priority slot */
+               entry->priority = next_priority--;
+               switch_core_hash_insert(map, name, entry);
+       }
+}
+
+/**
+ * Load ssml.conf: the TTS and say voice maps, the language map and the
+ * interpret-as macros.
+ * @param pool memory pool to use
+ * @return SWITCH_STATUS_SUCCESS if module is configured, SWITCH_STATUS_TERM if
+ *         the configuration could not be opened
+ */
+static switch_status_t do_config(switch_memory_pool_t *pool)
+{
+       char *cf = "ssml.conf";
+       switch_xml_t cfg, xml;
+       switch_xml_t section, entry;
+
+       xml = switch_xml_open_cfg(cf, &cfg, NULL);
+       if (!xml) {
+               switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "open of %s failed\n", cf);
+               return SWITCH_STATUS_TERM;
+       }
+
+       /* voices */
+       do_config_voices(pool, switch_xml_child(cfg, "tts-voices"), globals.tts_voice_map, "tts");
+       do_config_voices(pool, switch_xml_child(cfg, "say-voices"), globals.say_voice_map, "say");
+
+       /* languages, keyed by their iso attribute */
+       section = switch_xml_child(cfg, "language-map");
+       if (section) {
+               for (entry = switch_xml_child(section, "language"); entry; entry = entry->next) {
+                       const char *iso = switch_xml_attr_soft(entry, "iso");
+                       const char *say_module = switch_xml_attr_soft(entry, "say-module");
+                       const char *lang = switch_xml_attr_soft(entry, "language");
+                       struct language *mapping;
+
+                       switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "language map: %s = (%s, %s) \n", iso, say_module, lang);
+
+                       /* all three attributes are required */
+                       if (zstr(iso) || zstr(say_module) || zstr(lang)) {
+                               continue;
+                       }
+
+                       mapping = switch_core_alloc(pool, sizeof(*mapping));
+                       mapping->iso = switch_core_strdup(pool, iso);
+                       mapping->say_module = switch_core_strdup(pool, say_module);
+                       mapping->language = switch_core_strdup(pool, lang);
+                       switch_core_hash_insert(globals.language_map, iso, mapping);
+               }
+       }
+
+       /* macros, keyed by name; method is optional */
+       section = switch_xml_child(cfg, "macros");
+       if (section) {
+               for (entry = switch_xml_child(section, "macro"); entry; entry = entry->next) {
+                       const char *name = switch_xml_attr_soft(entry, "name");
+                       const char *method = switch_xml_attr_soft(entry, "method");
+                       const char *type = switch_xml_attr_soft(entry, "type");
+                       struct macro *definition;
+
+                       switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "macro: %s = (%s, %s) \n", name, method, type);
+
+                       if (zstr(name) || zstr(type)) {
+                               continue;
+                       }
+
+                       definition = switch_core_alloc(pool, sizeof(*definition));
+                       definition->name = switch_core_strdup(pool, name);
+                       definition->method = switch_core_strdup(pool, method);
+                       definition->type = switch_core_strdup(pool, type);
+                       switch_core_hash_insert(globals.interpret_as_map, name, definition);
+               }
+       }
+
+       /* every value kept above was copied into the pool, so the XML can go */
+       switch_xml_free(xml);
+
+       return SWITCH_STATUS_SUCCESS;
+}
+
+static char *ssml_supported_formats[] = { "ssml", NULL }; /* NULL-terminated list assigned to the SSML file interface's extens */
+static char *tts_supported_formats[] = { "tts", NULL }; /* NULL-terminated list assigned to the TTS file interface's extens */
+
+SWITCH_MODULE_LOAD_FUNCTION(mod_ssml_load) /* module load: registers the two file formats, the SSML tag table, then reads the config */
+{
+       switch_file_interface_t *file_interface;
+       /* "ssml" file format: open, close, read and seek are provided */
+       *module_interface = switch_loadable_module_create_module_interface(pool, modname);
+       file_interface = switch_loadable_module_create_interface(*module_interface, SWITCH_FILE_INTERFACE);
+       file_interface->interface_name = modname;
+       file_interface->extens = ssml_supported_formats;
+       file_interface->file_open = ssml_file_open;
+       file_interface->file_close = ssml_file_close;
+       file_interface->file_read = ssml_file_read;
+       file_interface->file_seek = ssml_file_seek;
+       /* "tts" file format: open, close and read only (no seek handler is set) */
+       file_interface = switch_loadable_module_create_interface(*module_interface, SWITCH_FILE_INTERFACE);
+       file_interface->interface_name = modname;
+       file_interface->extens = tts_supported_formats;
+       file_interface->file_open = tts_file_open;
+       file_interface->file_close = tts_file_close;
+       file_interface->file_read = tts_file_read;
+       /* TODO allow skip ahead if TTS supports it
+        * file_interface->file_seek = tts_file_seek;
+        */
+       /* module-wide lookup tables, all created from the module pool */
+       globals.pool = pool;
+       switch_core_hash_init(&globals.voice_cache, pool);
+       switch_core_hash_init(&globals.tts_voice_map, pool);
+       switch_core_hash_init(&globals.say_voice_map, pool);
+       switch_core_hash_init(&globals.interpret_as_map, pool);
+       switch_core_hash_init(&globals.language_map, pool);
+       switch_core_hash_init(&globals.tag_defs, pool);
+       /* SSML tag table. NOTE(review): arguments look like (tag name, attribute handler,
+        * cdata handler, comma-separated list of permitted child tags) - confirm against add_tag_def() */
+       add_root_tag_def("speak", process_xml_lang, process_cdata_tts, "audio,break,emphasis,mark,phoneme,prosody,say-as,voice,sub,p,s,lexicon,metadata,meta");
+       add_tag_def("p", process_xml_lang, process_cdata_tts, "audio,break,emphasis,mark,phoneme,prosody,say-as,voice,sub,s");
+       add_tag_def("s", process_xml_lang, process_cdata_tts, "audio,break,emphasis,mark,phoneme,prosody,say-as,voice,sub");
+       add_tag_def("voice", process_voice, process_cdata_tts, "audio,break,emphasis,mark,phoneme,prosody,say-as,voice,sub,p,s");
+       add_tag_def("prosody", process_attribs_ignore, process_cdata_tts, "audio,break,emphasis,mark,phoneme,prosody,say-as,voice,sub,p,s");
+       add_tag_def("audio", process_audio, process_cdata_tts, "audio,break,emphasis,mark,phoneme,prosody,say-as,voice,sub,p,s,desc");
+       add_tag_def("desc", process_attribs_ignore, process_cdata_ignore, "");
+       add_tag_def("emphasis", process_attribs_ignore, process_cdata_tts, "audio,break,emphasis,mark,phoneme,prosody,say-as,voice,sub");
+       add_tag_def("say-as", process_say_as, process_cdata_tts, "");
+       add_tag_def("sub", process_sub, process_cdata_ignore, "");
+       add_tag_def("phoneme", process_attribs_ignore, process_cdata_tts, "");
+       add_tag_def("break", process_break, process_cdata_bad, "");
+       add_tag_def("mark", process_attribs_ignore, process_cdata_bad, "");
+       add_tag_def("lexicon", process_attribs_ignore, process_cdata_bad, "");
+       add_tag_def("metadata", process_attribs_ignore, process_cdata_ignore, "ANY");
+       add_tag_def("meta", process_attribs_ignore, process_cdata_bad, "");
+       /* the load result is whatever reading ssml.conf returns */
+       return do_config(pool);
+}
+
+SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_ssml_shutdown) /* module shutdown hook */
+{
+       return SWITCH_STATUS_SUCCESS; /* no explicit cleanup is performed here */
+}
+
+/* For Emacs:
+ * Local Variables:
+ * mode:c
+ * indent-tabs-mode:t
+ * tab-width:4
+ * c-basic-offset:4
+ * End:
+ * For VIM:
+ * vim:set softtabstop=4 shiftwidth=4 tabstop=4:
+ */
author	Chris Rienzo <chris.rienzo@grasshopper.com>
	Thu, 6 Jun 2013 19:34:49 +0000 (15:34 -0400)
committer	Travis Cross <tc@traviscross.com>
	Fri, 7 Jun 2013 01:08:08 +0000 (01:08 +0000)
build/modules.conf.in		patch \| blob \| blame \| history
src/mod/formats/mod_ssml/Makefile	[new file with mode: 0644]	patch \| blob
src/mod/formats/mod_ssml/conf/autoload_configs/ssml.conf.xml	[new file with mode: 0644]	patch \| blob
src/mod/formats/mod_ssml/mod_ssml.c	[new file with mode: 0644]	patch \| blob