char *grammar;
char *channel_uuid;
switch_vad_t *vad;
+ int partial;
} test_asr_t;
}
if (switch_test_flag(context, ASRFLAG_RESULT)) {
+ int is_partial = context->partial-- > 0 ? 1 : 0;
*resultstr = switch_mprintf("{\"grammar\": \"%s\", \"text\": \"%s\", \"confidence\": %f}", context->grammar, context->result_text, context->result_confidence);
- switch_log_printf(SWITCH_CHANNEL_UUID_LOG(context->channel_uuid), SWITCH_LOG_ERROR, "Result: %s\n", *resultstr);
+ switch_log_printf(SWITCH_CHANNEL_UUID_LOG(context->channel_uuid), SWITCH_LOG_NOTICE, "%sResult: %s\n", is_partial ? "Partial " : "Final ", *resultstr);
- status = SWITCH_STATUS_SUCCESS;
+ if (is_partial) {
+ status = SWITCH_STATUS_MORE_DATA;
+ } else {
+ status = SWITCH_STATUS_SUCCESS;
+ }
} else if (switch_test_flag(context, ASRFLAG_NOINPUT_TIMEOUT)) {
switch_log_printf(SWITCH_CHANNEL_UUID_LOG(context->channel_uuid), SWITCH_LOG_DEBUG, "Result: NO INPUT\n");
} else if (!strcasecmp("confidence", param) && fval >= 0.0) {
context->result_confidence = fval;
switch_log_printf(SWITCH_CHANNEL_UUID_LOG(context->channel_uuid), SWITCH_LOG_DEBUG, "confidence = %f\n", fval);
+ } else if (!strcasecmp("partial", param) && switch_true(val)) {
+ context->partial = 3;
+ switch_log_printf(SWITCH_CHANNEL_UUID_LOG(context->channel_uuid), SWITCH_LOG_DEBUG, "partial = %d\n", context->partial);
}
}
}
char *result = NULL;
- switch_status_t status = switch_ivr_play_and_detect_speech(session, file, engine, grammar, &result, 0, NULL);
+ switch_status_t status = switch_ivr_play_and_detect_speech(session, file, engine, grammar, &result, 0, ap);
if (status == SWITCH_STATUS_SUCCESS) {
// good
} else if (status == SWITCH_STATUS_GENERR) {
} else if (status == SWITCH_STATUS_NOT_INITALIZED) {
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "ASR INIT ERROR\n");
} else {
- switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "ERROR\n");
+ switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "ERROR status = %d\n", status);
}
end_allow_threads();
- return result; // remeber to free me
+ return result ? strdup(result) : NULL; // remember to free me
}
SWITCH_DECLARE(void) CoreSession::say(const char *tosay, const char *module_name, const char *say_type, const char *say_method, const char *say_gender)
typedef struct { /* state handed to play_and_detect_input_callback through the input args buffer */
int done; /* PLAY_AND_DETECT_DONE* flags; any non-zero value ends the wait-for-result loop */
char *result; /* result string; set to "" on the paths that produce no usable result */
+ switch_input_args_t *original_args; /* caller-supplied input args that ASR events are forwarded to; NULL when none (or once the final result has been delivered) */
} play_and_detect_speech_state_t;
+/*
+ * Relay an ASR event to the input callback supplied by the original caller.
+ *
+ * session - session the event belongs to
+ * event   - the event to hand over; passed to the callback as SWITCH_INPUT_TYPE_EVENT
+ * args    - the caller's own input args; may be NULL
+ *
+ * Does nothing when args is NULL or carries no input_callback.
+ */
+static void deliver_asr_event(switch_core_session_t *session, switch_event_t *event, switch_input_args_t *args)
+{
+	if (!args || !args->input_callback) {
+		return;
+	}
+
+	/* The callback receives its own buf/buflen; its return status is discarded. */
+	args->input_callback(session, (void *)event, SWITCH_INPUT_TYPE_EVENT, args->buf, args->buflen);
+}
+
static switch_status_t play_and_detect_input_callback(switch_core_session_t *session, void *input, switch_input_type_t input_type, void *data, unsigned int len)
{
play_and_detect_speech_state_t *state = (play_and_detect_speech_state_t *)data;
event = (switch_event_t *)input;
if (event->event_id == SWITCH_EVENT_DETECTED_SPEECH) {
const char *speech_type = switch_event_get_header(event, "Speech-Type");
+
if (!zstr(speech_type)) {
+ deliver_asr_event(session, event, state->original_args);
+
if (!strcasecmp(speech_type, "detected-speech")) {
const char *result;
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, "(%s) DETECTED SPEECH\n", switch_channel_get_name(channel));
} else {
state->result = "";
}
+ state->original_args = NULL;
state->done = PLAY_AND_DETECT_DONE_RECOGNIZING;
return SWITCH_STATUS_BREAK;
+ } else if (!strcasecmp(speech_type, "detected-partial-speech")) {
+ // ok
} else if (!strcasecmp(speech_type, "begin-speaking")) {
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, "(%s) START OF SPEECH\n", switch_channel_get_name(channel));
return SWITCH_STATUS_BREAK;
state->done = PLAY_AND_DETECT_DONE_RECOGNIZING;
state->result = "";
return SWITCH_STATUS_BREAK;
+ } else {
+ switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "unhandled speech type %s\n", speech_type);
}
}
}
switch_status_t status = SWITCH_STATUS_FALSE;
int recognizing = 0;
switch_input_args_t myargs = { 0 };
- play_and_detect_speech_state_t state = { 0, "" };
+ play_and_detect_speech_state_t state = { 0, "", NULL };
switch_channel_t *channel = switch_core_session_get_channel(session);
arg_recursion_check_start(args);
if (!input_timeout) input_timeout = 5000;
- if (!args) {
- args = &myargs;
- }
-
/* start speech detection */
if ((status = switch_ivr_detect_speech(session, mod_name, grammar, "", NULL, NULL)) != SWITCH_STATUS_SUCCESS) {
/* map SWITCH_STATUS_FALSE to SWITCH_STATUS_GENERR to indicate grammar load failed
recognizing = 1;
/* play the prompt, looking for detection result */
- args->input_callback = play_and_detect_input_callback;
- args->buf = &state;
- args->buflen = sizeof(state);
- status = switch_ivr_play_file(session, NULL, file, args);
- if (args->dmachine && switch_ivr_dmachine_last_ping(args->dmachine) != SWITCH_STATUS_SUCCESS) {
+ if (args) {
+ state.original_args = args;
+ myargs.dmachine = args->dmachine;
+ }
+
+ myargs.input_callback = play_and_detect_input_callback;
+ myargs.buf = &state;
+ myargs.buflen = sizeof(state);
+
+ status = switch_ivr_play_file(session, NULL, file, &myargs);
+
+ if (args && args->dmachine && switch_ivr_dmachine_last_ping(args->dmachine) != SWITCH_STATUS_SUCCESS) {
state.done |= PLAY_AND_DETECT_DONE;
goto done;
}
switch_ivr_detect_speech_start_input_timers(session);
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, "(%s) WAITING FOR RESULT\n", switch_channel_get_name(channel));
while (!state.done && switch_channel_ready(channel)) {
- status = switch_ivr_sleep(session, input_timeout, SWITCH_FALSE, args);
+ status = switch_ivr_sleep(session, input_timeout, SWITCH_FALSE, &myargs);
- if (args->dmachine && switch_ivr_dmachine_last_ping(args->dmachine) != SWITCH_STATUS_SUCCESS) {
+ if (args && args->dmachine && switch_ivr_dmachine_last_ping(args->dmachine) != SWITCH_STATUS_SUCCESS) {
state.done |= PLAY_AND_DETECT_DONE;
goto done;
}
}
}
-
-
done:
if (recognizing && !(state.done & PLAY_AND_DETECT_DONE_RECOGNIZING)) {
switch_ivr_pause_detect_speech(session);
char terminator;
switch_time_t last_digit_time;
switch_bool_t is_speech;
+ switch_input_args_t *original_args;
} switch_collect_input_state_t;
+/*
+ * Hand an ASR event to the caller-provided input callback, if there is one.
+ *
+ * session - session the event was raised on
+ * event   - event forwarded to the callback with type SWITCH_INPUT_TYPE_EVENT
+ * args    - input args originally supplied by the caller; NULL is accepted
+ *
+ * A missing args pointer or a missing callback makes this a no-op.
+ */
+static void deliver_asr_event(switch_core_session_t *session, switch_event_t *event, switch_input_args_t *args)
+{
+	if (!args || !args->input_callback) {
+		return;
+	}
+
+	/* Invoke with the caller's own buffer; the returned status is not used. */
+	args->input_callback(session, (void *)event, SWITCH_INPUT_TYPE_EVENT, args->buf, args->buflen);
+}
+
static switch_status_t switch_collect_input_callback(switch_core_session_t *session, void *input, switch_input_type_t input_type, void *data, unsigned int len)
{
switch_collect_input_state_t *state = (switch_collect_input_state_t *)data;
if (zstr(speech_type)) return SWITCH_STATUS_SUCCESS;
+ deliver_asr_event(session, event, state->original_args);
+
if (!strcasecmp(speech_type, "detected-speech")) {
const char *result = switch_event_get_body(event);
/* stop waiting for speech */
switch_set_flag(state, SWITCH_COLLECT_INPUT_SPEECH_DONE);
- switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, "(%s) DETECTED SPEECH\n", switch_channel_get_name(channel));
+ switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, "(%s) DETECTED SPEECH %s\n", switch_channel_get_name(channel), speech_type);
if (!zstr(result)) {
state->recognition_result = cJSON_Parse(result);
}
}
return SWITCH_STATUS_BREAK;
- }
-
- if (!strcasecmp("closed", speech_type)) {
+ } else if (!strcasecmp(speech_type, "detected-partial-speech")) {
+ } else if (!strcasecmp("closed", speech_type)) {
/* stop waiting for speech */
switch_set_flag(state, SWITCH_COLLECT_INPUT_SPEECH_DONE);
return SWITCH_STATUS_BREAK;
- }
-
- if (!strcasecmp(speech_type, "begin-speaking")) {
+ } else if (!strcasecmp(speech_type, "begin-speaking")) {
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, "(%s) START OF SPEECH\n", switch_channel_get_name(channel));
state->is_speech = SWITCH_TRUE;
/* barge in on prompt */
return SWITCH_STATUS_BREAK;
}
+ } else {
+ switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, "Unhandled Speech-Type %s\n", speech_type);
}
}
}
}
- if (!args) {
- args = &myargs;
- }
-
/* start speech recognition, if enabled */
if (recognizer_grammar && recognizer_mod_name) {
if ((status = switch_ivr_detect_speech(session, recognizer_mod_name, recognizer_grammar, "", NULL, NULL)) != SWITCH_STATUS_SUCCESS) {
}
/* play the prompt, looking for input result */
- args->input_callback = switch_collect_input_callback;
- args->buf = &state;
- args->buflen = sizeof(state);
+
+ if (args) {
+ state.original_args = args;
+ myargs.dmachine = args->dmachine;
+ }
+
+ myargs.input_callback = switch_collect_input_callback;
+ myargs.buf = &state;
+ myargs.buflen = sizeof(state);
+
+
switch_set_flag(&state, SWITCH_COLLECT_INPUT_PROMPT);
- status = switch_ivr_play_file(session, NULL, prompt, args);
+ status = switch_ivr_play_file(session, NULL, prompt, &myargs);
switch_clear_flag(&state, SWITCH_COLLECT_INPUT_PROMPT);
- if (args->dmachine && switch_ivr_dmachine_last_ping(args->dmachine) != SWITCH_STATUS_SUCCESS) {
+ if (args && args->dmachine && switch_ivr_dmachine_last_ping(args->dmachine) != SWITCH_STATUS_SUCCESS) {
switch_set_flag(&state, SWITCH_COLLECT_INPUT_DIGITS_DONE);
switch_set_flag(&state, SWITCH_COLLECT_INPUT_SPEECH_DONE);
status = SWITCH_STATUS_SUCCESS;
while ((!switch_test_flag(&state, SWITCH_COLLECT_INPUT_DIGITS_DONE) || !switch_test_flag(&state, SWITCH_COLLECT_INPUT_SPEECH_DONE))
&& switch_channel_ready(channel)) {
- status = switch_ivr_sleep(session, sleep_time, SWITCH_FALSE, args);
+ status = switch_ivr_sleep(session, sleep_time, SWITCH_FALSE, &myargs);
- if (args->dmachine && switch_ivr_dmachine_last_ping(args->dmachine) != SWITCH_STATUS_SUCCESS) {
+ if (args && args->dmachine && switch_ivr_dmachine_last_ping(args->dmachine) != SWITCH_STATUS_SUCCESS) {
// dmachine done
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "(%s) DMACHINE DONE\n", switch_channel_get_name(channel));
switch_set_flag(&state, SWITCH_COLLECT_INPUT_DIGITS_DONE);
#include <test/switch_test.h>
+/*
+ * Input callback used by the play_and_collect_input_partial test.
+ *
+ * Counts SWITCH_EVENT_DETECTED_SPEECH events whose Speech-Type header is
+ * "detected-partial-speech" and logs the body of each one.
+ *
+ * session    - session the input arrived on (used for logging only)
+ * input      - a switch_event_t * when input_type is SWITCH_INPUT_TYPE_EVENT
+ * input_type - kind of input; anything other than an event is ignored
+ * data       - pointer to the int counter that is incremented per partial event
+ * len        - unused
+ *
+ * Always returns SWITCH_STATUS_SUCCESS.
+ */
+static switch_status_t partial_play_and_collect_input_callback(switch_core_session_t *session, void *input, switch_input_type_t input_type, void *data, __attribute__((unused))unsigned int len)
+{
+	int *count = (int *)data;
+	switch_event_t *event;
+	const char *speech_type;
+	const char *body;
+
+	/* DTMF and every other non-event input is of no interest here. */
+	if (input_type != SWITCH_INPUT_TYPE_EVENT) {
+		return SWITCH_STATUS_SUCCESS;
+	}
+
+	event = (switch_event_t *)input;
+	if (event->event_id != SWITCH_EVENT_DETECTED_SPEECH) {
+		return SWITCH_STATUS_SUCCESS;
+	}
+
+	/* Only partial results are counted; final results and other speech events pass through. */
+	speech_type = switch_event_get_header(event, "Speech-Type");
+	if (zstr(speech_type) || strcmp(speech_type, "detected-partial-speech")) {
+		return SWITCH_STATUS_SUCCESS;
+	}
+
+	(*count)++;
+	switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, "partial events count: %d\n", *count);
+
+	/* An event may carry no body; never hand a NULL pointer to a %s conversion. */
+	body = switch_event_get_body(event);
+	switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_NOTICE, "body=[%s]\n", body ? body : "");
+
+	return SWITCH_STATUS_SUCCESS;
+}
+
FST_CORE_BEGIN("./conf_playsay")
{
FST_SUITE_BEGIN(switch_ivr_play_say)
fst_sched_recv_dtmf("+2", "2");
fst_sched_recv_dtmf("+3", "3");
status = switch_ivr_play_and_collect_input(fst_session, play_files, speech_engine, speech_grammar_args, min_digits, max_digits, terminators, digit_timeout, &recognition_result, &digits_collected, &terminator_collected, NULL);
-
+ fst_check(recognition_result == NULL);
fst_check(status == SWITCH_STATUS_SUCCESS); // might be break?
fst_check_string_equals(cJSON_GetObjectCstr(recognition_result, "text"), NULL);
fst_check_string_equals(digits_collected, "123");
fst_check(terminator_collected == 0);
+ cJSON_Delete(recognition_result);
}
FST_SESSION_END()
- FST_SESSION_BEGIN(play_and_collect_input)
+ FST_SESSION_BEGIN(play_and_collect_input_success)
{
char terminator_collected = 0;
char *digits_collected = NULL;
recognition_result = NULL;
fst_time_mark();
status = switch_ivr_play_and_collect_input(fst_session, play_files, speech_engine, speech_grammar_args, min_digits, max_digits, terminators, digit_timeout, &recognition_result, &digits_collected, &terminator_collected, NULL);
-
+ fst_check(recognition_result == NULL);
// check results
fst_check_duration(2500, 1000); // should return immediately when term digit is received
fst_check(status == SWITCH_STATUS_SUCCESS); // might be break?
recognition_result = NULL;
fst_time_mark();
status = switch_ivr_play_and_collect_input(fst_session, play_files, speech_engine, speech_grammar_args, min_digits, max_digits, terminators, digit_timeout, &recognition_result, &digits_collected, &terminator_collected, NULL);
-
+ fst_check(recognition_result == NULL);
// check results
fst_check(status == SWITCH_STATUS_SUCCESS); // might be break?
fst_check_duration(7000, 1000); // should return after timeout when prompt finishes playing
recognition_result = NULL;
fst_time_mark();
status = switch_ivr_play_and_collect_input(fst_session, play_files, speech_engine, speech_grammar_args, min_digits, max_digits, terminators, digit_timeout, &recognition_result, &digits_collected, &terminator_collected, NULL);
-
+ fst_check(recognition_result == NULL);
// check results
fst_check(status == SWITCH_STATUS_SUCCESS); // might be break?
fst_check_duration(2500, 1000); // should return after timeout when prompt finishes playing
recognition_result = NULL;
fst_time_mark();
status = switch_ivr_play_and_collect_input(fst_session, play_files, speech_engine, speech_grammar_args, min_digits, max_digits, terminators, digit_timeout, &recognition_result, &digits_collected, &terminator_collected, NULL);
-
+ fst_check(recognition_result == NULL);
// check results
fst_check(status == SWITCH_STATUS_SUCCESS); // might be break?
fst_check_duration(10000, 1000); // should return when dtmf terminator is pressed
recognition_result = NULL;
fst_time_mark();
status = switch_ivr_play_and_collect_input(fst_session, play_files, speech_engine, speech_grammar_args, min_digits, max_digits, terminators, digit_timeout, &recognition_result, &digits_collected, &terminator_collected, NULL);
-
+ fst_requires(recognition_result);
// check results
fst_check(status == SWITCH_STATUS_SUCCESS); // might be break?
fst_check_duration(2500, 1000); // returns when utterance is done
recognition_result = NULL;
fst_time_mark();
status = switch_ivr_play_and_collect_input(fst_session, play_files, speech_engine, speech_grammar_args, min_digits, max_digits, terminators, digit_timeout, &recognition_result, &digits_collected, &terminator_collected, NULL);
-
+ fst_check(recognition_result == NULL);
// check results
fst_check(status == SWITCH_STATUS_SUCCESS); // might be break?
fst_check_duration(2500, 1000); // returns when single digit is pressed
recognition_result = NULL;
fst_time_mark();
status = switch_ivr_play_and_collect_input(fst_session, play_files, speech_engine, speech_grammar_args, min_digits, max_digits, terminators, digit_timeout, &recognition_result, &digits_collected, &terminator_collected, NULL);
-
+ fst_check(recognition_result == NULL);
// check results
fst_check(status == SWITCH_STATUS_SUCCESS); // might be break?
fst_check_duration(2000, 1000); // returns when single digit is pressed
recognition_result = NULL;
fst_time_mark();
status = switch_ivr_play_and_collect_input(fst_session, play_files, speech_engine, speech_grammar_args, min_digits, max_digits, terminators, digit_timeout, &recognition_result, &digits_collected, &terminator_collected, NULL);
-
+ fst_requires(recognition_result);
// check results
fst_check(status == SWITCH_STATUS_SUCCESS); // might be break?
fst_check_duration(7000, 1000); // inter-digit timeout after 2nd digit pressed
recognition_result = NULL;
}
FST_SESSION_END()
+
+ FST_SESSION_BEGIN(play_and_collect_input_partial) // speech collection with "partial=true" in the grammar args
+ {
+ char terminator_collected = 0;
+ char *digits_collected = NULL;
+ cJSON *recognition_result = NULL;
+ // Two passes over the same prompt: first with no caller input args, then with a callback that counts partial-result events.
+ // args
+ const char *play_files = "silence_stream://1000";
+ const char *speech_engine = "test";
+ const char *terminators = "#";
+ int min_digits = 1;
+ int max_digits = 99;
+ int digit_timeout = 500;
+ int no_input_timeout = digit_timeout;
+ int speech_complete_timeout = digit_timeout;
+ int speech_recognition_timeout = 60000;
+ char *speech_grammar_args = switch_core_session_sprintf(fst_session, "{start-input-timers=false,no-input-timeout=%d,vad-silence-ms=%d,speech-timeout=%d,language=en-US,partial=true}default",
+ no_input_timeout, speech_complete_timeout, speech_recognition_timeout);
+ switch_status_t status;
+ // Pass 1: the last argument (input args) is NULL, so no caller callback exists; a final result must still be returned.
+ switch_ivr_displace_session(fst_session, "file_string://silence_stream://500,0!tone_stream://%%(2000,0,350,440)", 0, "r"); // NOTE(review): presumably feeds a tone into the session so the test recognizer hears "speech" - confirm
+ terminator_collected = 0;
+ digits_collected = NULL;
+ if (recognition_result) cJSON_Delete(recognition_result);
+ recognition_result = NULL;
+ fst_time_mark();
+ status = switch_ivr_play_and_collect_input(fst_session, play_files, speech_engine, speech_grammar_args, min_digits, max_digits, terminators, digit_timeout, &recognition_result, &digits_collected, &terminator_collected, NULL);
+ fst_requires(recognition_result);
+ // check results
+ fst_check(status == SWITCH_STATUS_SUCCESS); // might be break?
+ fst_check_duration(2500, 1000); // returns when utterance is done
+ fst_check_string_equals(cJSON_GetObjectCstr(recognition_result, "text"), "agent");
+ fst_check_string_equals(digits_collected, NULL);
+ fst_check(terminator_collected == 0);
+
+ // Pass 2: same input, but caller input args are supplied so partial-result events reach partial_play_and_collect_input_callback.
+ switch_ivr_displace_session(fst_session, "file_string://silence_stream://500,0!tone_stream://%%(2000,0,350,440)", 0, "r");
+ terminator_collected = 0;
+ digits_collected = NULL;
+ if (recognition_result) cJSON_Delete(recognition_result); // release the result from pass 1 before reusing the pointer
+ recognition_result = NULL;
+
+ switch_input_args_t collect_input_args = { 0 };
+ switch_input_args_t *args = NULL;
+ int count = 0; // incremented by the callback once per "detected-partial-speech" event
+
+ args = &collect_input_args;
+ args->input_callback = partial_play_and_collect_input_callback;
+ args->buf = &count; // the callback receives this pointer as its data argument
+ args->buflen = sizeof(int);
+
+ fst_time_mark();
+ status = switch_ivr_play_and_collect_input(fst_session, play_files, speech_engine, speech_grammar_args, min_digits, max_digits, terminators, digit_timeout, &recognition_result, &digits_collected, &terminator_collected, args);
+ fst_requires(recognition_result);
+ // check results
+ fst_check(status == SWITCH_STATUS_SUCCESS); // might be break?
+ fst_check_duration(2500, 1000); // returns when utterance is done
+ fst_check_string_equals(cJSON_GetObjectCstr(recognition_result, "text"), "agent");
+ fst_check_string_equals(digits_collected, NULL);
+ fst_check(terminator_collected == 0);
+ switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "xxx count = %d\n", count);
+ fst_check(count == 3); // 3 partial results
+ cJSON_Delete(recognition_result);
+ }
+ FST_SESSION_END()
}
FST_SUITE_END()
}