1 /*---------------------------------------------------------------------------* 2 * srec_eosd.c * 3 * * 4 * Copyright 2007, 2008 Nuance Communciations, Inc. * 5 * * 6 * Licensed under the Apache License, Version 2.0 (the 'License'); * 7 * you may not use this file except in compliance with the License. * 8 * * 9 * You may obtain a copy of the License at * 10 * http://www.apache.org/licenses/LICENSE-2.0 * 11 * * 12 * Unless required by applicable law or agreed to in writing, software * 13 * distributed under the License is distributed on an 'AS IS' BASIS, * 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * 15 * See the License for the specific language governing permissions and * 16 * limitations under the License. * 17 * * 18 *---------------------------------------------------------------------------*/ 19 20 #include"portable.h" 21 #include"passert.h" 22 #include"srec.h" 23 #include"srec_eosd.h" 24 #include"srec_context.h" 25 #include"word_lattice.h" 26 27 void srec_eosd_allocate(srec_eos_detector_parms** peosd, 28 int eos_costdelta, 29 int opt_eos_costdelta, 30 int terminal_timeout, 31 int optional_terminal_timeout, 32 int non_terminal_timeout, 33 int max_speech_duration) 34 { 35 srec_eos_detector_parms* eosd; 36 eosd = (srec_eos_detector_parms*)CALLOC(1, sizeof(srec_eos_detector_parms), "search.endpointer"); 37 eosd->eos_costdelta = (frameID)eos_costdelta; 38 eosd->opt_eos_costdelta = (frameID)opt_eos_costdelta; 39 eosd->endnode_timeout = (frameID)terminal_timeout; 40 eosd->optendnode_timeout = (frameID)optional_terminal_timeout; 41 eosd->internalnode_timeout = (frameID)non_terminal_timeout; 42 eosd->inspeech_timeout = (frameID)max_speech_duration; 43 *peosd = eosd; 44 } 45 46 void srec_eosd_destroy(srec_eos_detector_parms* eosd) 47 { 48 FREE(eosd); 49 } 50 51 /* The current algorithm does not make use of most of the frmcnt counters, 52 rather we look at the eos frame from the final end node search state 53 and comparrer with the current frame. The new method is less sensitive 54 to background noise. 55 56 The 1.9 method had a blatant bug in that we were reseting the optend_frmnt 57 when there were no live alternative tokens, ie xftoken == NUL was causing 58 reset! 59 */ 60 61 void srec_eosd_state_reset(srec_eos_detector_state* eosd_state) 62 { 63 eosd_state->endnode_frmcnt = 0; 64 eosd_state->optendnode_frmcnt = 0; 65 eosd_state->internalnode_frmcnt = 0; 66 eosd_state->inspeech_frmcnt = 0; 67 eosd_state->internalnode_node_index = MAXnodeID; 68 } 69 70 EOSrc srec_check_end_of_speech_end(srec* rec) 71 { 72 EOSrc rc = SPEECH_MAYBE_ENDED; 73 return rc; 74 } 75 76 EOSrc srec_check_end_of_speech(srec_eos_detector_parms* eosd_parms, srec* rec) 77 { 78 nodeID end_node; 79 EOSrc rc = VALID_SPEECH_CONTINUING; 80 bigcostdata eos_cost_margin; 81 bigcostdata opteos_cost_margin; 82 word_token* last_wtoken; 83 int nframes_since_eos; 84 85 fsmnode_token *ftoken, *eftoken, *oeftoken, *xftoken; 86 ftokenID ftoken_index, eftoken_index, oeftoken_index, xftoken_index; 87 costdata wrapup_cost = rec->context->wrapup_cost; 88 srec_eos_detector_state* eosd_state = &rec->eosd_state; 89 90 if (rec->current_search_frame == 1) 91 srec_eosd_state_reset(eosd_state); 92 93 end_node = rec->context->end_node; 94 eftoken_index = rec->best_token_for_node[ end_node]; 95 if (eftoken_index != MAXftokenID) 96 eftoken = &rec->fsmnode_token_array[ eftoken_index]; 97 else 98 eftoken = NULL; 99 100 xftoken_index = rec->current_best_ftoken_index[NODE_INFO_REGULAR]; 101 if (xftoken_index != MAXftokenID) 102 xftoken = &rec->fsmnode_token_array[ xftoken_index]; 103 else 104 xftoken = NULL; 105 106 oeftoken_index = rec->current_best_ftoken_index[NODE_INFO_OPTENDN]; 107 if (oeftoken_index != MAXftokenID) 108 oeftoken = &rec->fsmnode_token_array[ oeftoken_index]; 109 else 110 oeftoken = NULL; 111 112 113 if (rec->srec_ended) 114 rc = SPEECH_MAYBE_ENDED; 115 else if (rec->current_search_frame >= rec->word_lattice->max_frames - 1 116 || rec->current_search_frame >= eosd_parms->inspeech_timeout) 117 { 118 /* here we will need to differentiate max_frames from 119 num_frames_allocated */ 120 if (eftoken_index != MAXftokenID) 121 rc = SPEECH_ENDED; 122 else 123 rc = SPEECH_TOO_LONG; 124 } 125 else 126 { 127 128 /* reset the internal counter? */ 129 ftoken_index = rec->current_best_ftoken_index[NODE_INFO_REGULAR]; 130 if (ftoken_index != MAXftokenID) 131 { 132 ftoken = &rec->fsmnode_token_array[ ftoken_index]; 133 if (eosd_state->internalnode_node_index != ftoken->FSMnode_index) 134 { 135 eosd_state->internalnode_node_index = ftoken->FSMnode_index; 136 eosd_state->internalnode_frmcnt = 1; 137 } 138 else 139 { 140 if (ftoken->word != rec->context->beg_silence_word) 141 eosd_state->internalnode_frmcnt++; 142 } 143 } 144 else 145 { 146 eosd_state->internalnode_frmcnt = 1; 147 eosd_state->internalnode_node_index = MAXnodeID; 148 } 149 150 /* nframes since eos */ 151 if (eftoken) 152 { 153 last_wtoken = NULL; 154 if (eftoken->word_backtrace != MAXwtokenID) 155 { 156 last_wtoken = &rec->word_token_array[eftoken->word_backtrace]; 157 nframes_since_eos = rec->current_search_frame - last_wtoken->end_time; 158 } 159 else 160 nframes_since_eos = 0; 161 } 162 else 163 nframes_since_eos = 0; 164 165 /* eos cost margin */ 166 if (!eftoken) 167 { 168 eos_cost_margin = 0; 169 } 170 else if (!oeftoken && !xftoken) 171 { 172 eos_cost_margin = MAXcostdata; 173 } 174 else if (!oeftoken) 175 { 176 eos_cost_margin = xftoken->cost + wrapup_cost - eftoken->cost; 177 } 178 else if (!xftoken) 179 { 180 eos_cost_margin = oeftoken->cost + wrapup_cost - eftoken->cost; 181 } 182 else if (oeftoken->cost > eftoken->cost) 183 { 184 eos_cost_margin = xftoken->cost + wrapup_cost - eftoken->cost; 185 } 186 else 187 { /* if(oeftoken->cost < eftoken->cost) */ 188 eos_cost_margin = oeftoken->cost + wrapup_cost - eftoken->cost; 189 } 190 191 /* opteos cost margin */ 192 if (!eftoken) 193 { 194 opteos_cost_margin = 0; 195 } 196 else if (!oeftoken) 197 { 198 opteos_cost_margin = 0; 199 } 200 else if (!xftoken) 201 { 202 opteos_cost_margin = MAXcostdata; 203 } 204 else 205 { 206 opteos_cost_margin = xftoken->cost + wrapup_cost - eftoken->cost; 207 } 208 209 if (eftoken) 210 { 211 if (oeftoken && nframes_since_eos > eosd_parms->optendnode_timeout 212 && opteos_cost_margin > eosd_parms->eos_costdelta) 213 { 214 rc = SPEECH_ENDED; 215 216 } 217 else if (!oeftoken && nframes_since_eos > eosd_parms->endnode_timeout 218 && eos_cost_margin > eosd_parms->eos_costdelta) 219 { 220 rc = SPEECH_ENDED; 221 222 } 223 else if (nframes_since_eos > eosd_parms->optendnode_timeout 224 && eos_cost_margin > eosd_parms->eos_costdelta) 225 { 226 rc = SPEECH_ENDED; 227 228 } 229 else 230 { 231 rc = VALID_SPEECH_CONTINUING; 232 } 233 } 234 235 /* reached internal timeout, ie at same node for so long? */ 236 if (eosd_state->internalnode_frmcnt >= eosd_parms->internalnode_timeout) 237 { 238 /* PLogMessage("eosd_state->internalnode_frmcnt %d eosd_parms->internalnode_timeout %d\n", eosd_state->internalnode_frmcnt, eosd_parms->internalnode_timeout); */ 239 ftoken_index = rec->current_best_ftoken_index[NODE_INFO_REGULAR]; 240 ftoken = &rec->fsmnode_token_array [ ftoken_index]; 241 /* sprintf(buf, "eos rec%d@%d,%d i%d> ", rec->id, 242 rec->current_search_frame, ftoken->FSMnode_index, 243 eosd_state->internalnode_frmcnt); 244 PLogMessage(buf); 245 sprint_word_token_backtrace(buf,sizeof(buf),rec,ftoken->word_backtrace); 246 PLogMessage(" %s\n", buf); */ 247 rc = SPEECH_ENDED; 248 } 249 } 250 251 /* the endnode will never win against an optend node because 252 the cost at endnode is the same or worse (even wrapup_cost adjustment) */ 253 254 255 256 /* so we need to check for optend nodes separately here 257 but we really need to remember best_optendnode_index, best_endnode_index 258 best_nonendnode_index */ 259 return rc; 260 } 261