Home | History | Annotate | Download | only in crec
      1 /*---------------------------------------------------------------------------*
      2  *  srec_eosd.c  *
      3  *                                                                           *
      4  *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
      5  *                                                                           *
      6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
      7  *  you may not use this file except in compliance with the License.         *
      8  *                                                                           *
      9  *  You may obtain a copy of the License at                                  *
     10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
     11  *                                                                           *
     12  *  Unless required by applicable law or agreed to in writing, software      *
     13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
     14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
     15  *  See the License for the specific language governing permissions and      *
     16  *  limitations under the License.                                           *
     17  *                                                                           *
     18  *---------------------------------------------------------------------------*/
     19 
     20 #include"portable.h"
     21 #include"passert.h"
     22 #include"srec.h"
     23 #include"srec_eosd.h"
     24 #include"srec_context.h"
     25 #include"word_lattice.h"
     26 
     27 void srec_eosd_allocate(srec_eos_detector_parms** peosd,
     28                         int eos_costdelta,
     29                         int opt_eos_costdelta,
     30                         int terminal_timeout,
     31                         int optional_terminal_timeout,
     32                         int non_terminal_timeout,
     33                         int max_speech_duration)
     34 {
     35   srec_eos_detector_parms* eosd;
     36   eosd = (srec_eos_detector_parms*)CALLOC(1, sizeof(srec_eos_detector_parms), "search.endpointer");
     37   eosd->eos_costdelta        = (frameID)eos_costdelta;
     38   eosd->opt_eos_costdelta    = (frameID)opt_eos_costdelta;
     39   eosd->endnode_timeout      = (frameID)terminal_timeout;
     40   eosd->optendnode_timeout   = (frameID)optional_terminal_timeout;
     41   eosd->internalnode_timeout = (frameID)non_terminal_timeout;
     42   eosd->inspeech_timeout     = (frameID)max_speech_duration;
     43   *peosd = eosd;
     44 }
     45 
     46 void srec_eosd_destroy(srec_eos_detector_parms* eosd)
     47 {
     48   FREE(eosd);
     49 }
     50 
/* The current algorithm does not make use of most of the frmcnt counters;
   rather, we look at the eos frame from the final end node search state
   and compare it with the current frame.  The new method is less sensitive
   to background noise.

   The 1.9 method had a blatant bug in that we were resetting the
   optendnode_frmcnt when there were no live alternative tokens, i.e.
   xftoken == NULL was causing a reset!
*/
     60 
     61 void srec_eosd_state_reset(srec_eos_detector_state* eosd_state)
     62 {
     63   eosd_state->endnode_frmcnt = 0;
     64   eosd_state->optendnode_frmcnt = 0;
     65   eosd_state->internalnode_frmcnt = 0;
     66   eosd_state->inspeech_frmcnt = 0;
     67   eosd_state->internalnode_node_index = MAXnodeID;
     68 }
     69 
     70 EOSrc srec_check_end_of_speech_end(srec* rec)
     71 {
     72   EOSrc rc = SPEECH_MAYBE_ENDED;
     73   return rc;
     74 }
     75 
     76 EOSrc srec_check_end_of_speech(srec_eos_detector_parms* eosd_parms, srec* rec)
     77 {
     78   nodeID end_node;
     79   EOSrc rc = VALID_SPEECH_CONTINUING;
     80   bigcostdata eos_cost_margin;
     81   bigcostdata opteos_cost_margin;
     82   word_token* last_wtoken;
     83   int nframes_since_eos;
     84 
     85   fsmnode_token *ftoken, *eftoken, *oeftoken, *xftoken;
     86   ftokenID ftoken_index, eftoken_index, oeftoken_index, xftoken_index;
     87   costdata wrapup_cost = rec->context->wrapup_cost;
     88   srec_eos_detector_state* eosd_state = &rec->eosd_state;
     89 
     90   if (rec->current_search_frame == 1)
     91     srec_eosd_state_reset(eosd_state);
     92 
     93   end_node = rec->context->end_node;
     94   eftoken_index = rec->best_token_for_node[ end_node];
     95   if (eftoken_index != MAXftokenID)
     96     eftoken = &rec->fsmnode_token_array[ eftoken_index];
     97   else
     98     eftoken = NULL;
     99 
    100   xftoken_index  = rec->current_best_ftoken_index[NODE_INFO_REGULAR];
    101   if (xftoken_index != MAXftokenID)
    102     xftoken = &rec->fsmnode_token_array[ xftoken_index];
    103   else
    104     xftoken = NULL;
    105 
    106   oeftoken_index = rec->current_best_ftoken_index[NODE_INFO_OPTENDN];
    107   if (oeftoken_index != MAXftokenID)
    108     oeftoken = &rec->fsmnode_token_array[ oeftoken_index];
    109   else
    110     oeftoken = NULL;
    111 
    112 
    113   if (rec->srec_ended)
    114     rc = SPEECH_MAYBE_ENDED;
    115   else if (rec->current_search_frame >= rec->word_lattice->max_frames - 1
    116            || rec->current_search_frame >= eosd_parms->inspeech_timeout)
    117   {
    118     /* here we will need to differentiate max_frames from
    119        num_frames_allocated */
    120     if (eftoken_index != MAXftokenID)
    121       rc = SPEECH_ENDED;
    122     else
    123       rc = SPEECH_TOO_LONG;
    124   }
    125   else
    126   {
    127 
    128     /* reset the internal counter? */
    129     ftoken_index = rec->current_best_ftoken_index[NODE_INFO_REGULAR];
    130     if (ftoken_index != MAXftokenID)
    131     {
    132       ftoken = &rec->fsmnode_token_array[ ftoken_index];
    133       if (eosd_state->internalnode_node_index != ftoken->FSMnode_index)
    134       {
    135         eosd_state->internalnode_node_index = ftoken->FSMnode_index;
    136         eosd_state->internalnode_frmcnt = 1;
    137       }
    138       else
    139       {
    140         if (ftoken->word != rec->context->beg_silence_word)
    141           eosd_state->internalnode_frmcnt++;
    142       }
    143     }
    144     else
    145     {
    146       eosd_state->internalnode_frmcnt = 1;
    147       eosd_state->internalnode_node_index = MAXnodeID;
    148     }
    149 
    150     /* nframes since eos */
    151     if (eftoken)
    152     {
    153       last_wtoken = NULL;
    154       if (eftoken->word_backtrace != MAXwtokenID)
    155       {
    156         last_wtoken = &rec->word_token_array[eftoken->word_backtrace];
    157         nframes_since_eos = rec->current_search_frame - last_wtoken->end_time;
    158       }
    159       else
    160         nframes_since_eos = 0;
    161     }
    162     else
    163       nframes_since_eos = 0;
    164 
    165     /* eos cost margin */
    166     if (!eftoken)
    167     {
    168       eos_cost_margin = 0;
    169     }
    170     else if (!oeftoken && !xftoken)
    171     {
    172       eos_cost_margin = MAXcostdata;
    173     }
    174     else if (!oeftoken)
    175     {
    176       eos_cost_margin = xftoken->cost + wrapup_cost - eftoken->cost;
    177     }
    178     else if (!xftoken)
    179     {
    180       eos_cost_margin = oeftoken->cost + wrapup_cost - eftoken->cost;
    181     }
    182     else if (oeftoken->cost > eftoken->cost)
    183     {
    184       eos_cost_margin = xftoken->cost + wrapup_cost - eftoken->cost;
    185     }
    186     else
    187     { /* if(oeftoken->cost < eftoken->cost) */
    188       eos_cost_margin = oeftoken->cost + wrapup_cost - eftoken->cost;
    189     }
    190 
    191     /* opteos cost margin */
    192     if (!eftoken)
    193     {
    194       opteos_cost_margin = 0;
    195     }
    196     else if (!oeftoken)
    197     {
    198       opteos_cost_margin = 0;
    199     }
    200     else if (!xftoken)
    201     {
    202       opteos_cost_margin = MAXcostdata;
    203     }
    204     else
    205     {
    206       opteos_cost_margin = xftoken->cost + wrapup_cost - eftoken->cost;
    207     }
    208 
    209     if (eftoken)
    210     {
    211       if (oeftoken && nframes_since_eos > eosd_parms->optendnode_timeout
    212           && opteos_cost_margin > eosd_parms->eos_costdelta)
    213       {
    214         rc = SPEECH_ENDED;
    215 
    216       }
    217       else if (!oeftoken && nframes_since_eos > eosd_parms->endnode_timeout
    218                && eos_cost_margin > eosd_parms->eos_costdelta)
    219       {
    220         rc = SPEECH_ENDED;
    221 
    222       }
    223       else if (nframes_since_eos > eosd_parms->optendnode_timeout
    224                && eos_cost_margin > eosd_parms->eos_costdelta)
    225       {
    226         rc = SPEECH_ENDED;
    227 
    228       }
    229       else
    230       {
    231         rc = VALID_SPEECH_CONTINUING;
    232       }
    233     }
    234 
    235     /* reached internal timeout, ie at same node for so long? */
    236     if (eosd_state->internalnode_frmcnt >= eosd_parms->internalnode_timeout)
    237     {
    238       /* PLogMessage("eosd_state->internalnode_frmcnt %d eosd_parms->internalnode_timeout %d\n", eosd_state->internalnode_frmcnt, eosd_parms->internalnode_timeout); */
    239       ftoken_index = rec->current_best_ftoken_index[NODE_INFO_REGULAR];
    240       ftoken = &rec->fsmnode_token_array [ ftoken_index];
    241       /* sprintf(buf, "eos rec%d@%d,%d i%d> ", rec->id,
    242       rec->current_search_frame, ftoken->FSMnode_index,
    243       eosd_state->internalnode_frmcnt);
    244       PLogMessage(buf);
    245       sprint_word_token_backtrace(buf,sizeof(buf),rec,ftoken->word_backtrace);
    246       PLogMessage(" %s\n", buf); */
    247       rc = SPEECH_ENDED;
    248     }
    249   }
    250 
    251   /* the endnode will never win against an optend node because
    252      the cost at endnode is the same or worse (even wrapup_cost adjustment) */
    253 
    254 
    255 
    256   /* so we need to check for optend nodes separately here
    257      but we really need to remember best_optendnode_index, best_endnode_index
    258      best_nonendnode_index */
    259   return rc;
    260 }
    261