Home | History | Annotate | Download | only in include
      1 /*---------------------------------------------------------------------------*
      2  *  SR_Recognizer.h  *
      3  *                                                                           *
      4  *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
      5  *                                                                           *
      6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
      7  *  you may not use this file except in compliance with the License.         *
      8  *                                                                           *
      9  *  You may obtain a copy of the License at                                  *
     10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
     11  *                                                                           *
     12  *  Unless required by applicable law or agreed to in writing, software      *
     13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
     14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
     15  *  See the License for the specific language governing permissions and      *
     16  *  limitations under the License.                                           *
     17  *                                                                           *
     18  *---------------------------------------------------------------------------*/
     19 
     20 #ifndef __SR_RECOGNIZER_H
     21 #define __SR_RECOGNIZER_H
     22 
     23 
     24 
     25 #include "ESR_ReturnCode.h"
     26 #include "SR_RecognizerPrefix.h"
     27 #include "SR_AcousticModels.h"
     28 #include "SR_Grammar.h"
     29 #include "SR_RecognizerResult.h"
     30 #include "SR_Nametags.h"
     31 #include "pstdio.h"
     32 #include "ptypes.h"
     33 
     34 /* forward decl needed because of SR_Recognizer.h <-> SR_Grammar.h include loop */
     35 struct SR_Grammar_t;
     36 
     37 /**
     38  * Recognizer status.
     39  */
     40 typedef enum SR_RecognizerStatus_t
     41 {
     42   /**
     43    * Reserved value.
     44    */
     45   SR_RECOGNIZER_EVENT_INVALID,
     46   /**
     47    * Recognizer could not find a match for the utterance.
     48    */
     49   SR_RECOGNIZER_EVENT_NO_MATCH,
     50   /**
     51    * Recognizer processed one frame of audio.
     52    */
     53   SR_RECOGNIZER_EVENT_INCOMPLETE,
     54   /**
     55    * Recognizer has just been started.
     56    */
     57   SR_RECOGNIZER_EVENT_STARTED,
     58   /**
     59    * Recognizer is stopped.
     60    */
     61   SR_RECOGNIZER_EVENT_STOPPED,
     62   /**
     63    * Beginning of speech detected.
     64    */
     65   SR_RECOGNIZER_EVENT_START_OF_VOICING,
     66   /**
     67    * End of speech detected.
     68    */
     69   SR_RECOGNIZER_EVENT_END_OF_VOICING,
     70   /**
     71    * Beginning of utterance occured too soon.
     72    */
     73   SR_RECOGNIZER_EVENT_SPOKE_TOO_SOON,
     74   /**
     75    * Recognition match detected.
     76    */
     77   SR_RECOGNIZER_EVENT_RECOGNITION_RESULT,
     78   /**
     79    * Timeout occured before beginning of utterance.
     80    */
     81   SR_RECOGNIZER_EVENT_START_OF_UTTERANCE_TIMEOUT,
     82   /**
     83    * Timeout occured before speech recognition could complete.
     84    */
     85   SR_RECOGNIZER_EVENT_RECOGNITION_TIMEOUT,
     86   /**
     87    * Not enough samples to process one frame.
     88    */
     89   SR_RECOGNIZER_EVENT_NEED_MORE_AUDIO,
     90   /**
     91    * More audio encountered than is allowed by 'swirec_max_speech_duration'.
     92    */
     93   SR_RECOGNIZER_EVENT_MAX_SPEECH,
     94 } SR_RecognizerStatus;
     95 
     96 /**
     97  * Type of the SR_RecognizerResult reported by SR_RecognizerAdvance() (the 'type' output of advance).
     98  */
     99 typedef enum SR_RecognizerResultType_t
    100 {
    101   /**
    102    * Reserved value.
    103    */
    104   SR_RECOGNIZER_RESULT_TYPE_INVALID,
    105   /**
    106    * The result is complete from a full recognition of audio.
    107    */
    108   SR_RECOGNIZER_RESULT_TYPE_COMPLETE,
    109   /**
    110    * No results at this time.
    111    */
    112   SR_RECOGNIZER_RESULT_TYPE_NONE,
    113 } SR_RecognizerResultType;
    114 
    115 /**
    116  * SR_Utterance stubbed out: in this header it is only an untyped (void*) placeholder.
    117  */
    118 typedef void* SR_Utterance;
    119 
    120 typedef enum /* Mode argument passed to an SR_RecognizerLockFunction. */
    121 {
    122   ESR_LOCK,   /* presumably a request to acquire the lock -- confirm against the implementation */
    123   ESR_UNLOCK  /* presumably a request to release the lock -- confirm against the implementation */
    124 } ESR_LOCKMODE;
    125 
    126 /**
    127  * Function which will be invoked before accessing internal variables; it receives the requested
    128  * ESR_LOCKMODE and an opaque data pointer, and reports its outcome as an ESR_ReturnCode.
    129  */ typedef ESR_ReturnCode(*SR_RecognizerLockFunction)(ESR_LOCKMODE mode, void* data);
    130 
    131 /**
    132  * @addtogroup SR_RecognizerModule SR_Recognizer API functions
    133  * Synchronous speech recognizer.
    134  *
    135  * @{
    136  */
    137 
    138 /**
    139  * Synchronous speech recognizer, exposed as a table of function pointers that each take the recognizer as 'self'.
    140  */
    141 typedef struct SR_Recognizer_t
    142 {
    143   /**
    144    * Starts recognition.
    145    *
    146    * @param self SR_Recognizer handle
    147    * @return ESR_INVALID_ARGUMENT if self is null, if no acoustic models have been associated with the recognizer,
    148    * if no grammars have been activated, or if the recognizer cannot be started for an unknown reason
    149    */
    150   ESR_ReturnCode(*start)(struct SR_Recognizer_t* self);
    151   /**
    152    * Stops the recognizer and invalidates the recognition result object.
    153    * Calling this function before the recognizer receives the last frame causes the recognition
    154    * to abort.
    155    *
    156    * @param self SR_Recognizer handle
    157    * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_STATE if an internal error has occurred
    158    */
    159   ESR_ReturnCode(*stop)(struct SR_Recognizer_t* self);
    160   /**
    161    * Destroy a recognizer.
    162    *
    163    * @param self SR_Recognizer handle
    164    * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_STATE if an internal error has occurred
    165    */
    166   ESR_ReturnCode(*destroy)(struct SR_Recognizer_t* self);
    167   /**
    168    * Associates a set of models with the recognizer.
    169    *
    170    * @param self SR_Recognizer handle
    171    * @return ESR_INVALID_ARGUMENT if self is null
    172    */
    173   ESR_ReturnCode(*setup)(struct SR_Recognizer_t* self);
    174   /**
    175    * Unconfigures recognizer.
    176    *
    177    * @param self SR_Recognizer handle
    178    * @return ESR_INVALID_ARGUMENT if self is null
    179    */
    180   ESR_ReturnCode(*unsetup)(struct SR_Recognizer_t* self);
    181   /**
    182    * Indicates whether recognizer is configured for use.
    183    *
    184    * @param self SR_Recognizer handle
    185    * @param isSetup [out] True if recognizer is configured
    186    * @return ESR_INVALID_ARGUMENT if self is null
    187    */
    188   ESR_ReturnCode(*isSetup)(struct SR_Recognizer_t* self, ESR_BOOL* isSetup);
    189 
    190   /**
    191    * Returns copy of LCHAR recognition parameter.
    192    *
    193    * @param self SR_Recognizer handle
    194    * @param key Parameter name
    195    * @param value [out] Used to hold the parameter value
    196    * @param len [in/out] Length of value argument. If the return code is ESR_BUFFER_OVERFLOW,
    197    *            the required length is returned in this variable.
    198    * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_RESULT_TYPE if the specified property is not of
    199    * type LCHAR*
    200    */
    201   ESR_ReturnCode(*getParameter)(struct SR_Recognizer_t* self, const LCHAR* key, LCHAR* value, size_t* len);
    202   /**
    203    * Return copy of size_t recognition parameter.
    204    *
    205    * @param self SR_Recognizer handle
    206    * @param key Parameter name
    207    * @param value [out] Used to hold the parameter value
    208    * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_RESULT_TYPE if the specified property is not of
    209    * type size_t
    210    */
    211   ESR_ReturnCode(*getSize_tParameter)(struct SR_Recognizer_t* self, const LCHAR* key, size_t* value);
    212   /**
    213    * Return copy of BOOL recognition parameter.
    214    *
    215    * @param self SR_Recognizer handle
    216    * @param key Parameter name
    217    * @param value [out] Used to hold the parameter value
    218    * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_RESULT_TYPE if the specified property is not of
    219    * type bool
    220    */
    221   ESR_ReturnCode(*getBoolParameter)(struct SR_Recognizer_t* self, const LCHAR* key, ESR_BOOL* value);
    222   /**
    223    * Sets LCHAR* recognition parameters.
    224    *
    225    * Key:             Description of associated value
    226    *
    227    * VoiceEnrollment       If "true", the next recognition will produce data required
    228    *                              for Nametag support (i.e. Aurora bitstream).
    229    *
    230    * @param self SR_Recognizer handle
    231    * @param key Parameter name
    232    * @param value Parameter value
    233    * @return ESR_INVALID_ARGUMENT if self is null; ESR_OUT_OF_MEMORY if system is out of memory
    234    */
    235   ESR_ReturnCode(*setParameter)(struct SR_Recognizer_t* self, const LCHAR* key, LCHAR* value);
    236   /**
    237    * Sets size_t recognition parameter.
    238    *
    239    * @param self SR_Recognizer handle
    240    * @param key Parameter name
    241    * @param value Parameter value
    242    * @return ESR_INVALID_ARGUMENT if self is null; ESR_OUT_OF_MEMORY if system is out of memory
    243    */
    244   ESR_ReturnCode(*setSize_tParameter)(struct SR_Recognizer_t* self, const LCHAR* key, size_t value);
    245   /**
    246    * Sets BOOL recognition parameter.
    247    *
    248    * @param self SR_Recognizer handle
    249    * @param key Parameter name
    250    * @param value Parameter value
    251    * @return ESR_INVALID_ARGUMENT if self is null; ESR_OUT_OF_MEMORY if system is out of memory
    252    */
    253   ESR_ReturnCode(*setBoolParameter)(struct SR_Recognizer_t* self, const LCHAR* key, ESR_BOOL value);
    254 
    255   /**
    256    * Recognizer may be set up with multiple Grammars and multiple rules. All grammars
    257    * must be unsetup before the recognizer can be destroyed.
    258    * A pre-compiled Grammar should have undergone a model consistency check with the
    259    * recognizer prior to this call.
    260    *
    261    * @param self SR_Recognizer handle
    262    * @param grammar Grammar containing rule
    263    * @param ruleName Name of rule to associate with recognizer
    264    * @see SR_GrammarCheckModelConsistency
    265    * @return ESR_INVALID_ARGUMENT if self is null
    266    */
    267   ESR_ReturnCode (*setupRule)(struct SR_Recognizer_t* self, struct SR_Grammar_t* grammar, const LCHAR* ruleName);
    268   /**
    269    * Indicates if Recognizer is configured with any rules (no Grammar argument is taken; scope to be confirmed).
    270    *
    271    * @param self SR_Recognizer handle
    272    * @param hasSetupRules [out] True if the Recognizer has rules set up
    273    * @return ESR_INVALID_ARGUMENT if self is null
    274    */
    275   ESR_ReturnCode(*hasSetupRules)(struct SR_Recognizer_t* self, ESR_BOOL* hasSetupRules);
    276   /**
    277    * Activates rule in recognizer.
    278    *
    279    * @param self SR_Recognizer handle
    280    * @param grammar Grammar containing rule
    281    * @param ruleName Name of rule
    282    * @param weight Relative weight to assign to self grammar vs. other activated grammars.
    283    *               Values: Integers 0-2^31.
    284    * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_STATE if no models are associated with the recognizer,
    285    * or if the rule could not be setup, or if the acoustic models could not be setup;
    286    * ESR_BUFFER_OVERFLOW if ruleName is too long
    287    */
    288   ESR_ReturnCode (*activateRule)(struct SR_Recognizer_t* self, struct SR_Grammar_t* grammar,
    289                                 const LCHAR* ruleName, unsigned int weight);
    290   /**
    291    * Deactivates rule in recognizer.
    292    *
    293    * @param self SR_Recognizer handle
    294    * @param grammar Grammar containing rule
    295    * @param ruleName Name of root rule
    296    * @return ESR_INVALID_ARGUMENT if self is null; ESR_NO_MATCH_ERROR if grammar is not activated
    297    */
    298   ESR_ReturnCode (*deactivateRule)(struct SR_Recognizer_t* self, struct SR_Grammar_t* grammar,
    299                                   const LCHAR* ruleName);
    300 
    301   /**
    302    * Deactivates all grammar rules in recognizer.
    303    *
    304    * @param self SR_Recognizer handle
    305    * @return ESR_INVALID_ARGUMENT if self is null
    306    */
    307   ESR_ReturnCode(*deactivateAllRules)(struct SR_Recognizer_t* self);
    308 
    309   /**
    310    * Indicates if rule is active in recognizer.
    311    *
    312    * @param self SR_Recognizer handle
    313    * @param grammar Grammar containing rule
    314    * @param ruleName Name of rule
    315    * @param isActiveRule [out] True if rule is active
    316    * @return ESR_INVALID_ARGUMENT if self is null
    317    */
    318   ESR_ReturnCode (*isActiveRule)(struct SR_Recognizer_t* self, struct SR_Grammar_t* grammar,
    319                                 const LCHAR* ruleName, ESR_BOOL* isActiveRule);
    320   /**
    321    * Configures the grammar for maximum amount of word addition.
    322    *
    323    * @param self SR_Recognizer handle
    324    * @param grammar Grammar whose ceiling is to be set
    325    * @return ESR_INVALID_ARGUMENT if self or grammar are null
    326    */
    327   ESR_ReturnCode (*setWordAdditionCeiling)(struct SR_Recognizer_t* self, struct SR_Grammar_t* grammar );
    328   /**
    329    * Ensure the model usage in a pre-compiled grammar is consistent with the models
    330    * that are associated with the Recognizer. You must first have called Recognizer_Setup().
    331    *
    332    * @param self SR_Recognizer handle
    333    * @param grammar Grammar to check against
    334    * @param isConsistent [out] True if rule is consistent
    335    * @return ESR_INVALID_ARGUMENT if self is null
    336    */
    337   ESR_ReturnCode (*checkGrammarConsistency)(struct SR_Recognizer_t* self, struct SR_Grammar_t* grammar,
    338       ESR_BOOL* isConsistent);
    339 
    340   /**
    341    * Returns the acoustic models of the Recognizer through pmodels.
    342    * NOTE(review): semantics inferred from the signature only; confirm against the implementation.
    343    *
    344    * @param self SR_Recognizer handle
    345    * @param pmodels [out] Receives the SR_AcousticModels pointer
    346    *                (presumably the models associated with the recognizer -- confirm)
    347    * @return ESR_INVALID_ARGUMENT if self is null
    348    */
    349   ESR_ReturnCode (*getModels)(struct SR_Recognizer_t* self, SR_AcousticModels** pmodels);
    350 
    351   /**
    352    * Get audio into the recognizer.
    353    *
    354    * We decouple the Audio and frontend processing from the Recognizer processing via an
    355    * internal FIFO frame buffer (aka utterance buffer). This ensures that this call is at least
    356    * as fast as real time so that voicing events are not unduly delayed. The audio buffer size
    357    * must be at least one frame buffer's worth and some reasonable maximum size for synchronous
    358    * behaviour. This function may be called independently of Recognizer_Advance.
    359    *
    360    * @param self SR_Recognizer handle
    361    * @param buffer Buffer containing audio data
    362    * @param bufferSize [in/out] Size of buffer in samples. In case of a buffer overflow,
    363    *                            ESR_BUFFER_OVERFLOW is returned and this value holds the actual
    364    *                            amount of samples that were pushed.
    365    * @param isLast Indicates if the audio frame is the last one in this recognition
    366    * @return ESR_INVALID_ARGUMENT if self, buffer, or bufferSize are null; ESR_INVALID_STATE if the recognizer isn't
    367    * started, or the recognizer has already received the last frame; ESR_BUFFER_OVERFLOW if the recognizer buffer is
    368    * full
    369    */
    370   ESR_ReturnCode (*putAudio)(struct SR_Recognizer_t* self, asr_int16_t* buffer, size_t* bufferSize,
    371                             ESR_BOOL isLast);
    372   /**
    373    * Advance the recognizer by at least one utterance frame. The number of frames advanced
    374    * depends on the underlying definition. We anticipate that the recognizer will keep up with
    375    * the supplied audio buffers when waiting for voicing. After this point, the number of frames
    376    * may be one (for our default frame-advance mode) or it may be more if the synchronous nature
    377    * of this operation is not considered a problem. The recognizer may be advanced independently
    378    * of the Recognizer_PutAudio call. It is permissible to advance when there is no further data.
    379    * A stop condition could be an appropriate consequence.
    380    *
    381    * @param self SR_Recognizer handle
    382    * @param status [out] Resulting recognizer status
    383    * @param type [out] Resulting recognition result type
    384    * @param result [out] Resulting recognizer result
    385    * @return ESR_INVALID_ARGUMENT if self, status, or type are null; ESR_INVALID_STATE if an internal error occurs
    386    */
    387   ESR_ReturnCode(*advance)(struct SR_Recognizer_t* self, SR_RecognizerStatus* status,
    388                            SR_RecognizerResultType* type, SR_RecognizerResult** result);
    389 
    390 
    391   /**
    392    * Loads utterance from file.
    393    *
    394    * @param self SR_Recognizer handle
    395    * @param filename File to read from
    396    * @return ESR_INVALID_ARGUMENT if self is null
    397    */
    398   ESR_ReturnCode(*loadUtterance)(struct SR_Recognizer_t* self, const LCHAR* filename);
    399   /**
    400    * Loads utterance from WAVE file.
    401    *
    402    * @param self SR_Recognizer handle
    403    * @param filename WAVE file to read from
    404    * @return ESR_INVALID_ARGUMENT if self is null
    405    */
    406   ESR_ReturnCode(*loadWaveFile)(struct SR_Recognizer_t* self, const LCHAR* filename);
    407 
    408   /**
    409    * Log recognizer-related event token.
    410    *
    411    * @param self SR_Recognizer handle
    412    * @param token Token name
    413    * @param value Value to be logged
    414    * @return ESR_INVALID_ARGUMENT if self is null
    415    */
    416   ESR_ReturnCode(*logToken)(struct SR_Recognizer_t* self, const LCHAR* token, const LCHAR* value);
    417 
    418   /**
    419    * Log recognizer-related event token integer.
    420    *
    421    * @param self SR_Recognizer handle
    422    * @param token Token name
    423    * @param value Value to be logged
    424    * @return ESR_INVALID_ARGUMENT if self is null
    425    */
    426   ESR_ReturnCode(*logTokenInt)(struct SR_Recognizer_t* self, const LCHAR* token, int value);
    427 
    428   /**
    429    * Log recognizer-related event and dump all previously accumulated tokens since last event to
    430    * log.
    431    *
    432    * @param self SR_Recognizer handle
    433    * @param event Event name
    434    * @return ESR_INVALID_ARGUMENT if self is null
    435    */
    436   ESR_ReturnCode(*logEvent)(struct SR_Recognizer_t* self, const LCHAR* event);
    437 
    438   /**
    439    * Log the beginning of a new log session. A log session contains zero or more recognitions (transactions)
    440    * and it is up to the application to decide when the session ends and a new one begins (e.g.
    441    * timeout, number of recognitions, etc.)
    442    *
    443    * @param self SR_Recognizer handle
    444    * @param sessionName Session name
    445    * @return ESR_INVALID_ARGUMENT if self is null
    446    */
    447   ESR_ReturnCode(*logSessionStart)(struct SR_Recognizer_t* self, const LCHAR* sessionName);
    448 
    449   /**
    450    * Log the end of a log session.
    451    *
    452    * @param self SR_Recognizer handle
    453    * @return ESR_INVALID_ARGUMENT if self is null
    454    */
    455   ESR_ReturnCode(*logSessionEnd)(struct SR_Recognizer_t* self);
    456 
    457   /**
    458    * Log data about a waveform obtained from a TCP file. This function is not called
    459    * when doing live recognition.
    460    *
    461    * @param self SR_Recognizer handle
    462    * @param waveformFilename Name of the waveform file
    463    * @param transcription Transcription for the utterance
    464    * @param bos Beginning of speech (seconds)
    465    * @param eos End of speech (seconds)
    466    * @param isInvocab True if the transcription is accepted by the grammar, False otherwise
    467    * @return ESR_INVALID_ARGUMENT if self is null
    468    */
    469   ESR_ReturnCode(*logWaveformData)(struct SR_Recognizer_t* self,
    470                                    const LCHAR* waveformFilename,
    471                                    const LCHAR* transcription,
    472                                    const double bos,
    473                                    const double eos,
    474                                    ESR_BOOL isInvocab);
    475 
    476   /**
    477    * Associates a locking function with the recognizer. This function is used to
    478    * protect internal data from multithreaded access.
    479    *
    480    * @param self SR_Recognizer handle
    481    * @param function Locking function
    482    * @param data Function data
    483    * @return ESR_INVALID_ARGUMENT if self is null
    484    */
    485   ESR_ReturnCode(*setLockFunction)(struct SR_Recognizer_t *self, SR_RecognizerLockFunction function, void* data);
    486   /**
    487    * Indicates if signal is getting clipped.
    488    *
    489    * @param self SR_Recognizer handle
    490    * @param isClipping [out] Result value
    491    * @return ESR_INVALID_ARGUMENT if self is null
    492    */
    493   ESR_ReturnCode(*isSignalClipping)(struct SR_Recognizer_t* self, ESR_BOOL* isClipping);
    494   /**
    495    * Indicates if signal has a DC-offset component.
    496    *
    497    * @param self SR_Recognizer handle
    498    * @param isDCOffset [out] Result value
    499    * @return ESR_INVALID_ARGUMENT if self is null
    500    */
    501   ESR_ReturnCode(*isSignalDCOffset)(struct SR_Recognizer_t* self, ESR_BOOL* isDCOffset);
    502   /**
    503    * Indicates if signal is noisy.
    504    *
    505    * @param self SR_Recognizer handle
    506    * @param isNoisy [out] Result value
    507    * @return ESR_INVALID_ARGUMENT if self is null
    508    */
    509   ESR_ReturnCode(*isSignalNoisy)(struct SR_Recognizer_t* self, ESR_BOOL* isNoisy);
    510   /**
    511    * Indicates if speech contained within the signal is too quiet.
    512    *
    513    * @param self SR_Recognizer handle
    514    * @param isTooQuiet [out] Result value
    515    * @return ESR_INVALID_ARGUMENT if self is null
    516    */
    517   ESR_ReturnCode(*isSignalTooQuiet)(struct SR_Recognizer_t* self, ESR_BOOL* isTooQuiet);
    518   /**
    519    * Indicates if there are too few samples in the signal for a proper recognition.
    520    *
    521    * @param self SR_Recognizer handle
    522    * @param isTooFewSamples [out] Result value
    523    * @return ESR_INVALID_ARGUMENT if self is null
    524    */
    525   ESR_ReturnCode(*isSignalTooFewSamples)(struct SR_Recognizer_t* self, ESR_BOOL* isTooFewSamples);
    526   /**
    527    * Indicates if there are too many samples in the signal for a proper recognition.
    528    *
    529    * @param self SR_Recognizer handle
    530    * @param isTooManySamples [out] Result value
    531    * @return ESR_INVALID_ARGUMENT if self is null
    532    */
    533   ESR_ReturnCode(*isSignalTooManySamples)(struct SR_Recognizer_t* self, ESR_BOOL* isTooManySamples);
    534 }
    535 SR_Recognizer;
    536 
    537 /**
    538  * Starts recognition.
    539  *
    540  * @param self SR_Recognizer handle
    541  * @return ESR_INVALID_ARGUMENT if self is null, if no acoustic models have been associated with the recognizer,
    542  * if no grammars have been activated, or if the recognizer cannot be started for an unknown reason
    543  */
    544 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerStart(SR_Recognizer* self);
    545 /**
    546  * Stops the recognizer and invalidates the recognition result object.
    547  * Calling this function before the recognizer receives the last frame causes the recognition
    548  * to abort.
    549  *
    550  * @param self SR_Recognizer handle
    551  * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_STATE if an internal error has occurred
    552  */
    553 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerStop(SR_Recognizer* self);
    554 
    555 /**
    556  * @name Recognizer Setup operations
    557  *
    558  * @{
    559  */
    560 
    561 /**
    562  * Create a new recognizer.
    563  *
    564  * @param self [out] SR_Recognizer handle (receives the newly created recognizer)
    565  * @return ESR_INVALID_ARGUMENT if self is null; ESR_OUT_OF_MEMORY if system is out of memory;
    566  * ESR_INVALID_STATE if an internal error occurs
    567  */
    568 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerCreate(SR_Recognizer** self);
    569 /**
    570  * Destroy a recognizer.
    571  *
    572  * @param self SR_Recognizer handle
    573  * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_STATE if an internal error has occurred
    574  */
    575 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerDestroy(SR_Recognizer* self);
    576 /**
    577  * Associates a set of models with the recognizer. All grammars must use models consistently.
    578  *
    579  * @param self SR_Recognizer handle
    580  * @see SR_RecognizerCheckGrammarConsistency
    581  * @return ESR_INVALID_ARGUMENT if self is null
    582  */
    583 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerSetup(SR_Recognizer* self);
    584 /**
    585  * Unconfigures recognizer.
    586  *
    587  * @param self SR_Recognizer handle
    588  * @return ESR_INVALID_ARGUMENT if self is null
    589  */
    590 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerUnsetup(SR_Recognizer* self);
    591 /**
    592  * Indicates whether recognizer is configured for use.
    593  *
    594  * @param self SR_Recognizer handle
    595  * @param isSetup [out] True if recognizer is configured
    596  * @return ESR_INVALID_ARGUMENT if self is null
    597  */
    598 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsSetup(SR_Recognizer* self, ESR_BOOL* isSetup);
    599 
    600 /**
    601  * @}
    602  *
    603  * @name Recognizer parameter operations
    604  *
    605  * @{
    606  */
    607 
    608 /**
    609  * Returns copy of LCHAR recognition parameter.
    610  *
    611  * @param self SR_Recognizer handle
    612  * @param key Parameter name
    613  * @param value [out] Used to hold the parameter value
    614  * @param len [in/out] Length of value argument. If the return code is ESR_BUFFER_OVERFLOW,
    615  *            the required length is returned in this variable.
    616  * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_RESULT_TYPE if the specified property is not of
    617  * type LCHAR*
    618  */
    619 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerGetParameter(SR_Recognizer* self, const LCHAR* key, LCHAR* value, size_t* len);
    620 /**
    621  * Return copy of size_t recognition parameter.
    622  *
    623  * @param self SR_Recognizer handle
    624  * @param key Parameter name
    625  * @param value [out] Used to hold the parameter value
    626  * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_RESULT_TYPE if the specified property is not of
    627  * type size_t
    628  */
    629 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerGetSize_tParameter(SR_Recognizer* self, const LCHAR* key, size_t* value);
    630 /**
    631  * Return copy of BOOL recognition parameter.
    632  *
    633  * @param self SR_Recognizer handle
    634  * @param key Parameter name
    635  * @param value [out] Used to hold the parameter value
    636  * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_RESULT_TYPE if the specified property is not of
    637  * type bool
    638  */
    639 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerGetBoolParameter(SR_Recognizer* self, const LCHAR* key, ESR_BOOL* value);
    640 /**
    641  * Sets LCHAR* recognition parameters.
    642  *
    643  * Key:             Description of associated value
    644  *
    645  * VoiceEnrollment       If "true", the next recognition will produce data required
    646  *                              for Nametag support (i.e. Aurora bitstream).
    647  *
    648  * @param self SR_Recognizer handle
    649  * @param key Parameter name
    650  * @param value Parameter value
    651  * @return ESR_INVALID_ARGUMENT if self is null; ESR_OUT_OF_MEMORY if system is out of memory
    652  */
    653 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerSetParameter(SR_Recognizer* self, const LCHAR* key, LCHAR* value);
    654 /**
    655  * Sets size_t recognition parameter.
    656  *
    657  * @param self SR_Recognizer handle
    658  * @param key Parameter name
    659  * @param value Parameter value
    660  * @return ESR_INVALID_ARGUMENT if self is null; ESR_OUT_OF_MEMORY if system is out of memory
    661  */
    662 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerSetSize_tParameter(SR_Recognizer* self, const LCHAR* key, size_t value);
    663 /**
    664  * Sets BOOL recognition parameter.
    665  *
    666  * @param self SR_Recognizer handle
    667  * @param key Parameter name
    668  * @param value Parameter value
    669  * @return ESR_INVALID_ARGUMENT if self is null; ESR_OUT_OF_MEMORY if system is out of memory
    670  */
    671 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerSetBoolParameter(SR_Recognizer* self, const LCHAR* key, ESR_BOOL value);
    672 
    673 /**
    674  * @}
    675  *
    676  * @name Recognizer rule Setup/Activation operations
    677  *
    678  * @{
    679  */
    680 
    681 /**
    682  * Recognizer may be set up with multiple Grammars and multiple rules. All grammars
    683  * must be unsetup before the recognizer can be destroyed.
    684  * A pre-compiled Grammar should have undergone a model consistency check with the
    685  * recognizer prior to this call.
    686  *
    687  * @param self SR_Recognizer handle
    688  * @param grammar Grammar containing rule
    689  * @param ruleName Name of rule to associate with recognizer
    690  * @see SR_GrammarCheckModelConsistency
    691  * @return ESR_INVALID_ARGUMENT if self is null
    692  */
    693 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerSetupRule(SR_Recognizer* self,
    694                                                           struct SR_Grammar_t* grammar,
    695     const LCHAR* ruleName);
    696 /**
    697  * Indicates if Recognizer is configured with any rules within the specified Grammar.
    698  *
    699  * @param self SR_Recognizer handle
    700  * @param hasSetupRules True if the Recognizer is configured for the Grammar
    701  * @return ESR_INVALID_ARGUMENT if self is null
    702  */
    703 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerHasSetupRules(SR_Recognizer* self,
    704     ESR_BOOL* hasSetupRules);
    705 /**
    706  * Activates rule in recognizer.
    707  *
    708  * @param self SR_Recognizer handle
    709  * @param grammar Grammar containing rule
    710  * @param ruleName Name of rule
    711  * @param weight Relative weight to assign to this grammar vs. other activated grammars.
    712  *               Values: Integers 0-2^31.
    713  * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_STATE if no models are associated with the recognizer,
    714  * or if the rule could not be setup, or if the acoustic models could not be setup;
    715  * ESR_BUFFER_OVERFLOW if ruleName is too long
    716  */
    717 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerActivateRule(SR_Recognizer* self,
    718                                                              struct SR_Grammar_t* grammar,
    719     const LCHAR* ruleName,
    720     unsigned int weight);
    721 /**
    722  * Deactivates rule in recognizer.
    723  *
    724  * @param self SR_Recognizer handle
    725  * @param grammar Grammar containing rule
    726  * @param ruleName Name of rule
    727  * @return ESR_INVALID_ARGUMENT if self is null; ESR_NO_MATCH_ERROR if grammar is not activated
    728  */
    729 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerDeactivateRule(SR_Recognizer* self,
    730                                                                struct SR_Grammar_t* grammar,
    731     const LCHAR* ruleName);
    732 
    733 /**
    734  * Deactivates all grammar rules in recognizer.
    735  *
    736  * @param self SR_Recognizer handle
    737  * @return ESR_INVALID_ARGUMENT if self is null
    738  */
    739 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerDeactivateAllRules(SR_Recognizer* self);
    740 
    741 /**
    742  * Indicates if rule is active in recognizer.
    743  *
    744  * @param self SR_Recognizer handle
    745  * @param grammar Grammar containing rule
    746  * @param ruleName Name of rule
    747  * @param isActiveRule True if rule is active
    748  * @return ESR_INVALID_ARGUMENT if self is null
    749  */
    750 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsActiveRule(SR_Recognizer* self,
    751                                                              struct SR_Grammar_t* grammar,
    752     const LCHAR* ruleName,
    753     ESR_BOOL* isActiveRule);
    754 /**
    755  * Ensure the model usage in a pre-compiled grammar is consistent with the models
    756  * that are associated with the Recognizer. You must first have called Recognizer_Setup().
    757  *
    758  * @param self SR_Recognizer handle
    759  * @param grammar Grammar to check against
    760  * @param isConsistent [out] True if the grammar's model usage is consistent with the recognizer's models
    761  * @return ESR_INVALID_ARGUMENT if self is null
    762  */
    763 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerCheckGrammarConsistency(SR_Recognizer* self,
    764                                                                         struct SR_Grammar_t* grammar,
    765     ESR_BOOL* isConsistent);
    766 /**
    767  * @}
    768  *
    769  * @name Recognizer Advance operations
    770  *
    771  * @{
    772  */
    773 
    774 /**
    775  * Get audio into the recognizer.
    776  *
    777  * We decouple the Audio and frontend processing from the Recognizer processing via an
    778  * internal FIFO frame buffer (aka utterance buffer). This ensures that this call is at least
    779  * as fast as real time so that voicing events are not unduly delayed. The audio buffer size
    780  * must be at least one frame buffer's worth and some reasonable maximum size for synchronous
    781  * behaviour. This function may be called independently of SR_RecognizerAdvance.
    782  *
    783  * @param self SR_Recognizer handle
    784  * @param buffer Buffer containing audio data
    785  * @param bufferSize [in/out] Size of buffer in samples. In case of a buffer overflow,
    786  *                            ESR_BUFFER_OVERFLOW is returned and this value holds the actual
    787  *                            amount of samples that were pushed.
    788  * @param isLast Indicates if the audio frame is the last one in this recognition
    789  * @return ESR_INVALID_ARGUMENT if self, buffer, or bufferSize are null; ESR_INVALID_STATE if the recognizer isn't
    790  * started, or the recognizer has already received the last frame; ESR_BUFFER_OVERFLOW if the recognizer buffer is
    791  * full
    792  */
    793 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerPutAudio(SR_Recognizer* self, asr_int16_t* buffer,
    794     size_t* bufferSize, ESR_BOOL isLast);
    795 /**
    796  * Advance the recognizer by at least one utterance frame. The number of frames advanced
    797  * depends on the underlying definition. We anticipate that the recognizer will keep up with
    798  * the supplied audio buffers when waiting for voicing. After this point, the number of frames
    799  * may be one (for our default frame-advance mode) or it may be more if the synchronous nature
    800  * of this operation is not considered a problem. The recognizer may be advanced independently
    801  * of the SR_RecognizerPutAudio call. It is permissible to advance when there is no further data.
    802  * A stop condition could be an appropriate consequence.
    803  *
    804  * @param self Recognizer handle
    805  * @param status Resulting recognizer status
    806  * @param type Resulting recognition result type
    807  * @param result Resulting recognizer result
    808  * @return ESR_INVALID_ARGUMENT if self, status, or type are null; ESR_INVALID_STATE if an internal error occurs
    809  */
    810 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerAdvance(SR_Recognizer* self,
    811     SR_RecognizerStatus* status,
    812     SR_RecognizerResultType* type,
    813     SR_RecognizerResult** result);
    814 /**
    815  * @}
    816  */
    817 
    818 /**
    819  * Log recognizer-related event token.
    820  *
    821  * @param self SR_Recognizer handle
    822  * @param token Token name
    823  * @param value Value to be logged
    824  * @return ESR_INVALID_ARGUMENT if self is null
    825  */
    826 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLogToken(SR_Recognizer* self, const LCHAR* token, const LCHAR* value);
    827 
    828 /**
    829  * Log recognizer-related event token integer.
    830  *
    831  * @param self SR_Recognizer handle
    832  * @param token Token name
    833  * @param value Value to be logged
    834  * @return ESR_INVALID_ARGUMENT if self is null
    835  */
    836 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLogTokenInt(SR_Recognizer* self, const LCHAR* token, int value);
    837 
    838 /**
    839  * Log recognizer-related event and dump all previously accumulated tokens since last event to
    840  * log.
    841  *
    842  * @param self SR_Recognizer handle
    843  * @param event Event name
    844  * @return ESR_INVALID_ARGUMENT if self is null
    845  */
    846 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLogEvent(SR_Recognizer* self, const LCHAR* event);
    847 
    848 /**
    849  * Log the beginning of a new log session. A log session contains zero or more recognitions (transactions)
    850  * and it is up to the application to decide when the session ends and a new one begins (e.g.
    851  * timeout, number of recognitions, etc.)
    852  *
    853  * @param self SR_Recognizer handle
    854  * @param sessionName Session name
    855  * @return ESR_INVALID_ARGUMENT if self is null
    856  */
    857 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLogSessionStart(SR_Recognizer* self, const LCHAR* sessionName);
    858 
    859 /**
    860  * Log the end of a log session.
    861  *
    862  * @param self SR_Recognizer handle
    863  * @return ESR_INVALID_ARGUMENT if self is null
    864  */
    865 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLogSessionEnd(SR_Recognizer* self);
    866 
    867 /**
    868  * Log data about a waveform obtained from a TCP file. This function is not called
    869  * when doing live recognition.
    870  *
    871  * @param self SR_Recognizer handle
    872  * @param waveformFilename Waveform filename
    873  * @param transcription Transcription for the utterance
    874  * @param bos Beginning of speech (seconds)
    875  * @param eos End of speech (seconds)
    876  * @param isInvocab True if the transcription is accepted by the grammar, False otherwise
    877  * @return ESR_INVALID_ARGUMENT if self is null
    878  */
    879 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLogWaveformData(SR_Recognizer* self,
    880     const LCHAR* waveformFilename,
    881     const LCHAR* transcription,
    882     const double bos,
    883     const double eos,
    884     ESR_BOOL isInvocab);
    885 
    886 
    887 /**
    888  * Loads utterance from file.
    889  *
    890  * @param self SR_Recognizer handle
    891  * @param filename File to read from
    892  * @return ESR_INVALID_ARGUMENT if self is null
    893  */
    894 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLoadUtterance(SR_Recognizer* self, const LCHAR* filename);
    895 /**
    896  * Loads utterance from WAVE file.
    897  *
    898  * @param self SR_Recognizer handle
    899  * @param filename WAVE file to read from
    900  * @return ESR_INVALID_ARGUMENT if self is null
    901  */
    902 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLoadWaveFile(SR_Recognizer* self, const LCHAR* filename);
    903 
    904 /**
    905  * Associates a locking function with the recognizer. This function is used to
    906  * protect internal data from multithreaded access.
    907  *
    908  * @param self SR_Recognizer handle
    909  * @param function Locking function
    910  * @param data Function data
    911  * @return ESR_INVALID_ARGUMENT if self is null
    912  */
    913 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerSetLockFunction(SR_Recognizer* self,
    914     SR_RecognizerLockFunction function,
    915     void* data);
    916 
    917 /**
    918  *
    919  * @name Signal quality metrics
    920  *
    921  * @{
    922  */
    923 
    924 /**
    925  * Indicates if signal is getting clipped.
    926  *
    927  * @param self SR_Recognizer handle
    928  * @param isClipping [out] Result value
    929  * @return ESR_INVALID_ARGUMENT if self is null
    930  */
    931 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsSignalClipping(SR_Recognizer* self, ESR_BOOL* isClipping);
    932 /**
    933  * Indicates if signal has a DC-offset component.
    934  *
    935  * @param self SR_Recognizer handle
    936  * @param isDCOffset [out] Result value
    937  * @return ESR_INVALID_ARGUMENT if self is null
    938  */
    939 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsSignalDCOffset(SR_Recognizer* self, ESR_BOOL* isDCOffset);
    940 /**
    941  * Indicates if signal is noisy.
    942  *
    943  * @param self SR_Recognizer handle
    944  * @param isNoisy [out] Result value
    945  * @return ESR_INVALID_ARGUMENT if self is null
    946  */
    947 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsSignalNoisy(SR_Recognizer* self, ESR_BOOL* isNoisy);
    948 /**
    949  * Indicates if speech contained within the signal is too quiet.
    950  *
    951  * @param self SR_Recognizer handle
    952  * @param isTooQuiet [out] Result value
    953  * @return ESR_INVALID_ARGUMENT if self is null
    954  */
    955 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsSignalTooQuiet(SR_Recognizer* self, ESR_BOOL* isTooQuiet);
    956 /**
    957  * Indicates if there are too few samples in the signal for a proper recognition.
    958  *
    959  * @param self SR_Recognizer handle
    960  * @param isTooFewSamples [out] Result value
    961  * @return ESR_INVALID_ARGUMENT if self is null
    962  */
    963 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsSignalTooFewSamples(SR_Recognizer* self, ESR_BOOL* isTooFewSamples);
    964 /**
    965  * Indicates if there are too many samples in the signal for a proper recognition.
    966  *
    967  * @param self SR_Recognizer handle
    968  * @param isTooManySamples [out] Result value
    969  * @return ESR_INVALID_ARGUMENT if self is null
    970  */
    971 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsSignalTooManySamples(SR_Recognizer* self, ESR_BOOL* isTooManySamples);
    972 
    973 /**
    974  * Changes the sample rate of audio.
    975  *
    976  * @param self SR_Recognizer handle
    977  * @param new_sample_rate [in] New Sample Rate
    978  * @return ESR_INVALID_ARGUMENT if self is null
    979  */
    980 SREC_RECOGNIZER_API ESR_ReturnCode SR_Recognizer_Change_Sample_Rate ( SR_Recognizer *self, size_t new_sample_rate );
    981 
    982 /**
    983  * @}
    984  */
    985 
    986 /**
    987  * @}
    988  */
    989 
    990 
    991 #endif /* __SR_RECOGNIZER_H */
    992