1 /*---------------------------------------------------------------------------* 2 * SR_Recognizer.h * 3 * * 4 * Copyright 2007, 2008 Nuance Communciations, Inc. * 5 * * 6 * Licensed under the Apache License, Version 2.0 (the 'License'); * 7 * you may not use this file except in compliance with the License. * 8 * * 9 * You may obtain a copy of the License at * 10 * http://www.apache.org/licenses/LICENSE-2.0 * 11 * * 12 * Unless required by applicable law or agreed to in writing, software * 13 * distributed under the License is distributed on an 'AS IS' BASIS, * 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * 15 * See the License for the specific language governing permissions and * 16 * limitations under the License. * 17 * * 18 *---------------------------------------------------------------------------*/ 19 20 #ifndef __SR_RECOGNIZER_H 21 #define __SR_RECOGNIZER_H 22 23 24 25 #include "ESR_ReturnCode.h" 26 #include "SR_RecognizerPrefix.h" 27 #include "SR_AcousticModels.h" 28 #include "SR_Grammar.h" 29 #include "SR_RecognizerResult.h" 30 #include "SR_Nametags.h" 31 #include "pstdio.h" 32 #include "ptypes.h" 33 34 /* forward decl needed because of SR_Recognizer.h <-> SR_Grammar.h include loop */ 35 struct SR_Grammar_t; 36 37 /** 38 * Recognizer status. 39 */ 40 typedef enum SR_RecognizerStatus_t 41 { 42 /** 43 * Reserved value. 44 */ 45 SR_RECOGNIZER_EVENT_INVALID, 46 /** 47 * Recognizer could not find a match for the utterance. 48 */ 49 SR_RECOGNIZER_EVENT_NO_MATCH, 50 /** 51 * Recognizer processed one frame of audio. 52 */ 53 SR_RECOGNIZER_EVENT_INCOMPLETE, 54 /** 55 * Recognizer has just been started. 56 */ 57 SR_RECOGNIZER_EVENT_STARTED, 58 /** 59 * Recognizer is stopped. 60 */ 61 SR_RECOGNIZER_EVENT_STOPPED, 62 /** 63 * Beginning of speech detected. 64 */ 65 SR_RECOGNIZER_EVENT_START_OF_VOICING, 66 /** 67 * End of speech detected. 68 */ 69 SR_RECOGNIZER_EVENT_END_OF_VOICING, 70 /** 71 * Beginning of utterance occured too soon. 72 */ 73 SR_RECOGNIZER_EVENT_SPOKE_TOO_SOON, 74 /** 75 * Recognition match detected. 76 */ 77 SR_RECOGNIZER_EVENT_RECOGNITION_RESULT, 78 /** 79 * Timeout occured before beginning of utterance. 80 */ 81 SR_RECOGNIZER_EVENT_START_OF_UTTERANCE_TIMEOUT, 82 /** 83 * Timeout occured before speech recognition could complete. 84 */ 85 SR_RECOGNIZER_EVENT_RECOGNITION_TIMEOUT, 86 /** 87 * Not enough samples to process one frame. 88 */ 89 SR_RECOGNIZER_EVENT_NEED_MORE_AUDIO, 90 /** 91 * More audio encountered than is allowed by 'swirec_max_speech_duration'. 92 */ 93 SR_RECOGNIZER_EVENT_MAX_SPEECH, 94 } SR_RecognizerStatus; 95 96 /** 97 * Type of RecognizerResult returned by SR_RecognizerAdvance(). 98 */ 99 typedef enum SR_RecognizerResultType_t 100 { 101 /** 102 * Reserved value. 103 */ 104 SR_RECOGNIZER_RESULT_TYPE_INVALID, 105 /** 106 * The result is complete from a full recognition of audio. 107 */ 108 SR_RECOGNIZER_RESULT_TYPE_COMPLETE, 109 /** 110 * No results at this time. 111 */ 112 SR_RECOGNIZER_RESULT_TYPE_NONE, 113 } SR_RecognizerResultType; 114 115 /** 116 * SR_Utterance stubbed out. 117 */ 118 typedef void* SR_Utterance; 119 120 typedef enum 121 { 122 ESR_LOCK, 123 ESR_UNLOCK 124 } ESR_LOCKMODE; 125 126 /** 127 * Function which will be invoked before accessing internal variables. 128 */ 129 typedef ESR_ReturnCode(*SR_RecognizerLockFunction)(ESR_LOCKMODE mode, void* data); 130 131 /** 132 * @addtogroup SR_RecognizerModule SR_Recognizer API functions 133 * Synchronous speech recognizer. 134 * 135 * @{ 136 */ 137 138 /** 139 * Synchronous speech recognizer. 140 */ 141 typedef struct SR_Recognizer_t 142 { 143 /** 144 * Starts recognition. 145 * 146 * @param self SR_Recognizer handle 147 * @return ESR_INVALID_ARGUMENT if self is null, if no acoustic models have been associated with the recognizer, 148 * if no grammars have been activated, or if the recognizer cannot be started for an unknown reason 149 */ 150 ESR_ReturnCode(*start)(struct SR_Recognizer_t* self); 151 /** 152 * Stops the recognizer and invalidates the recognition result object. 153 * Calling this function before the recognizer receives the last frame causes the recognition 154 * to abort. 155 * 156 * @param self SR_Recognizer handle 157 * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_STATE if an internal error has occured 158 */ 159 ESR_ReturnCode(*stop)(struct SR_Recognizer_t* self); 160 /** 161 * Destroy a recognizer. 162 * 163 * @param self SR_Recognizer handle 164 * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_STATE if an internal error has occured 165 */ 166 ESR_ReturnCode(*destroy)(struct SR_Recognizer_t* self); 167 /** 168 * Associates a set of models with the recognizer. 169 * 170 * @param self SR_Recognizer handle 171 * @return ESR_INVALID_ARGUMENT if self is null 172 */ 173 ESR_ReturnCode(*setup)(struct SR_Recognizer_t* self); 174 /** 175 * Unconfigures recognizer. 176 * 177 * @param self SR_Recognizer handle 178 * @return ESR_INVALID_ARGUMENT if self is null 179 */ 180 ESR_ReturnCode(*unsetup)(struct SR_Recognizer_t* self); 181 /** 182 * Indicates whether recognizer is configured for use. 183 * 184 * @param self SR_Recognizer handle 185 * @param isSetup True if recognizer is configured 186 * @return ESR_INVALID_ARGUMENT if self is null 187 */ 188 ESR_ReturnCode(*isSetup)(struct SR_Recognizer_t* self, ESR_BOOL* isSetup); 189 190 /** 191 * Returns copy of LCHAR recognition parameter. 192 * 193 * @param self SR_Recognizer handle 194 * @param key Parameter name 195 * @param value [out] Used to hold the parameter value 196 * @param len [in/out] Length of value argument. If the return code is ESR_BUFFER_OVERFLOW, 197 * the required length is returned in this variable. 198 * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_RESULT_TYPE if the specified property is not of 199 * type LCHAR* 200 */ 201 ESR_ReturnCode(*getParameter)(struct SR_Recognizer_t* self, const LCHAR* key, LCHAR* value, size_t* len); 202 /** 203 * Return copy of size_t recognition parameter. 204 * 205 * @param self SR_Recognizer handle 206 * @param key Parameter name 207 * @param value [out] Used to hold the parameter value 208 * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_RESULT_TYPE if the specified property is not of 209 * type size_t 210 */ 211 ESR_ReturnCode(*getSize_tParameter)(struct SR_Recognizer_t* self, const LCHAR* key, size_t* value); 212 /** 213 * Return copy of BOOL recognition parameter. 214 * 215 * @param self SR_Recognizer handle 216 * @param key Parameter name 217 * @param value [out] Used to hold the parameter value 218 * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_RESULT_TYPE if the specified property is not of 219 * type bool 220 */ 221 ESR_ReturnCode(*getBoolParameter)(struct SR_Recognizer_t* self, const LCHAR* key, ESR_BOOL* value); 222 /** 223 * Sets recognition parameters. 224 * 225 * Key: Description of associated value 226 * 227 * VoiceEnrollment If "true", the next recognition will produce data required 228 * for Nametag support (i.e. Aurora bitstream). 229 * 230 * @param self SR_Recognizer handle 231 * @param key Parameter name 232 * @param value Parameter value 233 * @return ESR_INVALID_ARGUMENT if self is null; ESR_OUT_OF_MEMORY is system is out of memory 234 */ 235 ESR_ReturnCode(*setParameter)(struct SR_Recognizer_t* self, const LCHAR* key, LCHAR* value); 236 /** 237 * Sets recognition parameters. 238 * 239 * @param self SR_Recognizer handle 240 * @param key Parameter name 241 * @param value Parameter value 242 * @return ESR_INVALID_ARGUMENT if self is null; ESR_OUT_OF_MEMORY is system is out of memory 243 */ 244 ESR_ReturnCode(*setSize_tParameter)(struct SR_Recognizer_t* self, const LCHAR* key, size_t value); 245 /** 246 * Sets recognition parameters. 247 * 248 * @param self SR_Recognizer handle 249 * @param key Parameter name 250 * @param value Parameter value 251 * @return ESR_INVALID_ARGUMENT if self is null; ESR_OUT_OF_MEMORY is system is out of memory 252 */ 253 ESR_ReturnCode(*setBoolParameter)(struct SR_Recognizer_t* self, const LCHAR* key, ESR_BOOL value); 254 255 /** 256 * Recognizer may be set up with multiple Grammars and multiple rules. All grammars 257 * must be unsetup before the recognizer can be destroy. 258 * A pre-compiled Grammar should have undergone a model consistency check with the 259 * recognizer prior to this call. 260 * 261 * @param self SR_Recognizer handle 262 * @param grammar Grammar containing rule 263 * @param ruleName Name of rule to associate with recognizer 264 * @see SR_GrammarCheckModelConsistency 265 * @return ESR_INVALID_ARGUMENT if self is null 266 */ 267 ESR_ReturnCode (*setupRule)(struct SR_Recognizer_t* self, struct SR_Grammar_t* grammar, const LCHAR* ruleName); 268 /** 269 * Indicates if Recognizer is configured with any rules within the specified Grammar. 270 * 271 * @param self SR_Recognizer handle 272 * @param hasSetupRules True if the Recognizer is configured for the Grammar 273 * @return ESR_INVALID_ARGUMENT if self is null 274 */ 275 ESR_ReturnCode(*hasSetupRules)(struct SR_Recognizer_t* self, ESR_BOOL* hasSetupRules); 276 /** 277 * Activates rule in recognizer. 278 * 279 * @param self SR_Recognizer handle 280 * @param grammar Grammar containing rule 281 * @param ruleName Name of rule 282 * @param weight Relative weight to assign to self grammar vs. other activated grammars. 283 * Values: Integers 0-2^31. 284 * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_STATE if no models are associated with the recognizer, 285 * or if the rule could not be setup, or if the acoustic models could not be setup; 286 * ESR_BUFFER_OVERFLOW if ruleName is too long 287 */ 288 ESR_ReturnCode (*activateRule)(struct SR_Recognizer_t* self, struct SR_Grammar_t* grammar, 289 const LCHAR* ruleName, unsigned int weight); 290 /** 291 * Deactivates rule in recognizer. 292 * 293 * @param self SR_Recognizer handle 294 * @param grammar Grammar containing rule 295 * @param ruleName Name of root rule 296 * @return ESR_INVALID_ARGUMENT if self is null; ESR_NO_MATCH_ERROR if grammar is not activated 297 */ 298 ESR_ReturnCode (*deactivateRule)(struct SR_Recognizer_t* self, struct SR_Grammar_t* grammar, 299 const LCHAR* ruleName); 300 301 /** 302 * Deactivates all grammar rules in recognizer. 303 * 304 * @param self SR_Recognizer handle 305 * @return ESR_INVALID_ARGUMENT if self is null 306 */ 307 ESR_ReturnCode(*deactivateAllRules)(struct SR_Recognizer_t* self); 308 309 /** 310 * Indicates if rule is active in recognizer. 311 * 312 * @param self SR_Recognizer handle 313 * @param grammar Grammar containing rule 314 * @param ruleName Name of rule 315 * @param isActiveRule True if rule is active 316 * @return ESR_INVALID_ARGUMENT if self is null 317 */ 318 ESR_ReturnCode (*isActiveRule)(struct SR_Recognizer_t* self, struct SR_Grammar_t* grammar, 319 const LCHAR* ruleName, ESR_BOOL* isActiveRule); 320 /** 321 * Configures the grammar for maximum amount of word addition 322 * 323 * @param self SR_Recognizer handle 324 * @param grammar Grammar whose ceiling to be set 325 * @return ESR_INVALID_ARGUMENT if self or grammar are null 326 */ 327 ESR_ReturnCode (*setWordAdditionCeiling)(struct SR_Recognizer_t* self, struct SR_Grammar_t* grammar ); 328 /** 329 * Ensure the model usage in a pre-compiled grammar is consistent with the models 330 * that are associated with the Recognizer. You must first have called Recognizer_Setup(). 331 * 332 * @param self SR_Recognizer handle 333 * @param grammar Grammar to check against 334 * @param isConsistent True if rule is consistent 335 * @return ESR_INVALID_ARGUMENT if self is null 336 */ 337 ESR_ReturnCode (*checkGrammarConsistency)(struct SR_Recognizer_t* self, struct SR_Grammar_t* grammar, 338 ESR_BOOL* isConsistent); 339 340 /** 341 * Ensure the model usage in a pre-compiled grammar is consistent with the models 342 * that are associated with the Recognizer. You must first have called Recognizer_Setup(). 343 * 344 * @param self SR_Recognizer handle 345 * @param grammar Grammar to check against 346 * @param isConsistent True if rule is consistent 347 * @return ESR_INVALID_ARGUMENT if self is null 348 */ 349 ESR_ReturnCode (*getModels)(struct SR_Recognizer_t* self, SR_AcousticModels** pmodels); 350 351 /** 352 * Get audio into the recognizer. 353 * 354 * We decouple the Audio and frontend processing from the Recognizer processing via an 355 * internal FIFO frame buffer (aka utterance buffer). This ensures that this call is at least 356 * as fast as real time so that voicing events are not unduly delayed. The audio buffer size 357 * must be at least one frame buffer's worth and some reasonable maximum size for synchronous 358 * behaviour. This function may be called independently of Recognizer_Advance. 359 * 360 * @param self SR_Recognizer handle 361 * @param buffer Buffer containing audio data 362 * @param bufferSize [in/out] Size of buffer in samples. In case of a buffer overflow, 363 * ESR_BUFFER_OVERFLOW is returned and this value holds the actual 364 * amount of samples that were pushed. 365 * @param isLast Indicates if the audio frame is the last one in this recognition 366 * @return ESR_INVALID_ARGUMENT if self, buffer, or bufferSize are null; ESR_INVALID_STATE if the recognizer isn't 367 * started, or the recognizer has already received the last frame; ESR_BUFFER_OVERFLOW if the recognizer buffer is 368 * full 369 */ 370 ESR_ReturnCode (*putAudio)(struct SR_Recognizer_t* self, asr_int16_t* buffer, size_t* bufferSize, 371 ESR_BOOL isLast); 372 /** 373 * Advance the recognizer by at least one utterance frame. The number of frames advanced 374 * depends on the underlying definition. We anticipate that the recognizer will keep up with 375 * the supplied audio buffers when waiting for voicing. After this point, the number of frames 376 * may be one (for our default frame-advance mode) or it may be more if the synchronous nature 377 * of this operation is not considered a problem. The recognizer may be advanced independently 378 * of the Recognizer_PutAudio call. It is permissible to advance when there is no further data. 379 * A stop condition could be an appropriate consequence. 380 * 381 * @param self Recognizer handle 382 * @param status Resulting recognizer status 383 * @param type Resulting recognition result type 384 * @param result Resulting recognizer result 385 * @return ESR_INVALID_ARGUMENT if self, status, or type are null; ESR_INVALID_STATE if an internal error occurs 386 */ 387 ESR_ReturnCode(*advance)(struct SR_Recognizer_t* self, SR_RecognizerStatus* status, 388 SR_RecognizerResultType* type, SR_RecognizerResult** result); 389 390 391 /** 392 * Loads utterance from file. 393 * 394 * @param self SR_Recognizer handle 395 * @param filename File to read from 396 * @return ESR_INVALID_ARGUMENT if self is null 397 */ 398 ESR_ReturnCode(*loadUtterance)(struct SR_Recognizer_t* self, const LCHAR* filename); 399 /** 400 * Loads utterance from WAVE file. 401 * 402 * @param self SR_Recognizer handle 403 * @param filename WAVE file to read from 404 * @return ESR_INVALID_ARGUMENT if self is null 405 */ 406 ESR_ReturnCode(*loadWaveFile)(struct SR_Recognizer_t* self, const LCHAR* filename); 407 408 /** 409 * Log recognizer-related event token. 410 * 411 * @param self SR_Recognizer handle 412 * @param event Token name 413 * @param value Value to be logged 414 * @return ESR_INVALID_ARGUMENT if self is null 415 */ 416 ESR_ReturnCode(*logToken)(struct SR_Recognizer_t* self, const LCHAR* token, const LCHAR* value); 417 418 /** 419 * Log recognizer-related event token integer. 420 * 421 * @param self SR_Recognizer handle 422 * @param event Token name 423 * @param value Value to be logged 424 * @return ESR_INVALID_ARGUMENT if self is null 425 */ 426 ESR_ReturnCode(*logTokenInt)(struct SR_Recognizer_t* self, const LCHAR* token, int value); 427 428 /** 429 * Log recognizer-related event and dump all previously accumulated tokens since last event to 430 * log. 431 * 432 * @param self SR_Recognizer handle 433 * @param event Event name 434 * @return ESR_INVALID_ARGUMENT if self is null 435 */ 436 ESR_ReturnCode(*logEvent)(struct SR_Recognizer_t* self, const LCHAR* event); 437 438 /** 439 * Log the beginning of a new log session. A log session contains zero or more recognitions (transactions) 440 * and it is up to the application to decided when the session ends and a new one begins (e.g. 441 * timeout, number of recognitions, etc.) 442 * 443 * @param self SR_Recognizer handle 444 * @param sessionName Session name 445 * @return ESR_INVALID_ARGUMENT if self is null 446 */ 447 ESR_ReturnCode(*logSessionStart)(struct SR_Recognizer_t* self, const LCHAR* sessionName); 448 449 /** 450 * Log the end of a log session. 451 * 452 * @param self SR_Recognizer handle 453 * @return ESR_INVALID_ARGUMENT if self is null 454 */ 455 ESR_ReturnCode(*logSessionEnd)(struct SR_Recognizer_t* self); 456 457 /** 458 * Log data about a waveform obtained from a TCP file. This function is not called 459 * when doing live recognition. 460 * 461 * @param self SR_Recognizer handle 462 * @param waveformFilename Session name 463 * @param transcription Transcription for the utterance 464 * @param bos Beginning of speech (seconds) 465 * @param eos End of speech (seconds) 466 * @param isInvocab True if the transcription is accepted by the grammar, False otherwise 467 * @return ESR_INVALID_ARGUMENT if self is null 468 */ 469 ESR_ReturnCode(*logWaveformData)(struct SR_Recognizer_t* self, 470 const LCHAR* waveformFilename, 471 const LCHAR* transcription, 472 const double bos, 473 const double eos, 474 ESR_BOOL isInvocab); 475 476 /** 477 * Associates a locking function with the recognizer. This function is used to 478 * protect internal data from multithreaded access. 479 * 480 * @param self SR_Recognizer handle 481 * @param function Locking function 482 * @param data Function data 483 * @return ESR_INVALID_ARGUMENT if self is null 484 */ 485 ESR_ReturnCode(*setLockFunction)(struct SR_Recognizer_t *self, SR_RecognizerLockFunction function, void* data); 486 /** 487 * Indicates if signal is getting clipped. 488 * 489 * @param self SR_Recognizer handle 490 * @param isClipping [out] Result value 491 * @return ESR_INVALID_ARGUMENT if self is null 492 */ 493 ESR_ReturnCode(*isSignalClipping)(struct SR_Recognizer_t* self, ESR_BOOL* isClipping); 494 /** 495 * Indicates if signal has a DC-offset component. 496 * 497 * @param self SR_Recognizer handle 498 * @param isDCOffset [out] Result value 499 * @return ESR_INVALID_ARGUMENT if self is null 500 */ 501 ESR_ReturnCode(*isSignalDCOffset)(struct SR_Recognizer_t* self, ESR_BOOL* isDCOffset); 502 /** 503 * Indicates if signal is noisy. 504 * 505 * @param self SR_Recognizer handle 506 * @param isNoisy [out] Result value 507 * @return ESR_INVALID_ARGUMENT if self is null 508 */ 509 ESR_ReturnCode(*isSignalNoisy)(struct SR_Recognizer_t* self, ESR_BOOL* isNoisy); 510 /** 511 * Indicates if speech contained within the signal is too quiet. 512 * 513 * @param self SR_Recognizer handle 514 * @param isTooQuiet [out] Result value 515 * @return ESR_INVALID_ARGUMENT if self is null 516 */ 517 ESR_ReturnCode(*isSignalTooQuiet)(struct SR_Recognizer_t* self, ESR_BOOL* isTooQuiet); 518 /** 519 * Indicates if there are too few samples in the signal for a proper recognition. 520 * 521 * @param self SR_Recognizer handle 522 * @param isTooFewSamples [out] Result value 523 * @return ESR_INVALID_ARGUMENT if self is null 524 */ 525 ESR_ReturnCode(*isSignalTooFewSamples)(struct SR_Recognizer_t* self, ESR_BOOL* isTooFewSamples); 526 /** 527 * Indicates if there are too many samples in the signal for a proper recognition. 528 * 529 * @param self SR_Recognizer handle 530 * @param isTooManySamples [out] Result value 531 * @return ESR_INVALID_ARGUMENT if self is null 532 */ 533 ESR_ReturnCode(*isSignalTooManySamples)(struct SR_Recognizer_t* self, ESR_BOOL* isTooManySamples); 534 } 535 SR_Recognizer; 536 537 /** 538 * Starts recognition. 539 * 540 * @param self SR_Recognizer handle 541 * @return ESR_INVALID_ARGUMENT if self is null, if no acoustic models have been associated with the recognizer, 542 * if no grammars have been activated, or if the recognizer cannot be started for an unknown reason 543 */ 544 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerStart(SR_Recognizer* self); 545 /** 546 * Stops the recognizer and invalidates the recognition result object. 547 * Calling this function before the recognizer receives the last frame causes the recognition 548 * to abort. 549 * 550 * @param self SR_Recognizer handle 551 * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_STATE if an internal error has occured 552 */ 553 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerStop(SR_Recognizer* self); 554 555 /** 556 * @name Recognizer Setup operations 557 * 558 * @{ 559 */ 560 561 /** 562 * Create a new recognizer. 563 * 564 * @param self SR_Recognizer handle 565 * @return ESR_INVALID_ARGUMENT if self is null; ESR_OUT_OF_MEMORY if system is out of memory; 566 * ESR_INVALID_STATE if an internal error occurs 567 */ 568 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerCreate(SR_Recognizer** self); 569 /** 570 * Destroy a recognizer. 571 * 572 * @param self SR_Recognizer handle 573 * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_STATE if an internal error has occured 574 */ 575 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerDestroy(SR_Recognizer* self); 576 /** 577 * Associates a set of models with the recognizer. All grammars must use models consistently. 578 * 579 * @param self SR_Recognizer handle 580 * @see SR_RecognizerCheckGrammarConsistency 581 * @return ESR_INVALID_ARGUMENT if self is null 582 */ 583 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerSetup(SR_Recognizer* self); 584 /** 585 * Unconfigures recognizer. 586 * 587 * @param self SR_Recognizer handle 588 * @return ESR_INVALID_ARGUMENT if self is null 589 */ 590 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerUnsetup(SR_Recognizer* self); 591 /** 592 * Indicates whether recognizer is configured for use. 593 * 594 * @param self SR_Recognizer handle 595 * @param isSetup True if recognizer is configured 596 * @return ESR_INVALID_ARGUMENT if self is null 597 */ 598 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsSetup(SR_Recognizer* self, ESR_BOOL* isSetup); 599 600 /** 601 * @} 602 * 603 * @name Recognizer parameter operations 604 * 605 * @{ 606 */ 607 608 /** 609 * Returns copy of LCHAR recognition parameter. 610 * 611 * @param self SR_Recognizer handle 612 * @param key Parameter name 613 * @param value [out] Used to hold the parameter value 614 * @param len [in/out] Length of value argument. If the return code is ESR_BUFFER_OVERFLOW, 615 * the required length is returned in this variable. 616 * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_RESULT_TYPE if the specified property is not of 617 * type LCHAR* 618 */ 619 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerGetParameter(SR_Recognizer* self, const LCHAR* key, LCHAR* value, size_t* len); 620 /** 621 * Return copy of size_t recognition parameter. 622 * 623 * @param self SR_Recognizer handle 624 * @param key Parameter name 625 * @param value Used to hold the parameter value 626 * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_RESULT_TYPE if the specified property is not of 627 * type size_t 628 */ 629 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerGetSize_tParameter(SR_Recognizer* self, const LCHAR* key, size_t* value); 630 /** 631 * Return copy of BOOL recognition parameter. 632 * 633 * @param self SR_Recognizer handle 634 * @param key Parameter name 635 * @param value Used to hold the parameter value 636 * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_RESULT_TYPE if the specified property is not of 637 * type bool 638 */ 639 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerGetBoolParameter(SR_Recognizer* self, const LCHAR* key, ESR_BOOL* value); 640 /** 641 * Sets LCHAR* recognition parameters. 642 * 643 * Key: Description of associated value 644 * 645 * VoiceEnrollment If "true", the next recognition will produce data required 646 * for Nametag support (i.e. Aurora bitstream). 647 * 648 * @param self SR_Recognizer handle 649 * @param key Parameter name 650 * @param value Parameter value 651 * @return ESR_INVALID_ARGUMENT if self is null; ESR_OUT_OF_MEMORY is system is out of memory 652 */ 653 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerSetParameter(SR_Recognizer* self, const LCHAR* key, LCHAR* value); 654 /** 655 * Sets size_t recognition parameter. 656 * 657 * @param self SR_Recognizer handle 658 * @param key Parameter name 659 * @param value Parameter value 660 * @return ESR_INVALID_ARGUMENT if self is null; ESR_OUT_OF_MEMORY is system is out of memory 661 */ 662 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerSetSize_tParameter(SR_Recognizer* self, const LCHAR* key, size_t value); 663 /** 664 * Sets BOOL recognition parameter. 665 * 666 * @param self SR_Recognizer handle 667 * @param key Parameter name 668 * @param value Parameter value 669 * @return ESR_INVALID_ARGUMENT if self is null; ESR_OUT_OF_MEMORY is system is out of memory 670 */ 671 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerSetBoolParameter(SR_Recognizer* self, const LCHAR* key, ESR_BOOL value); 672 673 /** 674 * @} 675 * 676 * @name Recognizer rule Setup/Activation operations 677 * 678 * @{ 679 */ 680 681 /** 682 * Recognizer may be set up with multiple Grammars and multiple rules. All grammars 683 * must be unsetup before the recognizer can be destroyed. 684 * A pre-compiled Grammar should have undergone a model consistency check with the 685 * recognizer prior to this call. 686 * 687 * @param self SR_Recognizer handle 688 * @param grammar Grammar containing rule 689 * @param ruleName Name of rule to associate with recognizer 690 * @see SR_GrammarCheckModelConsistency 691 * @return ESR_INVALID_ARGUMENT if self is null 692 */ 693 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerSetupRule(SR_Recognizer* self, 694 struct SR_Grammar_t* grammar, 695 const LCHAR* ruleName); 696 /** 697 * Indicates if Recognizer is configured with any rules within the specified Grammar. 698 * 699 * @param self SR_Recognizer handle 700 * @param hasSetupRules True if the Recognizer is configured for the Grammar 701 * @return ESR_INVALID_ARGUMENT if self is null 702 */ 703 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerHasSetupRules(SR_Recognizer* self, 704 ESR_BOOL* hasSetupRules); 705 /** 706 * Activates rule in recognizer. 707 * 708 * @param self SR_Recognizer handle 709 * @param grammar Grammar containing rule 710 * @param ruleName Name of rule 711 * @param weight Relative weight to assign to self grammar vs. other activated grammars. 712 * Values: Integers 0-2^31. 713 * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_STATE if no models are associated with the recognizer, 714 * or if the rule could not be setup, or if the acoustic models could not be setup; 715 * ESR_BUFFER_OVERFLOW if ruleName is too long 716 */ 717 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerActivateRule(SR_Recognizer* self, 718 struct SR_Grammar_t* grammar, 719 const LCHAR* ruleName, 720 unsigned int weight); 721 /** 722 * Deactivates rule in recognizer. 723 * 724 * @param self SR_Recognizer handle 725 * @param grammar Grammar containing rule 726 * @param ruleName Name of rule 727 * @return ESR_INVALID_ARGUMENT if self is null; ESR_NO_MATCH_ERROR if grammar is not activated 728 */ 729 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerDeactivateRule(SR_Recognizer* self, 730 struct SR_Grammar_t* grammar, 731 const LCHAR* ruleName); 732 733 /** 734 * Deactivates all grammar rule in recognizer. 735 * 736 * @param self SR_Recognizer handle 737 * @return ESR_INVALID_ARGUMENT if self is null 738 */ 739 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerDeactivateAllRules(SR_Recognizer* self); 740 741 /** 742 * Indicates if rule is active in recognizer. 743 * 744 * @param self SR_Recognizer handle 745 * @param grammar Grammar containing rule 746 * @param ruleName Name of rule 747 * @param isActiveRule True if rule is active 748 * @return ESR_INVALID_ARGUMENT if self is null 749 */ 750 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsActiveRule(SR_Recognizer* self, 751 struct SR_Grammar_t* grammar, 752 const LCHAR* ruleName, 753 ESR_BOOL* isActiveRule); 754 /** 755 * Ensure the model usage in a pre-compiled grammar is consistent with the models 756 * that are associated with the Recognizer. You must first have called Recognizer_Setup(). 757 * 758 * @param self SR_Recognizer handle 759 * @param grammar Grammar to check against 760 * @param isConsistent True if rule is consistent 761 * @return ESR_INVALID_ARGUMENT if self is null 762 */ 763 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerCheckGrammarConsistency(SR_Recognizer* self, 764 struct SR_Grammar_t* grammar, 765 ESR_BOOL* isConsistent); 766 /** 767 * @} 768 * 769 * @name Recognizer Advance operations 770 * 771 * @{ 772 */ 773 774 /** 775 * Get audio into the recognizer. 776 * 777 * We decouple the Audio and frontend processing from the Recognizer processing via an 778 * internal FIFO frame buffer (aka utterance buffer). This ensures that this call is at least 779 * as fast as real time so that voicing events are not unduly delayed. The audio buffer size 780 * must be at least one frame buffer's worth and some reasonable maximum size for synchronous 781 * behaviour. This function may be called independently of Recognizer_Advance. 782 * 783 * @param self SR_Recognizer handle 784 * @param buffer Buffer containing audio data 785 * @param bufferSize [in/out] Size of buffer in samples. In case of a buffer overflow, 786 * ESR_BUFFER_OVERFLOW is returned and this value holds the actual 787 * amount of samples that were pushed. 788 * @param isLast Indicates if the audio frame is the last one in this recognition 789 * @return ESR_INVALID_ARGUMENT if self, buffer, or bufferSize are null; ESR_INVALID_STATE if the recognizer isn't 790 * started, or the recognizer has already received the last frame; ESR_BUFFER_OVERFLOW if the recognizer buffer is 791 * full 792 */ 793 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerPutAudio(SR_Recognizer* self, asr_int16_t* buffer, 794 size_t* bufferSize, ESR_BOOL isLast); 795 /** 796 * Advance the recognizer by at least one utterance frame. The number of frames advanced 797 * depends on the underlying definition. We anticipate that the recognizer will keep up with 798 * the supplied audio buffers when waiting for voicing. After this point, the number of frames 799 * may be one (for our default frame-advance mode) or it may be more if the synchronous nature 800 * of this operation is not considered a problem. The recognizer may be advanced independently 801 * of the Recognizer_PutAudio call. It is permissible to advance when there is no further data. 802 * A stop condition could be an appropriate consequence. 803 * 804 * @param self Recognizer handle 805 * @param status Resulting recognizer status 806 * @param type Resulting recognition result type 807 * @param result Resulting recognizer result 808 * @return ESR_INVALID_ARGUMENT if self, status, or type are null; ESR_INVALID_STATE if an internal error occurs 809 */ 810 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerAdvance(SR_Recognizer* self, 811 SR_RecognizerStatus* status, 812 SR_RecognizerResultType* type, 813 SR_RecognizerResult** result); 814 /** 815 * @} 816 */ 817 818 /** 819 * Log recognizer-related event token. 820 * 821 * @param self SR_Recognizer handle 822 * @param token Token name 823 * @param value Value to be logged 824 * @return ESR_INVALID_ARGUMENT if self is null 825 */ 826 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLogToken(SR_Recognizer* self, const LCHAR* token, const LCHAR* value); 827 828 /** 829 * Log recognizer-related event token integer. 830 * 831 * @param self SR_Recognizer handle 832 * @param token Token name 833 * @param value Value to be logged 834 * @return ESR_INVALID_ARGUMENT if self is null 835 */ 836 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLogTokenInt(SR_Recognizer* self, const LCHAR* token, int value); 837 838 /** 839 * Log recognizer-related event and dump all previously accumulated tokens since last event to 840 * log. 841 * 842 * @param self SR_Recognizer handle 843 * @param event Event name 844 * @return ESR_INVALID_ARGUMENT if self is null 845 */ 846 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLogEvent(SR_Recognizer* self, const LCHAR* event); 847 848 /** 849 * Log the beginning of a new log session. A log session contains zero or more recognitions (transactions) 850 * and it is up to the application to decided when the session ends and a new one begins (e.g. 851 * timeout, number of recognitions, etc.) 852 * 853 * @param self SR_Recognizer handle 854 * @param sessionName Session name 855 * @return ESR_INVALID_ARGUMENT if self is null 856 */ 857 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLogSessionStart(SR_Recognizer* self, const LCHAR* sessionName); 858 859 /** 860 * Log the end of a log session. 861 * 862 * @param self SR_Recognizer handle 863 * @return ESR_INVALID_ARGUMENT if self is null 864 */ 865 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLogSessionEnd(SR_Recognizer* self); 866 867 /** 868 * Log data about a waveform obtained from a TCP file. This function is not called 869 * when doing live recognition. 870 * 871 * @param self SR_Recognizer handle 872 * @param waveformFilename Session name 873 * @param transcription Transcription for the utterance 874 * @param bos Beginning of speech (seconds) 875 * @param eos End of speech (seconds) 876 * @param isInvocab True if the transcription is accepted by the grammar, False otherwise 877 * @return ESR_INVALID_ARGUMENT if self is null 878 */ 879 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLogWaveformData(SR_Recognizer* self, 880 const LCHAR* waveformFilename, 881 const LCHAR* transcription, 882 const double bos, 883 const double eos, 884 ESR_BOOL isInvocab); 885 886 887 /** 888 * Loads utterance from file. 889 * 890 * @param self SR_Recognizer handle 891 * @param filename File to read from 892 * @return ESR_INVALID_ARGUMENT if self is null 893 */ 894 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLoadUtterance(SR_Recognizer* self, const LCHAR* filename); 895 /** 896 * Loads utterance from WAVE file. 897 * 898 * @param self SR_Recognizer handle 899 * @param filename WAVE file to read from 900 * @return ESR_INVALID_ARGUMENT if self is null 901 */ 902 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLoadWaveFile(SR_Recognizer* self, const LCHAR* filename); 903 904 /** 905 * Associates a locking function with the recognizer. This function is used to 906 * protect internal data from multithreaded access. 907 * 908 * @param self SR_Recognizer handle 909 * @param function Locking function 910 * @param data Function data 911 * @return ESR_INVALID_ARGUMENT if self is null 912 */ 913 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerSetLockFunction(SR_Recognizer* self, 914 SR_RecognizerLockFunction function, 915 void* data); 916 917 /** 918 * 919 * @name Signal quality metrics 920 * 921 * @{ 922 */ 923 924 /** 925 * Indicates if signal is getting clipped. 926 * 927 * @param self SR_Recognizer handle 928 * @param isClipping [out] Result value 929 * @return ESR_INVALID_ARGUMENT if self is null 930 */ 931 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsSignalClipping(SR_Recognizer* self, ESR_BOOL* isClipping); 932 /** 933 * Indicates if signal has a DC-offset component. 934 * 935 * @param self SR_Recognizer handle 936 * @param isDCOffset [out] Result value 937 * @return ESR_INVALID_ARGUMENT if self is null 938 */ 939 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsSignalDCOffset(SR_Recognizer* self, ESR_BOOL* isDCOffset); 940 /** 941 * Indicates if signal is noisy. 942 * 943 * @param self SR_Recognizer handle 944 * @param isNoisy [out] Result value 945 * @return ESR_INVALID_ARGUMENT if self is null 946 */ 947 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsSignalNoisy(SR_Recognizer* self, ESR_BOOL* isNoisy); 948 /** 949 * Indicates if speech contained within the signal is too quiet. 950 * 951 * @param self SR_Recognizer handle 952 * @param isTooQuiet [out] Result value 953 * @return ESR_INVALID_ARGUMENT if self is null 954 */ 955 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsSignalTooQuiet(SR_Recognizer* self, ESR_BOOL* isTooQuiet); 956 /** 957 * Indicates if there are too few samples in the signal for a proper recognition. 958 * 959 * @param self SR_Recognizer handle 960 * @param isTooFewSamples [out] Result value 961 * @return ESR_INVALID_ARGUMENT if self is null 962 */ 963 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsSignalTooFewSamples(SR_Recognizer* self, ESR_BOOL* isTooFewSamples); 964 /** 965 * Indicates if there are too many samples in the signal for a proper recognition. 966 * 967 * @param self SR_Recognizer handle 968 * @param isTooManySamples [out] Result value 969 * @return ESR_INVALID_ARGUMENT if self is null 970 */ 971 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsSignalTooManySamples(SR_Recognizer* self, ESR_BOOL* isTooManySamples); 972 973 /** 974 * Changes the sample rate of audio. 975 * 976 * @param self SR_Recognizer handle 977 * @param new_sample_rate [in] New Sample Rate 978 * @return ESR_ReturnCode if self is null 979 */ 980 SREC_RECOGNIZER_API ESR_ReturnCode SR_Recognizer_Change_Sample_Rate ( SR_Recognizer *self, size_t new_sample_rate ); 981 982 /** 983 * @} 984 */ 985 986 /** 987 * @} 988 */ 989 990 991 #endif /* __SR_RECOGNIZER_H */ 992