1 /*---------------------------------------------------------------------------* 2 * parseStringTest.c * 3 * * 4 * Copyright 2007, 2008 Nuance Communciations, Inc. * 5 * * 6 * Licensed under the Apache License, Version 2.0 (the 'License'); * 7 * you may not use this file except in compliance with the License. * 8 * * 9 * You may obtain a copy of the License at * 10 * http://www.apache.org/licenses/LICENSE-2.0 * 11 * * 12 * Unless required by applicable law or agreed to in writing, software * 13 * distributed under the License is distributed on an 'AS IS' BASIS, * 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * 15 * See the License for the specific language governing permissions and * 16 * limitations under the License. * 17 * * 18 *---------------------------------------------------------------------------*/ 19 20 21 22 #include "pstdio.h" 23 #include "pmemory.h" 24 #include "plog.h" 25 26 27 #include "HashMap.h" 28 #include "SR_Grammar.h" 29 #include "SR_SemanticResult.h" 30 #include "ESR_Session.h" 31 #include "ESR_Locale.h" 32 #include "LCHAR.h" 33 34 #include "PFileSystem.h" 35 #include "PANSIFileSystem.h" 36 37 /* for testing RecognizerImpl.c, see below */ 38 #include"buildopt.h" 39 #include"setting.h" 40 #include"srec_sizes.h" 41 #include"SR_GrammarImpl.h" 42 43 /* defines */ 44 #define MAX_LINE_LENGTH 256 45 #define MAX_STR_LENGTH 512 46 #define MAX_SEM_RESULTS 3 47 #define MAX_KEYS 30 48 49 /* protos */ 50 ESR_ReturnCode process_single_key_line(SR_Grammar* grammar, PFile* fin, PFile* fout); 51 ESR_ReturnCode process_multi_key_line(SR_Grammar* grammar, const LCHAR* rootrule, PFile* fin, PFile* fout); 52 53 /* struct */ 54 typedef struct Opts 55 { 56 int use_parse_by_string_ids; 57 int do_check_all_ids; 58 } 59 Opts; 60 61 int usage(LCHAR* exename) 62 { 63 pfprintf(PSTDOUT, "usage: %s -base <basefilename> [-in <input file>] [-out <output file>] [-itest <testfilename>]\n", exename); 64 return 1; 65 } 66 67 void lstr_strip_multiple_spaces(LCHAR* trans) 68 { 69 char *src=trans, *dst=trans; 70 for( ;(*dst = *src)!=L('\0'); src++) { 71 if(*dst != ' ') dst++; 72 else if(src[1] != ' ') dst++; 73 } 74 } 75 76 /** 77 * Display the Semantic Result 78 */ 79 void display_results(SR_SemanticResult *result, PFile* fout) 80 { 81 size_t i, size, len; 82 LCHAR* keys[MAX_KEYS]; /* array of pointers to strings */ 83 LCHAR value[MAX_STR_LENGTH]; 84 ESR_ReturnCode rc; 85 86 size = MAX_KEYS; 87 rc = result->getKeyList(result, (LCHAR**) & keys, &size); /* get the key list */ 88 if (rc == ESR_SUCCESS) 89 { 90 for (i = 0; i < size; i++) 91 { 92 len = MAX_STR_LENGTH; 93 if ((rc = result->getValue(result, keys[i], value, &len)) == ESR_SUCCESS) 94 pfprintf(fout, "{%s : %s}\n", keys[i], value); 95 else 96 pfprintf(fout, "Error: %s\n", ESR_rc2str(rc)); 97 } 98 pfprintf(fout, "--Done--\n"); 99 } 100 else 101 pfprintf(fout, "Error: %s\n", ESR_rc2str(rc)); 102 } 103 104 ESR_ReturnCode Parse(SR_Grammar* grammar, LCHAR* trans, PFile* fout, Opts* opts) 105 { 106 ESR_ReturnCode rc = ESR_SUCCESS; 107 size_t i, result_count, key_count; 108 SR_SemanticResult* semanticResults[MAX_SEM_RESULTS]; 109 wordID wordIDs[32], *wordIDptr; 110 SR_GrammarImpl* pgrammar = (SR_GrammarImpl*)grammar; 111 wordmap* wmap; 112 113 if (opts->do_check_all_ids) 114 { 115 wordID id; 116 Opts myopts; 117 memcpy(&myopts, opts, sizeof(myopts)); 118 myopts.do_check_all_ids = 0; 119 wmap = pgrammar->syntax->synx->olabels; 120 /* start at word 4 because "eps, -pau- -pau2- @root */ 121 for (id = 4; id < wmap->num_words; id++) 122 { 123 trans = wmap->words[id]; 124 Parse(grammar, trans, fout, &myopts); 125 } 126 return 0; 127 } 128 129 result_count = MAX_SEM_RESULTS; /* initially not greater than MAX */ 130 for (i = 0; i < result_count; i++) 131 SR_SemanticResultCreate(&semanticResults[i]); /* create the result holders */ 132 lstrtrim(trans); 133 /* check for multiple space separators! */ 134 lstr_strip_multiple_spaces(trans); 135 136 if (!opts->use_parse_by_string_ids) 137 { 138 rc = grammar->checkParse(grammar, trans, semanticResults, (size_t*) & result_count); 139 } 140 else 141 { 142 char copy_of_trans[256], *p; 143 strcpy(copy_of_trans, trans); 144 wmap = pgrammar->syntax->synx->olabels; 145 wordIDs[0] = wordIDs[1] = MAXwordID; 146 wordIDptr = &wordIDs[0]; 147 for (p = strtok(copy_of_trans, " "); p; p = strtok(NULL, " ")) 148 { 149 for (i = 0; i < wmap->num_words; i++) 150 if (!strcmp(wmap->words[i], p)) 151 { 152 *wordIDptr++ = (wordID)i; 153 break; 154 } 155 if (i == wmap->num_words) 156 { 157 wordIDs[0] = MAXwordID; 158 break; 159 } 160 } 161 *wordIDptr++ = MAXwordID; 162 163 /* printf("wordids:"); 164 for(wordIDptr=&wordIDs[0]; *wordIDptr!=MAXwordID; wordIDptr++) 165 printf(" %d/%s", *wordIDptr, wmap->words[*wordIDptr]); 166 printf("\n"); */ 167 168 if (wordIDs[0] == MAXwordID) 169 { 170 result_count = 0; 171 rc = ESR_SUCCESS; 172 } 173 else 174 { 175 rc = pgrammar->semproc->flush(pgrammar->semproc); 176 rc = pgrammar->semproc->setParam(pgrammar->semproc, L("literal"), trans); 177 rc = pgrammar->semproc->checkParseByWordID(pgrammar->semproc, pgrammar->semgraph, 178 wordIDs, semanticResults, &result_count); 179 } 180 } 181 if (rc != ESR_SUCCESS) 182 { 183 pfprintf(fout, "error (%s)\n\n", trans); 184 return rc; 185 } 186 187 if (result_count < 1) 188 { 189 pfprintf(fout, "no parse (%s)\n\n", trans); 190 } 191 else 192 { 193 key_count = 0xffff; 194 rc = SR_SemanticResultGetKeyCount(semanticResults[0], &key_count); 195 pfprintf(fout, "parse ok (%d results) (%s) (%d)\n", result_count, trans, key_count); 196 for (i = 0; i < result_count; i++) 197 display_results(semanticResults[i], fout); 198 199 for (i = 0; i < MAX_SEM_RESULTS; i++) 200 { 201 rc = semanticResults[i]->destroy(semanticResults[i]); 202 if (rc != ESR_SUCCESS) 203 return rc; 204 } 205 } 206 return ESR_SUCCESS; 207 } 208 209 /* tests the transcription against the grammar and then decided based on what was expected of the test 210 whether or not is it considered a pass or fail */ 211 ESR_ReturnCode ParseTestSet(SR_Grammar* grammar, LCHAR* trans, LCHAR* key, LCHAR* ref, LCHAR* result, PFile* fout) 212 { 213 size_t len; 214 ESR_ReturnCode rc; 215 int i, result_count; 216 SR_SemanticResult* semanticResults[MAX_SEM_RESULTS]; 217 LCHAR value[MAX_STR_LENGTH]; 218 219 result_count = MAX_SEM_RESULTS; 220 for (i = 0; i < result_count; i++) 221 SR_SemanticResultCreate(&semanticResults[i]); 222 223 lstrtrim(trans); 224 /* check for multiple space separators! */ 225 lstr_strip_multiple_spaces(trans); 226 227 pfprintf(fout, "checking (%s) ref(%s) res(%s)\n", trans, ref, result); 228 rc = grammar->checkParse(grammar, trans, semanticResults, (size_t*) & result_count); 229 if (rc != ESR_SUCCESS) 230 return rc; 231 232 /*result file will contain 233 transcription | key | reference | result | PASSESD/FAILED */ 234 235 if (result_count < 1) /*failed to parse, but this could still be a pass if you expected a failure*/ 236 { 237 pfprintf(fout, "NO PARSE FOR: %s|%s|%s| |", trans, key, ref); 238 if (strcmp("FAIL", result) == 0) 239 pfprintf(fout, "PASSED (%s)\n", trans); 240 else 241 pfprintf(fout, "FAILED (%s)\n", trans); 242 } 243 else /*parsed, look at what was expected, what was returned and which of PASS/FAIL is expected */ 244 { 245 for (i = 0; i < result_count; i++) 246 { 247 len = MAX_STR_LENGTH; 248 if ((rc = semanticResults[i]->getValue(semanticResults[i], key, value, &len)) == ESR_SUCCESS) 249 { 250 pfprintf(fout, "%s|%s|%s|%s|", trans, key, ref, value); 251 252 if (strcmp(value, ref) == 0 && strcmp("PASS", result) == 0) 253 pfprintf(fout, "PASSED\n"); 254 else 255 pfprintf(fout, "FAILED\n"); 256 } 257 else 258 { 259 pfprintf(fout, "ERROR: %s, while checking key='%s'\n", ESR_rc2str(rc), key); 260 } 261 } 262 263 /*deallocate semantic results*/ 264 for (i = 0; i < MAX_SEM_RESULTS; i++) 265 { 266 rc = semanticResults[i]->destroy(semanticResults[i]); 267 if (rc != ESR_SUCCESS) 268 return rc; 269 } 270 } 271 return ESR_SUCCESS; 272 } 273 274 int main(int argc, char **argv) 275 { 276 LCHAR trans[MAX_LINE_LENGTH]; 277 SR_Grammar* grammar = NULL; 278 ESR_ReturnCode rc; 279 LCHAR base[P_PATH_MAX] = L(""); 280 LCHAR infilename[P_PATH_MAX] = L(""); 281 LCHAR inRTfilename[P_PATH_MAX] = L(""); 282 LCHAR outfilename[P_PATH_MAX] = L(""); 283 PFile *fin = NULL, *fout = NULL; 284 int i; 285 LCHAR *rootrule = L("myRoot"), *p; 286 Opts opts = { 0, 0 }; 287 288 /* 289 * Initialize portable library. 290 */ 291 CHKLOG(rc, PMemInit()); 292 293 fin = PSTDIN; 294 fout = PSTDOUT; 295 296 if (argc < 3) 297 { 298 usage(argv[0]); 299 exit(EXIT_FAILURE); 300 } 301 for (i = 1; i < argc; ++i) 302 { 303 if (!LSTRCMP(argv[i], L("-base"))) 304 { 305 ++i; 306 LSTRCPY(base, argv[i]); 307 } 308 else if (!LSTRCMP(argv[i], L("-in"))) 309 { 310 ++i; 311 LSTRCPY(infilename, argv[i]); 312 } 313 else if (!LSTRCMP(argv[i], L("-out"))) 314 { 315 ++i; 316 LSTRCPY(outfilename, argv[i]); 317 } 318 else if (!LSTRCMP(argv[i], L("-itest"))) 319 { 320 ++i; 321 LSTRCPY(inRTfilename, argv[i]); 322 } 323 else if (!LSTRCMP(argv[i], L("-ids"))) 324 { 325 opts.use_parse_by_string_ids = 1; 326 } 327 else if (!LSTRCMP(argv[i], L("-allids"))) 328 { 329 opts.do_check_all_ids = 1; 330 opts.use_parse_by_string_ids = 1; 331 } 332 else 333 return usage(argv[0]); 334 } 335 336 CHK(rc, PLogInit(NULL, 0)); 337 338 rc = SR_GrammarLoad(base, &grammar); 339 if (rc != ESR_SUCCESS) 340 goto CLEANUP; 341 342 if (*outfilename) 343 { 344 if ((fout = pfopen(outfilename, "w")) == NULL) 345 { 346 pfprintf(PSTDOUT, "Could not open file: %s\n", outfilename); 347 rc = 1; 348 goto CLEANUP; 349 } 350 } 351 352 if (opts.do_check_all_ids) 353 { 354 rc = Parse(grammar, NULL, fout, &opts); 355 } 356 else if (*infilename) 357 { 358 if (LSTRCMP(infilename, "-") == 0) 359 { 360 fin = PSTDIN; 361 } 362 else if ((fin = pfopen(infilename, "r")) == NULL) 363 { 364 pfprintf(PSTDOUT, "Could not open file: %s\n", infilename); 365 rc = 1; 366 goto CLEANUP; 367 } 368 for (;;) 369 { 370 if (pfgets(trans, MAX_LINE_LENGTH, fin) == NULL) 371 { 372 if (!pfeof(fin)) 373 { 374 rc = ESR_READ_ERROR; 375 PLogError(ESR_rc2str(rc)); 376 } 377 break; 378 } 379 if (trans[0] == '#') continue; 380 lstrtrim(trans); 381 /* check for multiple space separators! */ 382 lstr_strip_multiple_spaces(trans); 383 pfprintf(fout, "Transcription: %s\n", trans); 384 if ((rc = Parse(grammar, trans, fout, &opts)) != ESR_SUCCESS) 385 goto CLEANUP; 386 pfprintf(fout, "\n"); 387 } 388 } 389 else if (*inRTfilename) /*using a test file*/ 390 { 391 if ((fin = pfopen(inRTfilename, "r")) == NULL) 392 { 393 pfprintf(PSTDOUT, "Could not open test file: %s\n", inRTfilename); 394 rc = 1; 395 goto CLEANUP; 396 } 397 398 /*read through the test file parsing it into the variables 399 FORMAT: "the transciption" key "value" 400 */ 401 while (ESR_TRUE) 402 { 403 if (0) rc = process_single_key_line(grammar, fin, fout); 404 else rc = process_multi_key_line(grammar, rootrule, fin, fout); 405 if (rc == ESR_READ_ERROR) 406 { 407 rc = ESR_SUCCESS; 408 break; 409 } 410 } 411 } 412 else 413 { 414 /* get some transcriptions from the user */ 415 pfprintf(PSTDOUT, "\nSemantic Parser Test Program for esr (Nuance Communicaitions, 2007)\n"); 416 pfprintf(PSTDOUT, "'qqq' to quit\n"); 417 418 while (ESR_TRUE) 419 { 420 pfprintf(PSTDOUT, "> "); 421 422 if (!fgets(trans, MAX_LINE_LENGTH, PSTDIN)) 423 break; 424 // remove trailing whitespace 425 for(p=&trans[0]; *p!=0 && *p!='\n' && *p!='\r'; p++) {} 426 *p=0; 427 428 if (!LSTRCMP("qqq", trans)) 429 break; 430 else 431 if ((rc = Parse(grammar, trans, fout, &opts)) != ESR_SUCCESS) 432 goto CLEANUP; 433 } 434 } 435 CLEANUP: 436 if (fin && fin != PSTDIN) 437 pfclose(fin); 438 if (fout && fout != PSTDOUT) 439 pfclose(fout); 440 if (grammar) grammar->destroy(grammar); 441 PLogShutdown(); 442 /* PANSIFileSystemDestroy(); 443 PFileSystemDestroy();*/ 444 PMemShutdown(); 445 return rc; 446 } 447 448 ESR_ReturnCode process_single_key_line(SR_Grammar* grammar, PFile* fin, PFile* fout) 449 { 450 LCHAR* position; 451 LCHAR line[MAX_LINE_LENGTH]; 452 LCHAR trans[MAX_LINE_LENGTH]; 453 LCHAR key[MAX_LINE_LENGTH]; 454 LCHAR refValue[MAX_LINE_LENGTH]; 455 LCHAR result[MAX_LINE_LENGTH]; 456 ESR_ReturnCode rc; 457 458 position = pfgets(line, MAX_LINE_LENGTH, fin); 459 if (line[0] == '#') 460 return ESR_SUCCESS; 461 if (!strncmp(line, "__END__", 7)) 462 return ESR_READ_ERROR; 463 if (position == NULL) 464 { 465 if (pfeof(fin)) 466 return ESR_READ_ERROR; 467 else 468 { 469 PLogError(L("ESR_READ_ERROR")); 470 return ESR_READ_ERROR; 471 } 472 } 473 474 //get the transcription to test 475 if ((position = strtok(line, "\"")) != NULL) 476 { 477 LSTRCPY(trans, position); 478 } 479 else 480 { 481 pfprintf(fout, "INVALID FORMAT for input line 1 \n"); 482 rc = ESR_INVALID_ARGUMENT; 483 goto CLEANUP; 484 } 485 486 //get the key (meaning) 487 if ((position = strtok(NULL, " \t")) != NULL) 488 { 489 LSTRCPY(key, position); 490 } 491 else 492 { 493 pfprintf(fout, "INVALID FORMAT for input line 2\n"); 494 rc = ESR_INVALID_ARGUMENT; 495 goto CLEANUP; 496 } 497 498 //get the expected return string 499 if ((position = strtok(NULL, "\"")) != NULL) 500 { 501 LSTRCPY(refValue, position); 502 } 503 else 504 { 505 pfprintf(fout, "INVALID FORMAT for input line 3\n"); 506 rc = ESR_INVALID_ARGUMENT; 507 goto CLEANUP; 508 } 509 510 //get the expected result PASS/FAIL 511 //there is no need to write PASS, if nothing is written PASS is assumed 512 if ((position = strtok(NULL, " \t\r\n\"")) != NULL) 513 { 514 LSTRCPY(result, position); 515 516 if (strcmp(result, "PASS") != 0 && strcmp(result, "FAIL") != 0) 517 { 518 pfprintf(fout, "INVALID FORMAT for input line, use either PASS or FAIL\n"); 519 rc = ESR_INVALID_ARGUMENT; 520 goto CLEANUP; 521 } 522 523 if ((rc = ParseTestSet(grammar, trans, key, refValue, result, fout)) != ESR_SUCCESS) 524 goto CLEANUP; 525 } 526 else 527 { 528 if ((rc = ParseTestSet(grammar, trans, key, refValue, "PASS", fout)) != ESR_SUCCESS) 529 goto CLEANUP; 530 } 531 rc = ESR_SUCCESS; 532 CLEANUP: 533 return rc; 534 } 535 536 ESR_ReturnCode process_multi_key_line(SR_Grammar* grammar, const LCHAR* rootrule, PFile* fin, PFile* fout) 537 { 538 LCHAR *position, *p; 539 LCHAR line[MAX_LINE_LENGTH]; 540 LCHAR trans[MAX_LINE_LENGTH]; 541 LCHAR keyvals[MAX_LINE_LENGTH]; 542 ESR_ReturnCode rc; 543 SR_SemanticResult* semanticResults[MAX_SEM_RESULTS]; 544 LCHAR refkey[MAX_LINE_LENGTH]; 545 LCHAR refval[MAX_LINE_LENGTH], value[MAX_STR_LENGTH]; 546 size_t i, j, len; 547 size_t result_count; 548 549 position = pfgets(line, MAX_LINE_LENGTH, fin); 550 if (line[0] == '#') 551 return ESR_SUCCESS; 552 if (!strncmp(line, "__END__", 7)) 553 return ESR_READ_ERROR; 554 if (position == NULL) 555 { 556 if (pfeof(fin)) 557 return ESR_READ_ERROR; 558 else 559 { 560 PLogError(L("ESR_READ_ERROR")); 561 return ESR_READ_ERROR; 562 } 563 } 564 565 /* we're trying to parse 566 Hello there : BONJOUR 567 */ 568 p = strtok(line, ":"); 569 LSTRCPY(trans, p); 570 /* strip trailing spaces */ 571 for (len = strlen(trans); len > 0 && trans[len-1] == ' '; len--) 572 trans[len-1] = 0; 573 574 p = strtok(NULL, "\n\r"); 575 /* strip leading spaces */ 576 while (*p == ' ' || *p == '\t') p++; 577 LSTRCPY(keyvals, p); 578 579 result_count = MAX_SEM_RESULTS; 580 for (i = 0; i < result_count; i++) 581 SR_SemanticResultCreate(&semanticResults[i]); 582 583 /* pfprintf(fout,"checking (%s) ref(%s)\n", trans, keyvals); */ 584 rc = grammar->checkParse(grammar, trans, semanticResults, (size_t*) & result_count); 585 if (rc != ESR_SUCCESS) 586 return rc; 587 588 /*result file will contain 589 transcription | key | reference | result | PASSESD/FAILED */ 590 591 if (result_count < 1) /*failed to parse, but this could still be a pass if you expected a failure*/ 592 { 593 pfprintf(fout, "%s|%s| |", trans, keyvals); 594 if (!strcmp("FAIL", keyvals) || !strcmp(keyvals, "-")) 595 pfprintf(fout, "PASSED\n"); 596 else 597 pfprintf(fout, "FAILED\n"); 598 } 599 else /*parsed, look at what was expected, what was returned and which of PASS/FAIL is expected */ 600 { 601 size_t size, len; 602 LCHAR* keys_available[MAX_KEYS]; /* array of pointers to strings */ 603 size = MAX_KEYS; 604 rc = semanticResults[0]->getKeyList(semanticResults[0], (LCHAR**) & keys_available, &size); 605 606 for (p = strtok(keyvals, ";"); p; p = strtok(NULL, ";")) 607 { 608 sprintf(refkey, "%s.%s", rootrule, p); 609 p = strchr(refkey, '='); 610 assert(p); 611 *p = 0; 612 p++; 613 if (*p == '\'') p++; 614 LSTRCPY(refval, p); 615 if (refval[ strlen(refval)-1] == '\'') refval[strlen(refval)-1] = 0; 616 617 for (i = 0; i < result_count; i++) 618 { 619 len = MAX_STR_LENGTH; 620 for (j = 0; j < size; j++) 621 if (!strcmp(keys_available[j], refkey)) break; 622 if (j < size) 623 rc = semanticResults[i]->getValue(semanticResults[i], refkey, value, &len); 624 else 625 { 626 LSTRCPY(value, "<NOSUCHKEY>"); 627 rc = ESR_NO_MATCH_ERROR; 628 } 629 pfprintf(fout, "%s|%s|%s|%s|", trans, refkey, refval, value); 630 if (strcmp(value, refval) == 0) 631 pfprintf(fout, "PASSED\n"); 632 else 633 pfprintf(fout, "FAILED\n"); 634 } 635 } 636 637 /*deallocate semantic results*/ 638 for (i = 0; i < MAX_SEM_RESULTS; i++) 639 { 640 rc = semanticResults[i]->destroy(semanticResults[i]); 641 if (rc != ESR_SUCCESS) 642 PLogError("%s while destroying", ESR_rc2str(rc)); 643 } 644 } 645 return ESR_SUCCESS; 646 } 647 648