1 /*---------------------------------------------------------------------------* 2 * SemanticGraphImpl.c * 3 * * 4 * Copyright 2007, 2008 Nuance Communciations, Inc. * 5 * * 6 * Licensed under the Apache License, Version 2.0 (the 'License'); * 7 * you may not use this file except in compliance with the License. * 8 * * 9 * You may obtain a copy of the License at * 10 * http://www.apache.org/licenses/LICENSE-2.0 * 11 * * 12 * Unless required by applicable law or agreed to in writing, software * 13 * distributed under the License is distributed on an 'AS IS' BASIS, * 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * 15 * See the License for the specific language governing permissions and * 16 * limitations under the License. * 17 * * 18 *---------------------------------------------------------------------------*/ 19 20 #include "SR_SemprocPrefix.h" 21 #include "SR_SemprocDefinitions.h" 22 #include "SR_SemanticGraph.h" 23 #include "SR_SemanticGraphImpl.h" 24 #include "SR_SemanticProcessorImpl.h" 25 #include "ESR_ReturnCode.h" 26 #include "passert.h" 27 #include "pendian.h" 28 #include "plog.h" 29 30 static const char* MTAG = __FILE__; 31 #define AVG_SCRIPTS_PER_WORD 2.5 32 #define SLOTNAME_INDICATOR "__" 33 #define SLOTNAME_INDICATOR_LEN 2 34 35 #define PTR_TO_IDX(ptr, base) ((asr_uint32_t) (ptr == NULL ? 0xFFFFFFFFu : \ 36 (asr_uint32_t)(ptr - base))) 37 #define IDX_TO_PTR(idx, base) (idx == 0xFFFFFFFFu ? NULL : base + idx) 38 39 ESR_ReturnCode SR_SemanticGraphCreate(SR_SemanticGraph** self) 40 { 41 SR_SemanticGraphImpl* impl; 42 43 if (self == NULL) 44 { 45 PLogError(L("ESR_INVALID_ARGUMENT")); 46 return ESR_INVALID_ARGUMENT; 47 } 48 impl = NEW(SR_SemanticGraphImpl, MTAG); 49 if (impl == NULL) 50 { 51 PLogError(L("ESR_OUT_OF_MEMORY")); 52 return ESR_OUT_OF_MEMORY; 53 } 54 /* do not assume NEW initialize impl as zero, do it here */ 55 memset(impl, 0, sizeof(SR_SemanticGraphImpl)); 56 57 impl->Interface.destroy = &SR_SemanticGraph_Destroy; 58 impl->Interface.unload = &SR_SemanticGraph_Unload; 59 impl->Interface.load = &SR_SemanticGraph_Load; 60 impl->Interface.save = &SR_SemanticGraph_Save; 61 impl->Interface.addWordToSlot = &SR_SemanticGraph_AddWordToSlot; 62 impl->Interface.reset = &SR_SemanticGraph_Reset; 63 impl->script_olabel_offset = SEMGRAPH_SCRIPT_OFFSET; 64 impl->scopes_olabel_offset = SEMGRAPH_SCOPE_OFFSET; 65 66 *self = (SR_SemanticGraph*) impl; 67 return ESR_SUCCESS; 68 } 69 70 71 /** 72 * Default implementation. 73 */ 74 ESR_ReturnCode SR_SemanticGraph_Destroy(SR_SemanticGraph* self) 75 { 76 SR_SemanticGraphImpl* impl = (SR_SemanticGraphImpl*) self; 77 78 if (self == NULL) 79 { 80 PLogError(L("ESR_INVALID_ARGUMENT")); 81 return ESR_INVALID_ARGUMENT; 82 } 83 84 FREE(impl); 85 return ESR_SUCCESS; 86 } 87 88 ESR_ReturnCode sr_semanticgraph_loadV2(SR_SemanticGraphImpl* impl, wordmap* ilabels, PFile* fp); 89 90 91 /* private function */ 92 ESR_ReturnCode SR_SemanticGraph_LoadFromImage(SR_SemanticGraph* self, wordmap* ilabels, const LCHAR* g2g) 93 { 94 SR_SemanticGraphImpl* impl = (SR_SemanticGraphImpl*) self; 95 PFile* fp = NULL; 96 struct 97 { 98 asr_uint32_t rec_context_image_size; 99 /* image data size of the recognition graph */ 100 asr_uint32_t format; 101 } 102 header; 103 ESR_ReturnCode rc = ESR_SUCCESS; 104 ESR_BOOL isLittleEndian; 105 /* 106 #if __BYTE_ORDER==__LITTLE_ENDIAN 107 isLittleEndian = ESR_TRUE; 108 #else 109 isLittleEndian = ESR_FALSE; 110 #endif 111 */ 112 isLittleEndian = ESR_TRUE; 113 114 fp = pfopen ( g2g, L("rb")); 115 /* CHKLOG(rc, PFileSystemCreatePFile(g2g, isLittleEndian, &fp)); 116 CHKLOG(rc, PFileOpen(fp, L("rb")));*/ 117 118 if ( fp == NULL ) 119 goto CLEANUP; 120 121 /* header */ 122 if (pfread(&header, 4, 2, fp) != 2) 123 { 124 rc = ESR_READ_ERROR; 125 PLogError(ESR_rc2str(rc)); 126 goto CLEANUP; 127 } 128 129 if (pfseek(fp, header.rec_context_image_size, SEEK_SET)) 130 { 131 rc = ESR_READ_ERROR; 132 PLogError(L("ESR_READ_ERROR: could not seek to semgraph data")); 133 goto CLEANUP; 134 } 135 136 if (header.format == IMAGE_FORMAT_V2) 137 { 138 rc = sr_semanticgraph_loadV2(impl, ilabels, fp); 139 } 140 else 141 { 142 rc = ESR_INVALID_STATE; 143 PLogError("PCLG.txt P.txt inconsistency"); 144 goto CLEANUP; 145 } 146 147 CLEANUP: 148 if (fp) 149 pfclose (fp); 150 if (rc != ESR_SUCCESS) 151 { 152 if (impl->arc_token_list != NULL) 153 { 154 FREE(impl->arc_token_list); 155 impl->arc_token_list = NULL; 156 } 157 } 158 return rc; 159 } 160 161 static ESR_ReturnCode deserializeArcTokenInfoV2(SR_SemanticGraphImpl *impl, 162 PFile* fp); 163 164 static ESR_ReturnCode serializeArcTokenInfoV2(SR_SemanticGraphImpl *impl, 165 PFile* fp); 166 167 ESR_ReturnCode sr_semanticgraph_loadV2(SR_SemanticGraphImpl* impl, wordmap* ilabels, PFile* fp) 168 { 169 unsigned int i, nfields; 170 ESR_ReturnCode rc = ESR_SUCCESS; 171 struct 172 { 173 asr_uint32_t format; 174 asr_uint32_t sgtype; 175 } 176 header; 177 asr_uint32_t tmp[32]; 178 179 if (pfread(&header, 4/*sz*/, 2/*ni*/, fp) != 2) 180 { 181 rc = ESR_READ_ERROR; 182 PLogError(L("ESR_READ_ERROR: could not read V2")); 183 goto CLEANUP; 184 } 185 186 if (header.sgtype == GrammarTypeItemList) 187 { 188 /* 189 tmp = new unsigned short[num_words]; 190 if( pfread( tmp, sizeof(tmp[0]), num_words, fp) != num_words) { 191 rc = ESR_READ_ERROR; 192 PLogMessage("can't read %d word script assocs\n", num_words); 193 goto CLEANUP; 194 } 195 */ 196 /* convert these to an arc_token_list or whatever */ 197 PLogError("not supported v2 itemlist type"); 198 rc = ESR_INVALID_STATE; 199 goto CLEANUP; 200 201 } 202 else 203 { 204 205 nfields = 2; 206 if (pfread(tmp, sizeof(tmp[0]), nfields, fp) != nfields) 207 { 208 rc = ESR_WRITE_ERROR; 209 PLogError(L("ESR_WRITE_ERROR: could not write script_olabel_offset")); 210 goto CLEANUP; 211 } 212 i = 0; 213 impl->script_olabel_offset = (wordID)tmp[i++]; 214 impl->scopes_olabel_offset = (wordID)tmp[i++]; 215 ASSERT(i == nfields); 216 217 /* word arcs */ 218 if ((rc = deserializeArcTokenInfoV2(impl, fp)) != ESR_SUCCESS) 219 { 220 PLogError(ESR_rc2str(rc)); 221 goto CLEANUP; 222 } 223 224 /* use the ilabels provided externally (from recog graph ilabels) */ 225 impl->ilabels = ilabels; 226 227 /* scopes */ 228 if ((rc = deserializeWordMapV2(&impl->scopes_olabels, fp)) != ESR_SUCCESS) 229 { 230 PLogError(ESR_rc2str(rc)); 231 goto CLEANUP; 232 } 233 234 /* scripts */ 235 if ((rc = deserializeWordMapV2(&impl->scripts, fp)) != ESR_SUCCESS) 236 { 237 PLogError(ESR_rc2str(rc)); 238 goto CLEANUP; 239 } 240 } 241 CLEANUP: 242 return rc; 243 } 244 245 246 static arc_token_lnk get_first_arc_leaving_node1(arc_token* arc_token_list, 247 arcID num_arcs, 248 nodeID node) 249 { 250 arcID i; 251 for (i = 0; i < num_arcs; i++) 252 { 253 if ((nodeID)(int)arc_token_list[i].next_token_index == node) 254 return ARC_TOKEN_LNK(arc_token_list, i); 255 } 256 return ARC_TOKEN_NULL; 257 } 258 259 static int strlen_with_null(const char* word) 260 { /* from srec_context.c */ 261 int len = strlen(word) + 1; 262 if (len % 2 == 1) len++; 263 return len; 264 } 265 /* private function */ 266 ESR_ReturnCode SR_SemanticGraph_LoadFromTextFiles(SR_SemanticGraph* self, wordmap* ilabels, const LCHAR* basename, int num_words_to_add) 267 { 268 ESR_ReturnCode rc = ESR_FATAL_ERROR; 269 arcID num_scripts; 270 int isConstString = 0; 271 LCHAR filename[MAX_STRING_LEN]; 272 LCHAR line[MAX_SCRIPT_LEN]; 273 LCHAR iword[MAX_STRING_LEN]; 274 LCHAR oword[MAX_SCRIPT_LEN]; 275 LCHAR *p; 276 unsigned int max_num_arc_tokens; 277 nodeID from_node, into_node; 278 wordID ilabel = 0; 279 labelID olabel = 0; 280 arc_token *atoken; 281 arc_token *last_atoken; 282 costdata cost = 0; 283 arcID num_arcs; 284 arc_token* arc_token_list; 285 long fpos; 286 PFile* p_text_file = NULL; 287 PFile* scripts_file; 288 SR_SemanticGraphImpl* semgraph = (SR_SemanticGraphImpl*) self; 289 size_t lineNo; 290 unsigned int i; 291 wordID num_scope_words; 292 asr_int32_t num_scope_chars; 293 LCHAR* _tMp; /* used by IS_SCOPE_MARKER() below */ 294 295 /* use the ilables that are provided externally (from recog graph ilabels) */ 296 semgraph->ilabels = ilabels; 297 298 299 300 /* try to open the .script file */ 301 LSTRCPY(filename, basename); 302 LSTRCAT(filename, ".script"); 303 scripts_file = pfopen ( filename, L("r") ); 304 /* CHKLOG(rc, PFileSystemCreatePFile(filename, TRUE, &scripts_file)); 305 CHKLOG(rc, PFileOpen(scripts_file, L("r")));*/ 306 307 if ( scripts_file == NULL ) 308 { 309 rc = ESR_OPEN_ERROR; 310 goto CLEANUP; 311 } 312 313 /* Load the scripts file 314 assumptions: 315 316 - the scripts file has each line ordered starting from 0 as such 317 <integer><space><script> 318 319 - the integer MUST become the index of the script in the wordmap 320 321 - output labels referenced in the semgraph are the integers (wordmap index) prepending with '_' 322 323 - output labels stored in the semgraph are actually integers which are equal to 324 script_olabel_offset + <integer> 325 */ 326 327 /* determine number of words/chars to allocate */ 328 fpos = pftell(scripts_file); 329 for (i = num_scripts = 0; pfgets(line, MAX_SCRIPT_LEN, scripts_file); num_scripts++) 330 { 331 size_t len = LSTRLEN(line) + 1; 332 if (len % 2) len++; 333 i = i + len; /* count the chars */ 334 } 335 pfseek(scripts_file, fpos, SEEK_SET); 336 337 /* on each line I will have 1 big word */ 338 /* figure that each script for dynamically added words will be a simple assignment 339 like myVar='someVal' ... which looks like almost 2.5 words, hence *2.5 */ 340 wordmap_create(&semgraph->scripts, i, num_scripts, (int)AVG_SCRIPTS_PER_WORD*num_words_to_add); 341 342 /* load up all the information */ 343 lineNo = 0; 344 while (pfgets(line, MAX_SCRIPT_LEN, scripts_file)) 345 { 346 ASSERT( sizeof( iword[0]) == sizeof(char)); // else more code to write! 347 if (sscanf(line, "%s ", iword) == 1) 348 { 349 LSTRCPY(oword, line + LSTRLEN(iword) + 1); 350 /* may actually have spaces in it and this is messing me up ... here is the fix */ 351 /* copy the line starting after the iword */ 352 for (i = 0, p = line + LSTRLEN(iword) + 1; *p; p++) 353 { 354 if (*p == '\\') 355 { 356 if (isConstString) 357 oword[i++] = *p; 358 ++p; 359 } 360 else if (*p == '\'') 361 isConstString = (isConstString ? 0 : 1) ; /* toggle */ 362 if (isConstString || !isspace(*p)) 363 oword[i++] = *p; 364 } 365 oword[i] = '\0'; 366 367 /* make sure that the index in the wordmap matches the line number */ 368 if (wordmap_add_word(semgraph->scripts, oword) != lineNo) 369 { 370 PLogError(L("ESR_READ_ERROR: internal error adding script (%d)"), num_words_to_add); 371 return ESR_NO_MATCH_ERROR; 372 } 373 lineNo++; 374 } 375 else 376 { 377 PLogMessage(L("can't parse line %s"), line); 378 passert(0); 379 } 380 } 381 pfclose (scripts_file); 382 383 /* try to open the P.txt file */ 384 LSTRCPY(filename, basename); 385 LSTRCAT(filename, ".P.txt"); 386 p_text_file = pfopen ( filename, L("r")); 387 /* CHKLOG(rc, PFileSystemCreatePFile(filename, TRUE, &p_text_file)); 388 CHKLOG(rc, PFileOpen(p_text_file, L("r")));*/ 389 390 if ( p_text_file == NULL ) 391 goto CLEANUP; 392 393 /* determine number of word arcs to allocate */ 394 fpos = pftell(p_text_file); 395 num_scope_words = 0; 396 num_scope_chars = 0; 397 for (num_arcs = 0; pfgets(line, MAX_STRING_LEN, p_text_file); ++num_arcs) 398 { 399 if (num_arcs == MAXarcID) 400 break; /* error */ 401 if (sscanf(line, "%hu\t%hu\t%[^\t]\t%[^\t\n\r]", &from_node, &into_node, iword, oword) == 4) 402 { 403 if (IS_SCOPE_MARKER(oword)) { 404 num_scope_words++; 405 num_scope_chars += strlen_with_null( oword); 406 if(num_scope_chars) num_scope_chars++ ; 407 } 408 } 409 } 410 max_num_arc_tokens = num_arcs + (arcID)num_words_to_add; 411 MEMCHK(rc, max_num_arc_tokens, MAXarcID); 412 pfseek(p_text_file, fpos, SEEK_SET); 413 414 semgraph->arc_token_list = NEW_ARRAY(arc_token,max_num_arc_tokens, L("semgraph.wordgraph")); 415 arc_token_list = semgraph->arc_token_list; 416 /* need to initialize my wordmap */ 417 wordmap_create(&semgraph->scopes_olabels, num_scope_chars, num_scope_words,0); // max_num_arc_tokens); 418 419 /* 1. first load up all the information */ 420 i = 0; 421 while (pfgets(line, MAX_STRING_LEN, p_text_file)) 422 { 423 if (sscanf(line, "%hu\t%hu\t%[^\t]\t%[^\t\n\r]", &from_node, &into_node, iword, oword) == 4) 424 { 425 /* the cost is 0 by default */ 426 cost = 0; 427 /* since I am reading strings, and I want to store integers, I need to get 428 the index of the string by looking up in the ilabels wordmap */ 429 ilabel = wordmap_find_index(ilabels, iword); 430 431 /* now for the olabels, depending on the type of the label, I either use the index directly 432 or save the index in a wordmap which will eventually give me the right index. 433 Remember that the index must be offset by a certain value depending on which wordmap I'm using */ 434 435 if (IS_SCRIPT_MARKER(oword)) /* olabel type: script */ 436 { 437 olabel = (labelID) atoi(&oword[1]); 438 olabel = (wordID)(olabel + semgraph->script_olabel_offset); /* the offset */ 439 } 440 else if (IS_SCOPE_MARKER(oword)) /* olabel type: scope marker */ 441 { 442 /* check if the label is already in the wordmap, and reuse index */ 443 olabel = wordmap_find_index(semgraph->scopes_olabels, oword); 444 445 if (olabel == MAXwordID) /* not found so add to wordmap and get new index */ 446 olabel = wordmap_add_word(semgraph->scopes_olabels, oword); 447 olabel = (wordID)(olabel + semgraph->scopes_olabel_offset); /* the offset */ 448 } 449 else /* olabel type: input symbols hopefully !!! */ 450 { 451 /* if oword does not have a \t in the end, add a \t*/ 452 453 /* check if the label is already in the wordmap, and reuse index */ 454 olabel = wordmap_find_index(ilabels, oword); 455 456 if (olabel == MAXwordID) /* not found so add to wordmap and get new index */ 457 PLogMessage(L("output label not found: %s"), oword); 458 } 459 460 } 461 else if (sscanf(line, "%hu", &from_node) == 1) 462 { 463 into_node = MAXnodeID; 464 ilabel = MAXwordID; 465 olabel = MAXwordID; 466 cost = 0; 467 } 468 else 469 { 470 PLogMessage(L("can't parse line %s"), line); 471 passert(0); 472 } 473 474 /* okay, now that I have the data for the current arc, save it to the arc_token data structure*/ 475 atoken = &arc_token_list[i]; 476 ++i; 477 478 atoken->ilabel = ilabel; 479 atoken->olabel = olabel; 480 /* atoken->cost = cost; not used for now */ 481 482 /* initially this stores INTEGERS !!! , I need to cross-reference the integers with the 483 appropriate arc_token pointers (in the next steps for the algorithm) */ 484 atoken->first_next_arc = (arc_token_lnk)into_node; 485 atoken->next_token_index = (arc_token_lnk)from_node; 486 } 487 num_arcs = (arcID) i; 488 489 pfclose(p_text_file); 490 p_text_file = NULL; 491 492 wordmap_setbase(semgraph->scopes_olabels); 493 wordmap_ceiling(semgraph->scopes_olabels); /* we won't be adding scopes! */ 494 wordmap_setbase(semgraph->scripts); 495 496 /* 2. now do the internal cross references */ 497 /* in this pass we build the 1-to-1 links, and n-to-1 links in a graph */ 498 /* in other words... first_next_arc points to the first arc leaving the node */ 499 for (i = 0; i < num_arcs; ++i) 500 { 501 atoken = &arc_token_list[i]; 502 into_node = (nodeID)(int)atoken->first_next_arc; /* get the integer */ 503 atoken->first_next_arc = /* converts the integer id to a arc_token pointer */ 504 get_first_arc_leaving_node1(arc_token_list, num_arcs, (nodeID)(int)atoken->first_next_arc); 505 } 506 507 /* 3. now do more internal cross refs */ 508 /* in this pass we build the 1-to-n links */ 509 /* in other words ... setup the linked list of all arc leaving from the same node */ 510 last_atoken = &arc_token_list[0]; 511 for (i = 1; i < num_arcs; ++i) 512 { 513 atoken = &arc_token_list[i]; 514 /* if this arc and the last one do NOT leave the same node (i.e. from_node, see above), 515 then the next_token_index is not used */ 516 if (atoken->next_token_index != last_atoken->next_token_index) 517 last_atoken->next_token_index = ARC_TOKEN_NULL; 518 else 519 last_atoken->next_token_index = ARC_TOKEN_LNK(arc_token_list, i); 520 last_atoken = atoken; 521 } 522 last_atoken->next_token_index = ARC_TOKEN_NULL; 523 524 #if DEBUG_ASTAR 525 /* under debug, it's nice to be able to see the words leaving the 526 destination node, they are stored sequentially in the debug ary */ 527 for (i = 0; i < num_arcs; i++) 528 { 529 LCHAR * p; 530 arc_token* tmp; 531 atoken = &arc_token_list[i]; 532 atoken->debug[0] = 0; 533 tmp = ARC_TOKEN_PTR(arc_token_list, atoken->first_next_arc); 534 for (; tmp; tmp = ARC_TOKEN_PTR(arc_token_list, tmp->next_token_index)) 535 { 536 if (tmp->first_next_arc == ARC_TOKEN_NULL) 537 p = "END"; 538 else if (!tmp->label) 539 p = "NULL"; 540 else 541 p = tmp->label; 542 if (strlen(atoken->debug) + strlen(p) + 6 < 64) 543 { 544 strcat(atoken->debug, p); 545 strcat(atoken->debug, " "); 546 } 547 else 548 { 549 strcat(atoken->debug, "..."); 550 break; 551 } 552 } 553 } 554 #endif 555 semgraph->arc_token_list_len = (arcID)max_num_arc_tokens; 556 /* initialize the freelist */ 557 if (num_arcs < max_num_arc_tokens) 558 { 559 semgraph->arc_token_freelist = &semgraph->arc_token_list[num_arcs]; 560 for (i = num_arcs; i < max_num_arc_tokens - 1; i++) 561 { 562 semgraph->arc_token_list[i].first_next_arc = ARC_TOKEN_NULL; 563 semgraph->arc_token_list[i].next_token_index = ARC_TOKEN_LNK(semgraph->arc_token_list, (i + 1)); 564 } 565 semgraph->arc_token_list[i].first_next_arc = ARC_TOKEN_NULL; 566 semgraph->arc_token_list[i].next_token_index = ARC_TOKEN_NULL; 567 } 568 else 569 semgraph->arc_token_freelist = NULL; 570 571 /* for dynamic addition */ 572 for (i = 0; i < MAX_NUM_SLOTS; i++) 573 semgraph->arcs_for_slot[i] = NULL; 574 575 semgraph->arc_token_insert_start = semgraph->arc_token_list + num_arcs; 576 semgraph->arc_token_insert_end = NULL; 577 return ESR_SUCCESS; 578 CLEANUP: 579 if (p_text_file) 580 pfclose (p_text_file); 581 return rc; 582 } 583 584 ESR_ReturnCode SR_SemanticGraph_Load(SR_SemanticGraph* self, wordmap* ilabels, const LCHAR* basename, int num_words_to_add) 585 { 586 ESR_ReturnCode rc; 587 588 if (LSTRSTR(basename, L(".g2g"))) 589 { 590 rc = SR_SemanticGraph_LoadFromImage(self, ilabels, basename); 591 } 592 else 593 { 594 rc = SR_SemanticGraph_LoadFromTextFiles(self, ilabels, basename, num_words_to_add); 595 } 596 return rc; 597 } 598 599 /** 600 * Unload Sem graph 601 */ 602 ESR_ReturnCode SR_SemanticGraph_Unload(SR_SemanticGraph* self) 603 { 604 SR_SemanticGraphImpl* semgraph = (SR_SemanticGraphImpl*) self; 605 606 /* see the wordmap_create in the Load function */ 607 wordmap_destroy(&semgraph->scopes_olabels); 608 wordmap_destroy(&semgraph->scripts); 609 610 FREE(semgraph->arc_token_list); 611 semgraph->arc_token_list = 0; 612 return ESR_SUCCESS; 613 } 614 615 ESR_ReturnCode sr_semanticgraph_saveV1(SR_SemanticGraphImpl* impl, const LCHAR* g2g); 616 ESR_ReturnCode sr_semanticgraph_saveV2(SR_SemanticGraphImpl* impl, const LCHAR* g2g); 617 618 ESR_ReturnCode SR_SemanticGraph_Save(SR_SemanticGraph* self, const LCHAR* g2g, int version_number) 619 { 620 SR_SemanticGraphImpl* impl = (SR_SemanticGraphImpl*) self; 621 ESR_ReturnCode rc = ESR_SUCCESS; 622 623 if (version_number == 2) 624 { 625 rc = sr_semanticgraph_saveV2(impl, g2g); 626 } 627 else 628 { 629 PLogError("invalid version_number %d\n", version_number); 630 rc = ESR_INVALID_ARGUMENT; 631 } 632 return rc; 633 } 634 635 636 int sr_semanticgraph_get_type(SR_SemanticGraphImpl* impl) 637 { 638 arc_token *atoken, *arc_token_list = impl->arc_token_list; 639 arc_token_lnk mergept; 640 int expected_ilabel; 641 atoken = impl->arc_token_list; 642 643 /* 0 1 eps { 644 1 2 13e_avenue myRoot} 645 ... 646 1 2 13e_avenue myRoot} 647 2 */ 648 if (atoken->ilabel != WORD_EPSILON_LABEL) 649 return GrammarTypeBNF; 650 atoken = ARC_TOKEN_PTR(arc_token_list, atoken->first_next_arc); 651 if (!atoken) 652 return GrammarTypeBNF; 653 mergept = atoken->first_next_arc; 654 expected_ilabel = NUM_ITEMLIST_HDRWDS; 655 for (; atoken; atoken = ARC_TOKEN_PTR(arc_token_list, atoken->next_token_index)) 656 { 657 if (atoken->first_next_arc != mergept) 658 return GrammarTypeBNF; 659 if (atoken->ilabel != expected_ilabel) 660 return GrammarTypeBNF; 661 expected_ilabel++; 662 } 663 if (expected_ilabel != impl->ilabels->num_words) 664 return GrammarTypeBNF; 665 atoken = ARC_TOKEN_PTR(arc_token_list, mergept); 666 for (; atoken; atoken = ARC_TOKEN_PTR(arc_token_list, atoken->first_next_arc)) 667 { 668 if (atoken->next_token_index != ARC_TOKEN_NULL) 669 return GrammarTypeBNF; 670 if (atoken->ilabel != WORD_EPSILON_LABEL && 671 !(atoken->ilabel == MAXwordID && atoken->olabel == MAXwordID)) 672 return GrammarTypeBNF; 673 } 674 return GrammarTypeItemList; 675 } 676 677 #define SEMGR_OUTPUT_FORMAT_V2 478932784 678 679 ESR_ReturnCode sr_semanticgraph_saveV2(SR_SemanticGraphImpl* impl, const LCHAR* g2g) 680 { 681 ESR_ReturnCode rc; 682 PFile* fp; 683 asr_uint32_t tmp[32]; 684 struct 685 { 686 asr_uint32_t format; 687 asr_uint32_t sgtype; 688 } 689 header; 690 unsigned int i, nfields; 691 692 fp = pfopen ( g2g, L("r+b")); 693 /* CHKLOG(rc, PFileSystemCreatePFile(g2g, isLittleEndian, &fp)); 694 CHKLOG(rc, PFileOpen(fp, L("r+b")));*/ 695 696 if ( fp == NULL ) 697 { 698 rc = ESR_OPEN_ERROR; 699 goto CLEANUP; 700 } 701 702 pfseek(fp, 0, SEEK_END); 703 704 header.format = IMAGE_FORMAT_V2; 705 header.sgtype = sr_semanticgraph_get_type(impl); 706 header.sgtype = GrammarTypeBNF; 707 708 #ifdef SREC_ENGINE_VERBOSE_LOGGING 709 PLogMessage("sr_semanticgraph_saveV2() semgraphtype %d", header.sgtype); 710 #endif 711 if (pfwrite(&header, 4 /*sz*/, 2/*ni*/, fp) != 2) 712 { 713 rc = ESR_WRITE_ERROR; 714 PLogError(L("ESR_WRITE_ERROR: could not write V2")); 715 goto CLEANUP; 716 } 717 718 if (header.sgtype == GrammarTypeItemList) 719 { 720 arc_token *parser, *atok; 721 722 /* write num_words size array of short script ids 723 this might be just a y=x array, but it could be there 724 are synonyms, eg. NEW_YORK NEW_YORK_CITY -> same script 725 */ 726 parser = impl->arc_token_list; 727 parser = ARC_TOKEN_PTR(impl->arc_token_list, parser->first_next_arc); 728 for (i = NUM_ITEMLIST_HDRWDS; i < impl->ilabels->num_words; i++) 729 { 730 for (atok = parser; atok; atok = ARC_TOKEN_PTR(impl->arc_token_list, atok->next_token_index)) 731 { 732 if (atok->ilabel == i) break; 733 } 734 if (!atok) 735 { 736 rc = ESR_INVALID_STATE; 737 PLogError("Can't find word %d in semgraph\n", i); 738 goto CLEANUP; 739 } 740 tmp[0] = atok->olabel; 741 if (pfwrite(tmp, sizeof(tmp[0]), 1, fp) != 1) 742 { 743 rc = ESR_WRITE_ERROR; 744 PLogError(L("ESR_WRITE_ERROR: could not write V2")); 745 goto CLEANUP; 746 } 747 } 748 if ((rc = serializeWordMapV2(impl->scripts, fp)) != ESR_SUCCESS) 749 { 750 PLogError(ESR_rc2str(rc)); 751 goto CLEANUP; 752 } 753 } 754 else 755 { 756 757 i = 0; 758 tmp[i++] = impl->script_olabel_offset; 759 tmp[i++] = impl->scopes_olabel_offset; 760 nfields = i; 761 762 if (pfwrite(tmp, sizeof(tmp[0]), nfields, fp) != nfields) 763 { 764 rc = ESR_WRITE_ERROR; 765 PLogError(L("ESR_WRITE_ERROR: could not write script_olabel_offset")); 766 goto CLEANUP; 767 } 768 769 /* word arcs */ 770 if ((rc = serializeArcTokenInfoV2(impl, fp)) != ESR_SUCCESS) 771 { 772 PLogError(ESR_rc2str(rc)); 773 goto CLEANUP; 774 } 775 776 /* do not WRITE ilabels... this is a ref to the olabels from rec context */ 777 778 /* scopes */ 779 if ((rc = serializeWordMapV2(impl->scopes_olabels, fp)) != ESR_SUCCESS) 780 { 781 PLogError(ESR_rc2str(rc)); 782 goto CLEANUP; 783 } 784 785 if ((rc = serializeWordMapV2(impl->scripts, fp)) != ESR_SUCCESS) 786 { 787 PLogError(ESR_rc2str(rc)); 788 goto CLEANUP; 789 } 790 #ifdef SREC_ENGINE_VERBOSE_LOGGING 791 PLogMessage("G2G done WR semg %d", pftell(fp)); 792 #endif 793 } 794 rc = ESR_SUCCESS; 795 CLEANUP: 796 if (fp) 797 pfclose (fp); 798 return rc; 799 } 800 801 arc_token* arc_tokens_find_ilabel(arc_token* base, arc_token* arc_token_list, wordID ilabel) 802 { 803 arc_token* p; 804 for (p = arc_token_list; p != NULL; p = ARC_TOKEN_PTR(base, p->next_token_index)) 805 if (p->ilabel == ilabel) return p; 806 return NULL; 807 } 808 809 arc_token* arc_tokens_get_free(arc_token* base, arc_token** arc_token_freelist) 810 { 811 arc_token* tmp = (*arc_token_freelist); 812 if (tmp == NULL) 813 return NULL; 814 (*arc_token_freelist) = ARC_TOKEN_PTR(base, tmp->next_token_index); 815 tmp->ilabel = tmp->olabel = 0; 816 tmp->next_token_index = ARC_TOKEN_NULL; 817 tmp->first_next_arc = ARC_TOKEN_NULL; 818 return tmp; 819 } 820 821 int arc_tokens_list_size(arc_token* base, arc_token* head) 822 { 823 arc_token* tmp = head; 824 int count = 0; 825 for (; tmp; tmp = ARC_TOKEN_PTR(base, tmp->next_token_index)) 826 { 827 count++; 828 } 829 return count; 830 } 831 832 void arc_tokens_free_list(arc_token* base, arc_token** arc_token_freelist, arc_token* head) 833 { 834 arc_token *tail, *next = (arc_token*)1; 835 if (head == NULL) 836 return; 837 for (tail = head; ; tail = next) 838 { 839 next = ARC_TOKEN_PTR(base, tail->next_token_index); 840 if (next == NULL) break; 841 } 842 tail->next_token_index = ARC_TOKEN_PTR2LNK(base, (*arc_token_freelist)); 843 *arc_token_freelist = head; 844 } 845 846 ESR_ReturnCode find_in_union_of_scripts(const LCHAR* union_script, const LCHAR* script, ESR_BOOL* isFound) 847 { 848 const LCHAR* start; 849 const LCHAR* end; 850 const LCHAR* p; 851 const LCHAR* q; 852 853 if (union_script == NULL || script == NULL) 854 return ESR_INVALID_ARGUMENT; 855 856 start = LSTRCHR(union_script, L('\'')); 857 if (start == NULL) 858 return ESR_INVALID_ARGUMENT; 859 860 start++; /* point to first char after \' */ 861 862 end = LSTRCHR(start, L('\'')); /* point to last \' */ 863 if (end == NULL) 864 return ESR_INVALID_ARGUMENT; 865 866 p = start; 867 868 start = LSTRCHR(script, L('\'')); 869 if (start == NULL) 870 return ESR_INVALID_ARGUMENT; 871 start++; /* point to first char after \' */ 872 873 q = start; 874 875 while (p < end) 876 { 877 if (*p == MULTIPLE_MEANING_JOIN_CHAR) /* if at the end of a meaning (not end of union) 878 and p matched q all the way up to join char then found! */ 879 { 880 *isFound = ESR_TRUE; 881 return ESR_SUCCESS; 882 } 883 else if (*p == *q) /* while same keep going */ 884 { 885 if (*p == *(end - 1)) /* if at the end and p matched q all the way then found! */ 886 { 887 *isFound = ESR_TRUE; 888 return ESR_SUCCESS; 889 } 890 q++; 891 } 892 else /* skip to next meaning after join char */ 893 { 894 while (*p != MULTIPLE_MEANING_JOIN_CHAR && p < end) 895 p++; 896 /* reset q */ 897 q = start; 898 } 899 p++; 900 } 901 902 *isFound = ESR_FALSE; 903 return ESR_SUCCESS; 904 } 905 906 #define QUOTE_CHAR L('\'') 907 int count_num_literals(const LCHAR* a, const LCHAR** start_points, int max_num_start_points) 908 { 909 int num = 0; 910 const LCHAR *p, *q = a; 911 const LCHAR *end = a + LSTRLEN(a); 912 while (1) 913 { 914 /* look for starting QUOTE_CHAR */ 915 for (p = q; p < end; p++) 916 { 917 if (*p == ESC_CHAR) p++; 918 else if (*p == QUOTE_CHAR) break; 919 } 920 if (p == end) break; 921 if (num > max_num_start_points) break; /* just abort the counting! */ 922 start_points[num] = p; 923 /* look for ending QUOTE_CHAR */ 924 for (q = p + 1; q < end; q++) 925 { 926 if (*q == ESC_CHAR) q++; 927 else if (*q == QUOTE_CHAR) break; 928 } 929 if (q == end) /* does not close! */ 930 return -1; 931 p = ++q; 932 num++; 933 } 934 return num; 935 } 936 int union_literal_pair(LCHAR* o, LCHAR* a, LCHAR* b, LCHAR** pptra) 937 { 938 LCHAR *enda, *ptra, *endb, *ptrb; 939 LCHAR *p, *ptro; 940 enda = a + LSTRLEN(a); 941 endb = b + LSTRLEN(b); 942 /* capture the data from a to ptra */ 943 for (ptra = a + 1; ptra < enda; ptra++) 944 { 945 if (*ptra == ESC_CHAR) ptra++; 946 else if (*ptra == QUOTE_CHAR) break; 947 } 948 /* capture the data from b to ptrb */ 949 for (ptrb = b + 1; ptrb < endb; ptrb++) 950 { 951 if (*ptrb == ESC_CHAR) ptrb++; 952 else if (*ptrb == QUOTE_CHAR) break; 953 } 954 /* now make the output */ 955 ptro = o; 956 *ptro++ = QUOTE_CHAR; 957 for (p = a + 1; p < ptra; p++) *ptro++ = *p; 958 *ptro++ = MULTIPLE_MEANING_JOIN_CHAR; 959 for (p = b + 1; p < ptrb; p++) *ptro++ = *p; 960 *ptro++ = QUOTE_CHAR; 961 *ptro++ = 0; 962 *pptra = ptra + 1; 963 return 0; 964 } 965 966 /* now handles n1='52';n2='62'; UNION n1='53';nx='63'; */ 967 968 ESR_ReturnCode make_union_of_scripts(LCHAR* union_script, const size_t max_len, const LCHAR* a, const LCHAR* b) 969 { 970 int i, num_literals_in_a, num_literals_in_b; 971 LCHAR *spa[8], *spb[8], *spo[8], *ptra; 972 973 if (a == NULL || b == NULL) 974 return ESR_INVALID_ARGUMENT; 975 976 num_literals_in_a = count_num_literals(a, (const LCHAR **)spa, 8); 977 num_literals_in_b = count_num_literals(b, (const LCHAR **)spb, 8); 978 979 if (num_literals_in_a == 0 && num_literals_in_b == 0) 980 { 981 if (LSTRLEN(a) > max_len) return ESR_BUFFER_OVERFLOW; 982 else 983 { 984 LSTRCPY(union_script, a); 985 return ESR_SUCCESS; 986 } 987 } 988 else if (num_literals_in_a != num_literals_in_b) 989 { 990 return ESR_INVALID_ARGUMENT; 991 } 992 993 /* V='Springfield_IL' union V='Springfield_MA' is V='Springfield_IL#Springfield_MA' */ 994 /* 18 + 18 -2 = 33 + 1 for NULL */ 995 if ((LSTRLEN(a) + LSTRLEN(b) - 2) > max_len) 996 { 997 PLogError("Temp buffer (size %d) to hold union of multiple meanings (size %d) is too small", max_len, (LSTRLEN(a) + LSTRLEN(b) - 2)); 998 return ESR_BUFFER_OVERFLOW; 999 } 1000 1001 LSTRCPY(union_script, a); 1002 for (i = 0; i < num_literals_in_a; i++) 1003 { 1004 count_num_literals(union_script, (const LCHAR **)spo, 8); 1005 /* here union_script is n0='52';n1='62'; */ 1006 union_literal_pair(spo[i], spa[i], spb[i], &ptra); 1007 #ifdef _WIN32 1008 if (LSTRLEN(spo[i]) > MAX_SEMPROC_VALUE) 1009 pfprintf(PSTDOUT, "Warning: won't be able to parse this script! len %d>%d %s\n", LSTRLEN(spo[i]), MAX_SEMPROC_VALUE, spo[i]); 1010 #endif 1011 /* here union_script is n0='52#53' */ 1012 LSTRCAT(union_script, ptra); 1013 /* here union_script is n0='52#53';n1='62'; */ 1014 } 1015 return ESR_SUCCESS; 1016 } 1017 1018 /** 1019 * Default implementation. 1020 */ 1021 ESR_ReturnCode SR_SemanticGraph_AddWordToSlot(SR_SemanticGraph* self, const LCHAR* _slot, const LCHAR* word, const LCHAR* script, const ESR_BOOL newWordAddedToFST) 1022 { 1023 struct SR_SemanticGraphImpl_t *impl = (struct SR_SemanticGraphImpl_t*) self; 1024 arc_token *token, *tmp; 1025 arc_token *tmp_arc_token_list; 1026 wordID wdID, scriptID, old_scriptID; 1027 wordID slotID; 1028 LCHAR union_script[MAX_STRING_LEN]; /* sizeof used elsewhere */ 1029 ESR_ReturnCode rc; int i; 1030 int tmp_arc_token_list_len; 1031 int offset; 1032 #define MAX_WORD_LEN 128 1033 char veslot[MAX_WORD_LEN]; 1034 1035 if (script == NULL || *script == L('\0') || !LSTRCMP(script, L("NULL"))) 1036 return ESR_SUCCESS; /* no script to add so keep going */ 1037 1038 /* find out if the word I am adding already exists. If it already exists, then that means that I 1039 potentially am adding an alternate meaning for the word */ 1040 /* the slotname in .PCLG.txt and .map files use __ as the indicator. Xufang */ 1041 if(_slot[0] == '@') { 1042 strcpy(veslot,SLOTNAME_INDICATOR); 1043 strcat(veslot,_slot+1); 1044 strcat(veslot,SLOTNAME_INDICATOR); 1045 } else 1046 strcpy(veslot, _slot); 1047 1048 slotID = wordmap_find_rule_index(impl->ilabels, veslot); 1049 if (slotID == MAXwordID) 1050 { 1051 PLogError(L("ESR_NO_MATCH_ERROR: Could not find slotID in wordmap %s"), _slot); 1052 return ESR_NO_MATCH_ERROR; 1053 } 1054 wdID = wordmap_find_index_in_rule(impl->ilabels, word, slotID); 1055 if (wdID == MAXwordID) 1056 { 1057 PLogError(L("ESR_NO_MATCH_ERROR: Could not find wordID/slotID in wordmap %s/%d"), word, slotID); 1058 return ESR_NO_MATCH_ERROR; 1059 } 1060 1061 /* **this is an optimization step** */ 1062 /* Is word already added in this slot? if so, get the token pointer, else, token is NULL 1063 * 1064 * the assumption is that FST_AddWordToGrammar will tell us if this word was newly added in the FST, or 1065 * if the word was added at least 1 iteration ago, meaning that I have already added it to my 1066 * semgraph slot at some earlier point 1067 */ 1068 if (newWordAddedToFST) 1069 token = NULL; 1070 else 1071 token = arc_tokens_find_ilabel(impl->arc_token_list, impl->arcs_for_slot[slotID], wdID); 1072 1073 #define FST_GROW_FACTOR 12/10 1074 #define FST_GROWARCS_MIN 100 1075 if (token == NULL) /* new word to add to slot */ 1076 { 1077 /* add the script if new */ 1078 scriptID = wordmap_find_index(impl->scripts, script); 1079 if (scriptID == MAXwordID) 1080 scriptID = wordmap_add_word(impl->scripts, script); 1081 if (scriptID == MAXwordID) 1082 { 1083 PLogError(L("ESR_OUT_OF_MEMORY: Could not add script to wordmap")); 1084 return ESR_OUT_OF_MEMORY; 1085 } 1086 1087 token = impl->arcs_for_slot[slotID]; 1088 tmp = arc_tokens_get_free(impl->arc_token_list, &(impl->arc_token_freelist)); 1089 if (tmp == NULL) 1090 { 1091 #if defined (FST_GROW_FACTOR) 1092 tmp_arc_token_list_len = impl->arc_token_list_len * FST_GROW_FACTOR; 1093 if(tmp_arc_token_list_len - impl->arc_token_list_len <=FST_GROWARCS_MIN) 1094 tmp_arc_token_list_len+=FST_GROWARCS_MIN; 1095 1096 tmp_arc_token_list= NEW_ARRAY(arc_token,tmp_arc_token_list_len, L("semgraph.wordgraph")); 1097 if(!tmp_arc_token_list) { 1098 PLogError(L("ESR_OUT_OF_MEMORY: Could not extend allocation of semgraph.wordgraph")); 1099 return ESR_OUT_OF_MEMORY; 1100 } 1101 memcpy(tmp_arc_token_list,impl->arc_token_list, impl->arc_token_list_len*sizeof(arc_token)); 1102 1103 for(i=0; i<MAX_NUM_SLOTS;i++) 1104 { 1105 if(impl->arcs_for_slot[i] != NULL) { 1106 offset = impl->arcs_for_slot[i] - impl->arc_token_list; 1107 impl->arcs_for_slot[i] = tmp_arc_token_list + offset; 1108 } 1109 } 1110 token = impl->arcs_for_slot[slotID]; 1111 1112 ASSERT( impl->arc_token_freelist == NULL); 1113 1114 impl->arc_token_freelist = tmp_arc_token_list + impl->arc_token_list_len; 1115 1116 FREE(impl->arc_token_list); 1117 impl->arc_token_insert_start = tmp_arc_token_list + (impl->arc_token_insert_start - impl->arc_token_list); //Rabih fix 1118 impl->arc_token_list = tmp_arc_token_list; 1119 1120 for (i = impl->arc_token_list_len; i < tmp_arc_token_list_len - 1; i++) 1121 { 1122 impl->arc_token_list[i].first_next_arc = ARC_TOKEN_NULL; 1123 impl->arc_token_list[i].next_token_index = ARC_TOKEN_LNK(impl->arc_token_list, (i + 1)); 1124 } 1125 impl->arc_token_list[i].first_next_arc = ARC_TOKEN_NULL; 1126 impl->arc_token_list[i].next_token_index = ARC_TOKEN_NULL; 1127 1128 impl->arc_token_list_len = tmp_arc_token_list_len; 1129 tmp = arc_tokens_get_free(impl->arc_token_list, &(impl->arc_token_freelist)); 1130 } 1131 #endif 1132 if(tmp == NULL) { 1133 PLogError(L("ESR_OUT_OF_MEMORY: Error adding more arcs to graph\n")); 1134 return ESR_OUT_OF_MEMORY; 1135 } 1136 impl->arcs_for_slot[slotID] = tmp; 1137 tmp->next_token_index = ARC_TOKEN_PTR2LNK(impl->arc_token_list, token); 1138 tmp->ilabel = wdID; 1139 tmp->olabel = (wordID)(impl->script_olabel_offset + scriptID); 1140 } 1141 else 1142 { 1143 old_scriptID = token->olabel - impl->script_olabel_offset; 1144 1145 if (!LSTRCMP(impl->scripts->words[old_scriptID], script)) 1146 { 1147 /* nothing to do, we have the word, same meaning again so do nothing */ 1148 } 1149 else 1150 { 1151 1152 CHKLOG(rc, make_union_of_scripts(union_script, sizeof(union_script), impl->scripts->words[old_scriptID], script)); 1153 1154 #ifdef SREC_ENGINE_VERBOSE_LOGGING 1155 PLogMessage(L("Adding alternate meaning %s for word %s (%s) in slot %s\n"), script, word, 1156 impl->scripts->words[old_scriptID], impl->ilabels->words[slotID]); 1157 #endif 1158 /* add the union as if new (if not already there) */ 1159 scriptID = wordmap_find_index(impl->scripts, union_script); 1160 if (scriptID == MAXwordID) 1161 scriptID = wordmap_add_word(impl->scripts, union_script); 1162 if (scriptID == MAXwordID) 1163 { 1164 PLogError(L("ESR_OUT_OF_MEMORY: Could not add script to wordmap")); 1165 return ESR_OUT_OF_MEMORY; 1166 } 1167 1168 /* make the olabel point to the union */ 1169 token->olabel = (wordID)(impl->script_olabel_offset + scriptID); 1170 } 1171 } 1172 return ESR_SUCCESS; 1173 CLEANUP: 1174 return rc; 1175 } 1176 1177 1178 /** 1179 * Default implementation. 1180 */ 1181 ESR_ReturnCode SR_SemanticGraph_Reset(SR_SemanticGraph* self) 1182 { 1183 struct SR_SemanticGraphImpl_t *impl = (struct SR_SemanticGraphImpl_t*) self; 1184 wordID slotid; 1185 arc_token* tmp; 1186 arc_token *tmp_arc_token_list; 1187 1188 wordmap_reset(impl->scopes_olabels); 1189 wordmap_reset(impl->scripts); 1190 wordmap_reset(impl->ilabels); //Rabih: I added this 1191 for (slotid = 1; slotid < impl->ilabels->num_slots; slotid++) 1192 { 1193 tmp = impl->arcs_for_slot[slotid]; 1194 arc_tokens_free_list(impl->arc_token_list, &(impl->arc_token_freelist), tmp); 1195 impl->arcs_for_slot[slotid] = NULL; 1196 #if defined(SANITY_CHECK) 1197 int count; 1198 for (count = 0, tmp = impl->arc_token_freelist; tmp != NULL; 1199 tmp = ARC_TOKEN_PTR(impl->arc_token_list, tmp->next_token_index)) 1200 { 1201 ASSERT(tmp->ilabel != 79324); 1202 tmp->ilabel = 79324; 1203 count++; 1204 } 1205 PLogError("after reset freelist size is %d", count); 1206 #endif 1207 } 1208 1209 // Rabih : Reset the arc_token_list 1210 if(impl->ilabels->num_words == impl->ilabels->num_base_words) 1211 {} 1212 else{ 1213 impl->arc_token_list_len = (size_t)(impl->arc_token_insert_start - impl->arc_token_list); 1214 tmp_arc_token_list= NEW_ARRAY(arc_token,impl->arc_token_list_len, L("semgraph.wordgraph")); 1215 memcpy(tmp_arc_token_list,impl->arc_token_list, impl->arc_token_list_len*sizeof(arc_token)); 1216 1217 impl->arc_token_freelist = NULL; 1218 1219 FREE(impl->arc_token_list); 1220 impl->arc_token_list = tmp_arc_token_list; 1221 } 1222 return ESR_SUCCESS; 1223 } 1224 1225 static ESR_ReturnCode serializeArcTokenInfoV2(SR_SemanticGraphImpl *impl, 1226 PFile* fp) 1227 { 1228 int i; 1229 asr_uint32_t idx; 1230 arcID tmp[32]; 1231 1232 if (pfwrite(&impl->arc_token_list_len, 2, 1, fp) != 1) 1233 return ESR_WRITE_ERROR; 1234 1235 idx = PTR_TO_IDX(impl->arc_token_freelist, impl->arc_token_list); 1236 1237 if (pfwrite(&idx, 4, 1, fp) != 1) 1238 return ESR_WRITE_ERROR; 1239 1240 idx = PTR_TO_IDX(impl->arc_token_insert_start, impl->arc_token_list); 1241 1242 if (pfwrite(&idx, 4, 1, fp) != 1) 1243 return ESR_WRITE_ERROR; 1244 1245 idx = 0; 1246 if (pfwrite(&idx, 4, 1, fp) != 1) 1247 return ESR_WRITE_ERROR; 1248 1249 for (i = 0; i < impl->arc_token_list_len; ++i) 1250 { 1251 arc_token* token = &impl->arc_token_list[i]; 1252 tmp[0] = token->ilabel; 1253 tmp[1] = token->olabel; 1254 tmp[2] = ARC_TOKEN_IDX(impl->arc_token_list, token->first_next_arc); 1255 tmp[3] = ARC_TOKEN_IDX(impl->arc_token_list, token->next_token_index); 1256 if (pfwrite(tmp, sizeof(tmp[0]), 4, fp) != 4) 1257 return ESR_WRITE_ERROR; 1258 } 1259 1260 /* new, fixes load/save bug 2007 July 31 1261 todo: change 4 to sizeof(asr_uint32) */ 1262 if(1) { 1263 asr_uint32_t idx[MAX_NUM_SLOTS]; 1264 for(i=0; i<MAX_NUM_SLOTS; i++) 1265 idx[i] = PTR_TO_IDX(impl->arcs_for_slot[i], impl->arc_token_list); 1266 if (pfwrite(&idx, 4, MAX_NUM_SLOTS, fp) != MAX_NUM_SLOTS) 1267 return ESR_WRITE_ERROR; 1268 } 1269 1270 return ESR_SUCCESS; 1271 } 1272 1273 static ESR_ReturnCode deserializeArcTokenInfoV2(SR_SemanticGraphImpl *impl, 1274 PFile* fp) 1275 { 1276 int i; 1277 asr_uint32_t idx; 1278 ESR_ReturnCode rc = ESR_SUCCESS; 1279 arcID tmp[32]; 1280 1281 if (pfread(&impl->arc_token_list_len, 2, 1, fp) != 1) 1282 { 1283 rc = ESR_READ_ERROR; 1284 PLogError(L("ESR_READ_ERROR: could not read arc_token_list_len")); 1285 return rc; 1286 } 1287 1288 impl->arc_token_list = NEW_ARRAY(arc_token, 1289 impl->arc_token_list_len, 1290 L("semgraph.wordgraph")); 1291 1292 if (impl->arc_token_list == NULL) 1293 { 1294 rc = ESR_OUT_OF_MEMORY; 1295 PLogError(ESR_rc2str(rc)); 1296 return ESR_OUT_OF_MEMORY; 1297 } 1298 1299 if (pfread(&idx, 4, 1, fp) != 1) 1300 { 1301 rc = ESR_READ_ERROR; 1302 PLogError(ESR_rc2str(rc)); 1303 goto CLEANUP; 1304 } 1305 1306 impl->arc_token_freelist = IDX_TO_PTR(idx, impl->arc_token_list); 1307 1308 if (pfread(&idx, 4, 1, fp) != 1) 1309 { 1310 rc = ESR_READ_ERROR; 1311 PLogError(ESR_rc2str(rc)); 1312 goto CLEANUP; 1313 } 1314 1315 impl->arc_token_insert_start = IDX_TO_PTR(idx, impl->arc_token_list); 1316 // impl->arc_token_insert_start = impl->arc_token_list + impl->arc_token_list_len; // Rabih's fix 1317 1318 if (pfread(&idx, 4, 1, fp) != 1) 1319 { 1320 rc = ESR_READ_ERROR; 1321 PLogError(ESR_rc2str(rc)); 1322 goto CLEANUP; 1323 } 1324 impl->arc_token_insert_end = 0; 1325 1326 for (i = 0; i < impl->arc_token_list_len; ++i) 1327 { 1328 arc_token* token = &impl->arc_token_list[i]; 1329 if (pfread(tmp, sizeof(tmp[0]), 4, fp) != 4) 1330 { 1331 rc = ESR_READ_ERROR; 1332 goto CLEANUP; 1333 } 1334 token->ilabel = tmp[0]; 1335 token->olabel = tmp[1]; 1336 if (tmp[2] == MAXarcID) 1337 token->first_next_arc = ARC_TOKEN_NULL; 1338 else 1339 token->first_next_arc = ARC_TOKEN_LNK(impl->arc_token_list, tmp[2]); 1340 if (tmp[3] == MAXarcID) 1341 token->next_token_index = ARC_TOKEN_NULL; 1342 else 1343 token->next_token_index = ARC_TOKEN_LNK(impl->arc_token_list, tmp[3]); 1344 } 1345 1346 /* new, fixes load/save bug 2007 July 31 1347 todo: change 4 to sizeof(asr_uint32) */ 1348 if(1) { 1349 asr_uint32_t idx[MAX_NUM_SLOTS]; 1350 if (pfread(&idx[0], 4, MAX_NUM_SLOTS, fp) != MAX_NUM_SLOTS) { 1351 rc = ESR_READ_ERROR; 1352 PLogError(ESR_rc2str(rc)); 1353 goto CLEANUP; 1354 } 1355 for(i=0; i<MAX_NUM_SLOTS; i++) 1356 impl->arcs_for_slot[i] = IDX_TO_PTR(idx[i], impl->arc_token_list); 1357 } 1358 1359 return ESR_SUCCESS; 1360 1361 CLEANUP: 1362 FREE(impl->arc_token_list); 1363 impl->arc_token_list = 1364 impl->arc_token_freelist = 1365 impl->arc_token_insert_start = 1366 impl->arc_token_insert_end = NULL; 1367 return rc; 1368 } 1369