Home | History | Annotate | Download | only in src
      1 /*---------------------------------------------------------------------------*
      2  *  SemanticGraphImpl.c  *
      3  *                                                                           *
      4  *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
      5  *                                                                           *
      6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
      7  *  you may not use this file except in compliance with the License.         *
      8  *                                                                           *
      9  *  You may obtain a copy of the License at                                  *
     10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
     11  *                                                                           *
     12  *  Unless required by applicable law or agreed to in writing, software      *
     13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
     14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
     15  *  See the License for the specific language governing permissions and      *
     16  *  limitations under the License.                                           *
     17  *                                                                           *
     18  *---------------------------------------------------------------------------*/
     20 #include "SR_SemprocPrefix.h"
     21 #include "SR_SemprocDefinitions.h"
     22 #include "SR_SemanticGraph.h"
     23 #include "SR_SemanticGraphImpl.h"
     24 #include "SR_SemanticProcessorImpl.h"
     25 #include "ESR_ReturnCode.h"
     26 #include "passert.h"
     27 #include "pendian.h"
     28 #include "plog.h"
     30 static const char* MTAG = __FILE__;
     31 #define AVG_SCRIPTS_PER_WORD 2.5
     32 #define SLOTNAME_INDICATOR "__"
     33 #define SLOTNAME_INDICATOR_LEN 2
     35 #define PTR_TO_IDX(ptr, base) ((asr_uint32_t) (ptr == NULL ? 0xFFFFFFFFu : \
     36                                (asr_uint32_t)(ptr - base)))
     37 #define IDX_TO_PTR(idx, base) (idx == 0xFFFFFFFFu ? NULL : base + idx)
     39 ESR_ReturnCode SR_SemanticGraphCreate(SR_SemanticGraph** self)
     40 {
     41   SR_SemanticGraphImpl* impl;
     43   if (self == NULL)
     44   {
     45     PLogError(L("ESR_INVALID_ARGUMENT"));
     46     return ESR_INVALID_ARGUMENT;
     47   }
     48   impl = NEW(SR_SemanticGraphImpl, MTAG);
     49   if (impl == NULL)
     50   {
     51     PLogError(L("ESR_OUT_OF_MEMORY"));
     52     return ESR_OUT_OF_MEMORY;
     53   }
     54   /* do not assume NEW initialize impl as zero, do it here */
     55   memset(impl, 0, sizeof(SR_SemanticGraphImpl));
     57   impl->Interface.destroy = &SR_SemanticGraph_Destroy;
     58   impl->Interface.unload = &SR_SemanticGraph_Unload;
     59   impl->Interface.load = &SR_SemanticGraph_Load;
     60   impl->Interface.save = &SR_SemanticGraph_Save;
     61   impl->Interface.addWordToSlot = &SR_SemanticGraph_AddWordToSlot;
     62   impl->Interface.reset = &SR_SemanticGraph_Reset;
     63   impl->script_olabel_offset = SEMGRAPH_SCRIPT_OFFSET;
     64   impl->scopes_olabel_offset = SEMGRAPH_SCOPE_OFFSET;
     66   *self = (SR_SemanticGraph*) impl;
     67   return ESR_SUCCESS;
     68 }
     71 /**
     72  * Default implementation.
     73  */
     74 ESR_ReturnCode SR_SemanticGraph_Destroy(SR_SemanticGraph* self)
     75 {
     76   SR_SemanticGraphImpl* impl = (SR_SemanticGraphImpl*) self;
     78   if (self == NULL)
     79   {
     80     PLogError(L("ESR_INVALID_ARGUMENT"));
     81     return ESR_INVALID_ARGUMENT;
     82   }
     84   FREE(impl);
     85   return ESR_SUCCESS;
     86 }
     88 ESR_ReturnCode sr_semanticgraph_loadV2(SR_SemanticGraphImpl* impl, wordmap* ilabels, PFile* fp);
     91 /* private function */
     92 ESR_ReturnCode SR_SemanticGraph_LoadFromImage(SR_SemanticGraph* self, wordmap* ilabels, const LCHAR* g2g)
     93 {
     94   SR_SemanticGraphImpl* impl = (SR_SemanticGraphImpl*) self;
     95   PFile* fp = NULL;
     96   struct
     97   {
     98     asr_uint32_t rec_context_image_size;
     99     /*  image data size of the recognition graph */
    100     asr_uint32_t format;
    101   }
    102   header;
    103   ESR_ReturnCode rc = ESR_SUCCESS;
    104   ESR_BOOL isLittleEndian;
    105   /*
    106     #if __BYTE_ORDER==__LITTLE_ENDIAN
    107     isLittleEndian = ESR_TRUE;
    108     #else
    109     isLittleEndian = ESR_FALSE;
    110     #endif
    111   */
    112   isLittleEndian = ESR_TRUE;
    114   fp = pfopen ( g2g, L("rb"));
    115 /*  CHKLOG(rc, PFileSystemCreatePFile(g2g, isLittleEndian, &fp));
    116   CHKLOG(rc, PFileOpen(fp, L("rb")));*/
    118   if ( fp == NULL )
    119     goto CLEANUP;
    121   /* header */
    122   if (pfread(&header, 4, 2, fp) != 2)
    123   {
    124     rc = ESR_READ_ERROR;
    125     PLogError(ESR_rc2str(rc));
    126     goto CLEANUP;
    127   }
    129   if (pfseek(fp, header.rec_context_image_size, SEEK_SET))
    130   {
    131     rc = ESR_READ_ERROR;
    132     PLogError(L("ESR_READ_ERROR: could not seek to semgraph data"));
    133     goto CLEANUP;
    134   }
    136   if (header.format == IMAGE_FORMAT_V2)
    137   {
    138     rc = sr_semanticgraph_loadV2(impl, ilabels, fp);
    139   }
    140   else
    141   {
    142     rc = ESR_INVALID_STATE;
    143     PLogError("PCLG.txt P.txt inconsistency");
    144     goto CLEANUP;
    145   }
    147 CLEANUP:
    148   if (fp)
    149     pfclose (fp);
    150   if (rc != ESR_SUCCESS)
    151   {
    152     if (impl->arc_token_list != NULL)
    153     {
    154       FREE(impl->arc_token_list);
    155       impl->arc_token_list = NULL;
    156     }
    157   }
    158   return rc;
    159 }
    161 static ESR_ReturnCode deserializeArcTokenInfoV2(SR_SemanticGraphImpl *impl,
    162     PFile* fp);
    164 static ESR_ReturnCode serializeArcTokenInfoV2(SR_SemanticGraphImpl *impl,
    165     PFile* fp);
    167 ESR_ReturnCode sr_semanticgraph_loadV2(SR_SemanticGraphImpl* impl, wordmap* ilabels, PFile* fp)
    168 {
    169   unsigned int i, nfields;
    170   ESR_ReturnCode rc = ESR_SUCCESS;
    171   struct
    172   {
    173     asr_uint32_t format;
    174     asr_uint32_t sgtype;
    175   }
    176   header;
    177   asr_uint32_t tmp[32];
    179   if (pfread(&header, 4/*sz*/, 2/*ni*/, fp) != 2)
    180   {
    181     rc = ESR_READ_ERROR;
    182     PLogError(L("ESR_READ_ERROR: could not read V2"));
    183     goto CLEANUP;
    184   }
    186   if (header.sgtype == GrammarTypeItemList)
    187   {
    188     /*
    189       tmp = new unsigned short[num_words];
    190       if( pfread( tmp, sizeof(tmp[0]), num_words, fp) != num_words) {
    191       rc = ESR_READ_ERROR;
    192       PLogMessage("can't read %d word script assocs\n", num_words);
    193       goto CLEANUP;
    194       }
    195     */
    196     /* convert these to an arc_token_list or whatever */
    197     PLogError("not supported v2 itemlist type");
    198     rc = ESR_INVALID_STATE;
    199     goto CLEANUP;
    201   }
    202   else
    203   {
    205     nfields = 2;
    206     if (pfread(tmp, sizeof(tmp[0]), nfields, fp) != nfields)
    207     {
    208       rc = ESR_WRITE_ERROR;
    209       PLogError(L("ESR_WRITE_ERROR: could not write script_olabel_offset"));
    210       goto CLEANUP;
    211     }
    212     i = 0;
    213     impl->script_olabel_offset = (wordID)tmp[i++];
    214     impl->scopes_olabel_offset = (wordID)tmp[i++];
    215     ASSERT(i == nfields);
    217     /* word arcs */
    218     if ((rc = deserializeArcTokenInfoV2(impl, fp)) != ESR_SUCCESS)
    219     {
    220       PLogError(ESR_rc2str(rc));
    221       goto CLEANUP;
    222     }
    224     /* use the ilabels provided externally (from recog graph ilabels) */
    225     impl->ilabels = ilabels;
    227     /* scopes */
    228     if ((rc = deserializeWordMapV2(&impl->scopes_olabels, fp)) != ESR_SUCCESS)
    229     {
    230       PLogError(ESR_rc2str(rc));
    231       goto CLEANUP;
    232     }
    234     /* scripts */
    235     if ((rc = deserializeWordMapV2(&impl->scripts, fp)) != ESR_SUCCESS)
    236     {
    237       PLogError(ESR_rc2str(rc));
    238       goto CLEANUP;
    239     }
    240   }
    241 CLEANUP:
    242   return rc;
    243 }
    246 static arc_token_lnk get_first_arc_leaving_node1(arc_token* arc_token_list,
    247     arcID num_arcs,
    248     nodeID node)
    249 {
    250   arcID i;
    251   for (i = 0; i < num_arcs; i++)
    252   {
    253     if ((nodeID)(int)arc_token_list[i].next_token_index == node)
    254       return ARC_TOKEN_LNK(arc_token_list, i);
    255   }
    256   return ARC_TOKEN_NULL;
    257 }
    259 static int strlen_with_null(const char* word)
    260 { /* from srec_context.c */
    261   int len = strlen(word) + 1;
    262   if (len % 2 == 1) len++;
    263   return len;
    264 }
    265 /* private function */
    266 ESR_ReturnCode SR_SemanticGraph_LoadFromTextFiles(SR_SemanticGraph* self, wordmap* ilabels, const LCHAR* basename, int num_words_to_add)
    267 {
    268   ESR_ReturnCode rc = ESR_FATAL_ERROR;
    269   arcID num_scripts;
    270   int isConstString = 0;
    271   LCHAR filename[MAX_STRING_LEN];
    272   LCHAR line[MAX_SCRIPT_LEN];
    273   LCHAR iword[MAX_STRING_LEN];
    274   LCHAR oword[MAX_SCRIPT_LEN];
    275   LCHAR *p;
    276   unsigned int max_num_arc_tokens;
    277   nodeID from_node, into_node;
    278   wordID ilabel = 0;
    279   labelID olabel = 0;
    280   arc_token *atoken;
    281   arc_token *last_atoken;
    282   costdata cost = 0;
    283   arcID num_arcs;
    284   arc_token* arc_token_list;
    285   long fpos;
    286   PFile* p_text_file = NULL;
    287   PFile* scripts_file;
    288   SR_SemanticGraphImpl* semgraph = (SR_SemanticGraphImpl*) self;
    289   size_t lineNo;
    290   unsigned int i;
    291   wordID num_scope_words;
    292   asr_int32_t num_scope_chars;
    293   LCHAR* _tMp;    /* used by IS_SCOPE_MARKER() below */
    295   /* use the ilables that are provided externally (from recog graph ilabels) */
    296   semgraph->ilabels = ilabels;
    300   /* try to open the .script file */
    301   LSTRCPY(filename, basename);
    302   LSTRCAT(filename, ".script");
    303   scripts_file = pfopen ( filename, L("r") );
    304 /*  CHKLOG(rc, PFileSystemCreatePFile(filename, TRUE, &scripts_file));
    305   CHKLOG(rc, PFileOpen(scripts_file, L("r")));*/
    307   if ( scripts_file == NULL )
    308   {
    309     rc = ESR_OPEN_ERROR;
    310     goto CLEANUP;
    311   }
    313   /* Load the scripts file
    314     assumptions:
    316   - the scripts file has each line ordered starting from 0 as such
    317   <integer><space><script>
    319   - the integer MUST become the index of the script in the wordmap
    321   - output labels referenced in the semgraph are the integers (wordmap index) prepending with '_'
    323   - output labels stored in the semgraph are actually integers which are equal to
    324     script_olabel_offset + <integer>
    325   */
    327   /* determine number of words/chars to allocate */
    328   fpos = pftell(scripts_file);
    329   for (i = num_scripts = 0; pfgets(line, MAX_SCRIPT_LEN, scripts_file); num_scripts++)
    330   {
    331     size_t len = LSTRLEN(line) + 1;
    332     if (len % 2) len++;
    333     i = i + len; /* count the chars */
    334   }
    335   pfseek(scripts_file, fpos, SEEK_SET);
    337   /* on each line I will have 1 big word */
    338   /* figure that each script for dynamically added words will be a simple assignment
    339      like myVar='someVal' ... which looks like almost 2.5 words, hence *2.5 */
    340   wordmap_create(&semgraph->scripts, i, num_scripts, (int)AVG_SCRIPTS_PER_WORD*num_words_to_add);
    342   /* load up all the information */
    343   lineNo = 0;
    344   while (pfgets(line, MAX_SCRIPT_LEN, scripts_file))
    345   {
    346     ASSERT( sizeof( iword[0]) == sizeof(char)); // else more code to write!
    347     if (sscanf(line, "%s ", iword) == 1)
    348     {
    349       LSTRCPY(oword, line + LSTRLEN(iword) + 1);
    350       /* may actually have spaces in it and this is messing me up ... here is the fix */
    351       /* copy the line starting after the iword */
    352       for (i = 0, p = line + LSTRLEN(iword) + 1; *p; p++)
    353       {
    354         if (*p == '\\')
    355         {
    356           if (isConstString)
    357             oword[i++] = *p;
    358           ++p;
    359         }
    360         else if (*p == '\'')
    361           isConstString = (isConstString ? 0 : 1) ; /* toggle */
    362         if (isConstString || !isspace(*p))
    363           oword[i++] = *p;
    364       }
    365       oword[i] = '\0';
    367       /* make sure that the index in the wordmap matches the line number */
    368       if (wordmap_add_word(semgraph->scripts, oword) != lineNo)
    369       {
    370         PLogError(L("ESR_READ_ERROR: internal error adding script (%d)"), num_words_to_add);
    371         return ESR_NO_MATCH_ERROR;
    372       }
    373       lineNo++;
    374     }
    375     else
    376     {
    377       PLogMessage(L("can't parse line %s"), line);
    378       passert(0);
    379     }
    380   }
    381   pfclose (scripts_file);
    383   /* try to open the P.txt file */
    384   LSTRCPY(filename, basename);
    385   LSTRCAT(filename, ".P.txt");
    386   p_text_file = pfopen ( filename, L("r"));
    387 /*  CHKLOG(rc, PFileSystemCreatePFile(filename, TRUE, &p_text_file));
    388   CHKLOG(rc, PFileOpen(p_text_file, L("r")));*/
    390   if ( p_text_file == NULL )
    391     goto CLEANUP;
    393   /* determine number of word arcs to allocate */
    394   fpos = pftell(p_text_file);
    395   num_scope_words = 0;
    396   num_scope_chars = 0;
    397   for (num_arcs = 0; pfgets(line, MAX_STRING_LEN, p_text_file); ++num_arcs)
    398   {
    399     if (num_arcs == MAXarcID)
    400       break; /* error */
    401 	if (sscanf(line, "%hu\t%hu\t%[^\t]\t%[^\t\n\r]", &from_node, &into_node, iword, oword) == 4)
    402     {
    403 		if (IS_SCOPE_MARKER(oword)) {
    404 			num_scope_words++;
    405 			num_scope_chars += strlen_with_null( oword);
    406 			if(num_scope_chars) num_scope_chars++ ;
    407   }
    408 	}
    409   }
    410   max_num_arc_tokens = num_arcs + (arcID)num_words_to_add;
    411   MEMCHK(rc, max_num_arc_tokens, MAXarcID);
    412   pfseek(p_text_file, fpos, SEEK_SET);
    414   semgraph->arc_token_list = NEW_ARRAY(arc_token,max_num_arc_tokens, L("semgraph.wordgraph"));
    415   arc_token_list = semgraph->arc_token_list;
    416   /* need to initialize my wordmap */
    417   wordmap_create(&semgraph->scopes_olabels, num_scope_chars, num_scope_words,0); // max_num_arc_tokens);
    419   /* 1. first load up all the information */
    420   i = 0;
    421   while (pfgets(line, MAX_STRING_LEN, p_text_file))
    422   {
    423     if (sscanf(line, "%hu\t%hu\t%[^\t]\t%[^\t\n\r]", &from_node, &into_node, iword, oword) == 4)
    424     {
    425       /* the cost is 0 by default */
    426       cost = 0;
    427       /* since I am reading strings, and I want to store integers, I need to get
    428       the index of the string by looking up in the ilabels wordmap */
    429       ilabel = wordmap_find_index(ilabels, iword);
    431       /* now for the olabels, depending on the type of the label, I either use the index directly
    432       or save the index in a wordmap which will eventually give me the right index.
    433       Remember that the index must be offset by a certain value depending on which wordmap I'm using */
    435       if (IS_SCRIPT_MARKER(oword)) /* olabel type: script */
    436       {
    437         olabel = (labelID) atoi(&oword[1]);
    438         olabel = (wordID)(olabel + semgraph->script_olabel_offset); /* the offset */
    439       }
    440       else if (IS_SCOPE_MARKER(oword)) /* olabel type: scope marker */
    441       {
    442         /* check if the label is already in the wordmap, and reuse index */
    443         olabel = wordmap_find_index(semgraph->scopes_olabels, oword);
    445         if (olabel == MAXwordID) /* not found so add to wordmap and get new index */
    446           olabel = wordmap_add_word(semgraph->scopes_olabels, oword);
    447         olabel = (wordID)(olabel + semgraph->scopes_olabel_offset); /* the offset */
    448       }
    449       else /* olabel type: input symbols hopefully !!! */
    450       {
    451 	/* if oword does not have a \t in the end, add a \t*/
    453         /* check if the label is already in the wordmap, and reuse index */
    454         olabel = wordmap_find_index(ilabels, oword);
    456         if (olabel == MAXwordID) /* not found so add to wordmap and get new index */
    457           PLogMessage(L("output label not found: %s"), oword);
    458       }
    460     }
    461     else if (sscanf(line, "%hu", &from_node) == 1)
    462     {
    463       into_node = MAXnodeID;
    464       ilabel = MAXwordID;
    465       olabel = MAXwordID;
    466       cost = 0;
    467     }
    468     else
    469     {
    470       PLogMessage(L("can't parse line %s"), line);
    471       passert(0);
    472     }
    474     /* okay, now that I have the data for the current arc, save it to the arc_token data structure*/
    475     atoken = &arc_token_list[i];
    476     ++i;
    478     atoken->ilabel = ilabel;
    479     atoken->olabel = olabel;
    480     /* atoken->cost = cost; not used for now */
    482     /* initially this stores INTEGERS !!! , I need to cross-reference the integers with the
    483     appropriate arc_token pointers (in the next steps for the algorithm) */
    484     atoken->first_next_arc = (arc_token_lnk)into_node;
    485     atoken->next_token_index = (arc_token_lnk)from_node;
    486   }
    487   num_arcs = (arcID) i;
    489   pfclose(p_text_file);
    490   p_text_file = NULL;
    492   wordmap_setbase(semgraph->scopes_olabels);
    493   wordmap_ceiling(semgraph->scopes_olabels); /* we won't be adding scopes! */
    494   wordmap_setbase(semgraph->scripts);
    496   /* 2. now do the internal cross references */
    497   /* in this pass we build the 1-to-1 links, and n-to-1 links in a graph */
    498   /* in other words... first_next_arc points to the first arc leaving the node */
    499   for (i = 0; i < num_arcs; ++i)
    500   {
    501     atoken = &arc_token_list[i];
    502     into_node = (nodeID)(int)atoken->first_next_arc; /* get the integer */
    503     atoken->first_next_arc = /* converts the integer id to a arc_token pointer */
    504       get_first_arc_leaving_node1(arc_token_list, num_arcs, (nodeID)(int)atoken->first_next_arc);
    505   }
    507   /* 3. now do more internal cross refs */
    508   /* in this pass we build the 1-to-n links */
    509   /* in other words ... setup the linked list of all arc leaving from the same node */
    510   last_atoken = &arc_token_list[0];
    511   for (i = 1; i < num_arcs; ++i)
    512   {
    513     atoken = &arc_token_list[i];
    514     /* if this arc and the last one do NOT leave the same node (i.e. from_node, see above),
    515     then the next_token_index is not used */
    516     if (atoken->next_token_index != last_atoken->next_token_index)
    517       last_atoken->next_token_index = ARC_TOKEN_NULL;
    518     else
    519       last_atoken->next_token_index = ARC_TOKEN_LNK(arc_token_list, i);
    520     last_atoken = atoken;
    521   }
    522   last_atoken->next_token_index = ARC_TOKEN_NULL;
    524 #if DEBUG_ASTAR
    525   /* under debug, it's nice to be able to see the words leaving the
    526      destination node, they are stored sequentially in the debug ary */
    527   for (i = 0; i < num_arcs; i++)
    528   {
    529     LCHAR * p;
    530     arc_token* tmp;
    531     atoken = &arc_token_list[i];
    532     atoken->debug[0] = 0;
    533     tmp = ARC_TOKEN_PTR(arc_token_list, atoken->first_next_arc);
    534     for (; tmp; tmp = ARC_TOKEN_PTR(arc_token_list, tmp->next_token_index))
    535     {
    536       if (tmp->first_next_arc == ARC_TOKEN_NULL)
    537         p = "END";
    538       else if (!tmp->label)
    539         p = "NULL";
    540       else
    541         p = tmp->label;
    542       if (strlen(atoken->debug) + strlen(p) + 6 < 64)
    543       {
    544         strcat(atoken->debug, p);
    545         strcat(atoken->debug, " ");
    546       }
    547       else
    548       {
    549         strcat(atoken->debug, "...");
    550         break;
    551       }
    552     }
    553   }
    554 #endif
    555   semgraph->arc_token_list_len = (arcID)max_num_arc_tokens;
    556   /* initialize the freelist */
    557   if (num_arcs < max_num_arc_tokens)
    558   {
    559     semgraph->arc_token_freelist = &semgraph->arc_token_list[num_arcs];
    560     for (i = num_arcs; i < max_num_arc_tokens - 1; i++)
    561     {
    562       semgraph->arc_token_list[i].first_next_arc = ARC_TOKEN_NULL;
    563       semgraph->arc_token_list[i].next_token_index = ARC_TOKEN_LNK(semgraph->arc_token_list, (i + 1));
    564     }
    565     semgraph->arc_token_list[i].first_next_arc = ARC_TOKEN_NULL;
    566     semgraph->arc_token_list[i].next_token_index = ARC_TOKEN_NULL;
    567   }
    568   else
    569     semgraph->arc_token_freelist = NULL;
    571   /* for dynamic addition */
    572   for (i = 0; i < MAX_NUM_SLOTS; i++)
    573     semgraph->arcs_for_slot[i] = NULL;
    575 	semgraph->arc_token_insert_start = semgraph->arc_token_list + num_arcs;
    576     semgraph->arc_token_insert_end = NULL;
    577   return ESR_SUCCESS;
    578 CLEANUP:
    579   if (p_text_file)
    580     pfclose (p_text_file);
    581   return rc;
    582 }
    584 ESR_ReturnCode SR_SemanticGraph_Load(SR_SemanticGraph* self, wordmap* ilabels, const LCHAR* basename, int num_words_to_add)
    585 {
    586   ESR_ReturnCode rc;
    588   if (LSTRSTR(basename, L(".g2g")))
    589   {
    590     rc = SR_SemanticGraph_LoadFromImage(self, ilabels, basename);
    591   }
    592   else
    593   {
    594     rc = SR_SemanticGraph_LoadFromTextFiles(self, ilabels, basename, num_words_to_add);
    595   }
    596   return rc;
    597 }
    599 /**
    600  * Unload Sem graph
    601  */
    602 ESR_ReturnCode SR_SemanticGraph_Unload(SR_SemanticGraph* self)
    603 {
    604   SR_SemanticGraphImpl* semgraph = (SR_SemanticGraphImpl*) self;
    606   /* see the wordmap_create in the Load function */
    607   wordmap_destroy(&semgraph->scopes_olabels);
    608   wordmap_destroy(&semgraph->scripts);
    610   FREE(semgraph->arc_token_list);
    611   semgraph->arc_token_list = 0;
    612   return ESR_SUCCESS;
    613 }
    615 ESR_ReturnCode sr_semanticgraph_saveV1(SR_SemanticGraphImpl* impl, const LCHAR* g2g);
    616 ESR_ReturnCode sr_semanticgraph_saveV2(SR_SemanticGraphImpl* impl, const LCHAR* g2g);
    618 ESR_ReturnCode SR_SemanticGraph_Save(SR_SemanticGraph* self, const LCHAR* g2g, int version_number)
    619 {
    620   SR_SemanticGraphImpl* impl = (SR_SemanticGraphImpl*) self;
    621   ESR_ReturnCode rc = ESR_SUCCESS;
    623   if (version_number == 2)
    624   {
    625     rc = sr_semanticgraph_saveV2(impl,  g2g);
    626   }
    627   else
    628   {
    629     PLogError("invalid version_number %d\n", version_number);
    630     rc = ESR_INVALID_ARGUMENT;
    631   }
    632   return rc;
    633 }
    636 int sr_semanticgraph_get_type(SR_SemanticGraphImpl* impl)
    637 {
    638   arc_token *atoken, *arc_token_list = impl->arc_token_list;
    639   arc_token_lnk mergept;
    640   int expected_ilabel;
    641   atoken = impl->arc_token_list;
    643   /* 0 1 eps {
    644      1 2 13e_avenue myRoot}
    645      ...
    646      1 2 13e_avenue myRoot}
    647      2 */
    648   if (atoken->ilabel != WORD_EPSILON_LABEL)
    649     return GrammarTypeBNF;
    650   atoken = ARC_TOKEN_PTR(arc_token_list, atoken->first_next_arc);
    651   if (!atoken)
    652     return GrammarTypeBNF;
    653   mergept = atoken->first_next_arc;
    654   expected_ilabel = NUM_ITEMLIST_HDRWDS;
    655   for (; atoken; atoken = ARC_TOKEN_PTR(arc_token_list, atoken->next_token_index))
    656   {
    657     if (atoken->first_next_arc != mergept)
    658       return GrammarTypeBNF;
    659     if (atoken->ilabel != expected_ilabel)
    660       return GrammarTypeBNF;
    661     expected_ilabel++;
    662   }
    663   if (expected_ilabel != impl->ilabels->num_words)
    664     return GrammarTypeBNF;
    665   atoken = ARC_TOKEN_PTR(arc_token_list, mergept);
    666   for (; atoken; atoken = ARC_TOKEN_PTR(arc_token_list, atoken->first_next_arc))
    667   {
    668     if (atoken->next_token_index != ARC_TOKEN_NULL)
    669       return GrammarTypeBNF;
    670     if (atoken->ilabel != WORD_EPSILON_LABEL &&
    671         !(atoken->ilabel == MAXwordID && atoken->olabel == MAXwordID))
    672       return GrammarTypeBNF;
    673   }
    674   return GrammarTypeItemList;
    675 }
    677 #define SEMGR_OUTPUT_FORMAT_V2 478932784
    679 ESR_ReturnCode sr_semanticgraph_saveV2(SR_SemanticGraphImpl* impl, const LCHAR* g2g)
    680 {
    681   ESR_ReturnCode rc;
    682   PFile* fp;
    683   asr_uint32_t tmp[32];
    684   struct
    685   {
    686     asr_uint32_t format;
    687     asr_uint32_t sgtype;
    688   }
    689   header;
    690   unsigned int i, nfields;
    692   fp = pfopen ( g2g, L("r+b"));
    693 /*  CHKLOG(rc, PFileSystemCreatePFile(g2g, isLittleEndian, &fp));
    694   CHKLOG(rc, PFileOpen(fp, L("r+b")));*/
    696   if ( fp == NULL )
    697   {
    698   	rc = ESR_OPEN_ERROR;
    699     goto CLEANUP;
    700   }
    702   pfseek(fp, 0, SEEK_END);
    704   header.format = IMAGE_FORMAT_V2;
    705   header.sgtype = sr_semanticgraph_get_type(impl);
    706   header.sgtype = GrammarTypeBNF;
    709   PLogMessage("sr_semanticgraph_saveV2() semgraphtype %d", header.sgtype);
    710 #endif
    711   if (pfwrite(&header, 4 /*sz*/, 2/*ni*/, fp) != 2)
    712   {
    713     rc = ESR_WRITE_ERROR;
    714     PLogError(L("ESR_WRITE_ERROR: could not write V2"));
    715     goto CLEANUP;
    716   }
    718   if (header.sgtype == GrammarTypeItemList)
    719   {
    720     arc_token *parser, *atok;
    722     /* write num_words size array of short script ids
    723        this might be just a y=x array, but it could be there
    724        are synonyms, eg. NEW_YORK NEW_YORK_CITY -> same script
    725     */
    726     parser = impl->arc_token_list;
    727     parser = ARC_TOKEN_PTR(impl->arc_token_list, parser->first_next_arc);
    728     for (i = NUM_ITEMLIST_HDRWDS; i < impl->ilabels->num_words; i++)
    729     {
    730       for (atok = parser; atok; atok = ARC_TOKEN_PTR(impl->arc_token_list, atok->next_token_index))
    731       {
    732         if (atok->ilabel == i) break;
    733       }
    734       if (!atok)
    735       {
    736         rc = ESR_INVALID_STATE;
    737         PLogError("Can't find word %d in semgraph\n", i);
    738         goto CLEANUP;
    739       }
    740       tmp[0] = atok->olabel;
    741       if (pfwrite(tmp, sizeof(tmp[0]), 1, fp) != 1)
    742       {
    743         rc = ESR_WRITE_ERROR;
    744         PLogError(L("ESR_WRITE_ERROR: could not write V2"));
    745         goto CLEANUP;
    746       }
    747     }
    748     if ((rc = serializeWordMapV2(impl->scripts, fp)) != ESR_SUCCESS)
    749     {
    750       PLogError(ESR_rc2str(rc));
    751       goto CLEANUP;
    752     }
    753   }
    754   else
    755   {
    757     i = 0;
    758     tmp[i++] = impl->script_olabel_offset;
    759     tmp[i++] = impl->scopes_olabel_offset;
    760     nfields = i;
    762     if (pfwrite(tmp, sizeof(tmp[0]), nfields, fp) != nfields)
    763     {
    764       rc = ESR_WRITE_ERROR;
    765       PLogError(L("ESR_WRITE_ERROR: could not write script_olabel_offset"));
    766       goto CLEANUP;
    767     }
    769     /* word arcs */
    770     if ((rc = serializeArcTokenInfoV2(impl, fp)) != ESR_SUCCESS)
    771     {
    772       PLogError(ESR_rc2str(rc));
    773       goto CLEANUP;
    774     }
    776     /* do not WRITE ilabels... this is a ref to the olabels from rec context */
    778     /* scopes */
    779     if ((rc = serializeWordMapV2(impl->scopes_olabels, fp)) != ESR_SUCCESS)
    780     {
    781       PLogError(ESR_rc2str(rc));
    782       goto CLEANUP;
    783     }
    785     if ((rc = serializeWordMapV2(impl->scripts, fp)) != ESR_SUCCESS)
    786     {
    787       PLogError(ESR_rc2str(rc));
    788       goto CLEANUP;
    789     }
    791     PLogMessage("G2G done WR semg %d", pftell(fp));
    792 #endif
    793   }
    794   rc = ESR_SUCCESS;
    795 CLEANUP:
    796   if (fp)
    797     pfclose (fp);
    798   return rc;
    799 }
    801 arc_token* arc_tokens_find_ilabel(arc_token* base, arc_token* arc_token_list, wordID ilabel)
    802 {
    803   arc_token* p;
    804   for (p = arc_token_list; p != NULL; p = ARC_TOKEN_PTR(base, p->next_token_index))
    805     if (p->ilabel == ilabel) return p;
    806   return NULL;
    807 }
    809 arc_token* arc_tokens_get_free(arc_token* base, arc_token** arc_token_freelist)
    810 {
    811   arc_token* tmp = (*arc_token_freelist);
    812   if (tmp == NULL)
    813     return NULL;
    814   (*arc_token_freelist) = ARC_TOKEN_PTR(base, tmp->next_token_index);
    815   tmp->ilabel = tmp->olabel = 0;
    816   tmp->next_token_index = ARC_TOKEN_NULL;
    817   tmp->first_next_arc = ARC_TOKEN_NULL;
    818   return tmp;
    819 }
    821 int arc_tokens_list_size(arc_token* base, arc_token* head)
    822 {
    823   arc_token* tmp = head;
    824   int count = 0;
    825   for (; tmp; tmp = ARC_TOKEN_PTR(base, tmp->next_token_index))
    826   {
    827     count++;
    828   }
    829   return count;
    830 }
    832 void arc_tokens_free_list(arc_token* base, arc_token** arc_token_freelist, arc_token* head)
    833 {
    834   arc_token *tail, *next = (arc_token*)1;
    835   if (head == NULL)
    836     return;
    837   for (tail = head; ; tail = next)
    838   {
    839     next = ARC_TOKEN_PTR(base, tail->next_token_index);
    840     if (next == NULL) break;
    841   }
    842   tail->next_token_index = ARC_TOKEN_PTR2LNK(base, (*arc_token_freelist));
    843   *arc_token_freelist = head;
    844 }
    846 ESR_ReturnCode find_in_union_of_scripts(const LCHAR* union_script, const LCHAR* script, ESR_BOOL* isFound)
    847 {
    848   const LCHAR* start;
    849   const LCHAR* end;
    850   const LCHAR* p;
    851   const LCHAR* q;
    853   if (union_script == NULL || script == NULL)
    854     return ESR_INVALID_ARGUMENT;
    856   start = LSTRCHR(union_script, L('\''));
    857   if (start == NULL)
    858     return ESR_INVALID_ARGUMENT;
    860   start++; /* point to first char after \' */
    862   end = LSTRCHR(start, L('\'')); /* point to last \' */
    863   if (end == NULL)
    864     return ESR_INVALID_ARGUMENT;
    866   p = start;
    868   start = LSTRCHR(script, L('\''));
    869   if (start == NULL)
    870     return ESR_INVALID_ARGUMENT;
    871   start++; /* point to first char after \' */
    873   q = start;
    875   while (p < end)
    876   {
    877     if (*p == MULTIPLE_MEANING_JOIN_CHAR) /* if at the end of a meaning (not end of union)
    878                                                   and p matched q all the way up to join char then found! */
    879     {
    880       *isFound = ESR_TRUE;
    881       return ESR_SUCCESS;
    882     }
    883     else if (*p == *q) /* while same keep going */
    884     {
    885       if (*p == *(end - 1)) /* if at the end and p matched q all the way then found! */
    886       {
    887         *isFound = ESR_TRUE;
    888         return ESR_SUCCESS;
    889       }
    890       q++;
    891     }
    892     else /* skip to next meaning after join char */
    893     {
    894       while (*p != MULTIPLE_MEANING_JOIN_CHAR && p < end)
    895         p++;
    896       /* reset q */
    897       q = start;
    898     }
    899     p++;
    900   }
    902   *isFound = ESR_FALSE;
    903   return ESR_SUCCESS;
    904 }
    906 #define QUOTE_CHAR L('\'')
    907 int count_num_literals(const LCHAR* a, const LCHAR** start_points, int max_num_start_points)
    908 {
    909   int num = 0;
    910   const LCHAR *p, *q = a;
    911   const LCHAR *end = a + LSTRLEN(a);
    912   while (1)
    913   {
    914     /* look for starting QUOTE_CHAR */
    915     for (p = q; p < end; p++)
    916     {
    917       if (*p == ESC_CHAR) p++;
    918       else if (*p == QUOTE_CHAR) break;
    919     }
    920     if (p == end) break;
    921     if (num > max_num_start_points) break; /* just abort the counting! */
    922     start_points[num] = p;
    923     /* look for ending QUOTE_CHAR */
    924     for (q = p + 1; q < end; q++)
    925     {
    926       if (*q == ESC_CHAR) q++;
    927       else if (*q == QUOTE_CHAR) break;
    928     }
    929     if (q == end) /* does not close! */
    930       return -1;
    931     p = ++q;
    932     num++;
    933   }
    934   return num;
    935 }
    936 int union_literal_pair(LCHAR* o, LCHAR* a, LCHAR* b, LCHAR** pptra)
    937 {
    938   LCHAR *enda, *ptra, *endb, *ptrb;
    939   LCHAR *p, *ptro;
    940   enda = a + LSTRLEN(a);
    941   endb = b + LSTRLEN(b);
    942   /* capture the data from a to ptra */
    943   for (ptra = a + 1; ptra < enda; ptra++)
    944   {
    945     if (*ptra == ESC_CHAR) ptra++;
    946     else if (*ptra == QUOTE_CHAR) break;
    947   }
    948   /* capture the data from b to ptrb */
    949   for (ptrb = b + 1; ptrb < endb; ptrb++)
    950   {
    951     if (*ptrb == ESC_CHAR) ptrb++;
    952     else if (*ptrb == QUOTE_CHAR) break;
    953   }
    954   /* now make the output */
    955   ptro = o;
    956   *ptro++ = QUOTE_CHAR;
    957   for (p = a + 1; p < ptra; p++) *ptro++ = *p;
    958   *ptro++ = MULTIPLE_MEANING_JOIN_CHAR;
    959   for (p = b + 1; p < ptrb; p++) *ptro++ = *p;
    960   *ptro++ = QUOTE_CHAR;
    961   *ptro++ = 0;
    962   *pptra = ptra + 1;
    963   return 0;
    964 }
    966 /* now handles n1='52';n2='62'; UNION n1='53';nx='63'; */
    968 ESR_ReturnCode make_union_of_scripts(LCHAR* union_script, const size_t max_len, const LCHAR* a, const LCHAR* b)
    969 {
    970   int i, num_literals_in_a, num_literals_in_b;
    971   LCHAR *spa[8], *spb[8], *spo[8], *ptra;
    973   if (a == NULL || b == NULL)
    974     return ESR_INVALID_ARGUMENT;
    976   num_literals_in_a = count_num_literals(a, (const LCHAR **)spa, 8);
    977   num_literals_in_b = count_num_literals(b, (const LCHAR **)spb, 8);
    979   if (num_literals_in_a == 0 && num_literals_in_b == 0)
    980   {
    981     if (LSTRLEN(a) > max_len) return ESR_BUFFER_OVERFLOW;
    982     else
    983     {
    984       LSTRCPY(union_script, a);
    985       return ESR_SUCCESS;
    986     }
    987   }
    988   else if (num_literals_in_a != num_literals_in_b)
    989   {
    990     return ESR_INVALID_ARGUMENT;
    991   }
    993   /* V='Springfield_IL' union V='Springfield_MA' is V='Springfield_IL#Springfield_MA' */
    994   /* 18               +       18          -2     =  33 + 1 for NULL             */
    995   if ((LSTRLEN(a) + LSTRLEN(b) - 2) > max_len)
    996   {
    997     PLogError("Temp buffer (size %d) to hold union of multiple meanings (size %d) is too small", max_len, (LSTRLEN(a) + LSTRLEN(b) - 2));
    998     return ESR_BUFFER_OVERFLOW;
    999   }
   1001   LSTRCPY(union_script, a);
   1002   for (i = 0; i < num_literals_in_a; i++)
   1003   {
   1004     count_num_literals(union_script, (const LCHAR **)spo, 8);
   1005     /* here union_script is n0='52';n1='62'; */
   1006     union_literal_pair(spo[i], spa[i], spb[i], &ptra);
   1007 #ifdef _WIN32
   1008     if (LSTRLEN(spo[i]) > MAX_SEMPROC_VALUE)
   1009       pfprintf(PSTDOUT, "Warning: won't be able to parse this script! len %d>%d %s\n", LSTRLEN(spo[i]), MAX_SEMPROC_VALUE, spo[i]);
   1010 #endif
   1011     /* here union_script is n0='52#53' */
   1012     LSTRCAT(union_script, ptra);
   1013     /* here union_script is n0='52#53';n1='62'; */
   1014   }
   1015   return ESR_SUCCESS;
   1016 }
   1018 /**
   1019  * Default implementation.
         *
         * Records, in the semantic graph, that `word` in slot `_slot` carries the
         * meaning `script`.
         *
         * - Returns ESR_SUCCESS without doing anything when script is NULL, empty,
         *   or the literal string "NULL".
         * - A slot name starting with '@' is looked up as
         *   SLOTNAME_INDICATOR + (name without '@') + SLOTNAME_INDICATOR.
         * - When newWordAddedToFST is true, or no token with this word's id is
         *   found in the slot, a token is taken from the free list (the token
         *   array is enlarged first when the free list is empty) and placed at the
         *   head of the slot's list with olabel = script_olabel_offset + scriptID.
         * - Otherwise, when the stored script differs from `script`, the union of
         *   both (make_union_of_scripts) is stored in impl->scripts and the
         *   existing token's olabel is redirected to it.
         *
         * Returns ESR_NO_MATCH_ERROR when the slot or word is not in impl->ilabels,
         * ESR_OUT_OF_MEMORY when a script or token cannot be added, and otherwise
         * ESR_SUCCESS. A failure of make_union_of_scripts is presumably routed to
         * CLEANUP by CHKLOG (macro not visible here) and returned as rc.
   1020  */
   1021 ESR_ReturnCode SR_SemanticGraph_AddWordToSlot(SR_SemanticGraph* self, const LCHAR* _slot, const LCHAR* word, const LCHAR* script, const ESR_BOOL newWordAddedToFST)
   1022 {
   1023   struct SR_SemanticGraphImpl_t *impl = (struct SR_SemanticGraphImpl_t*) self;
   1024   arc_token *token, *tmp;
   1025   arc_token *tmp_arc_token_list;
   1026   wordID wdID, scriptID, old_scriptID;
   1027   wordID slotID;
   1028   LCHAR union_script[MAX_STRING_LEN]; /* sizeof used elsewhere */
   1029   ESR_ReturnCode rc; int i;
   1030   int tmp_arc_token_list_len;
   1031   int offset;
   1032 #define MAX_WORD_LEN 128
   1033   char veslot[MAX_WORD_LEN];
   1035   if (script == NULL || *script == L('\0') || !LSTRCMP(script, L("NULL")))
   1036     return ESR_SUCCESS; /* no script to add so keep going */
   1038   /* find out if the word I am adding already exists. If it already exists, then that means that I
   1039      potentially am adding an alternate meaning for the word */
   1040   /* the slotname in .PCLG.txt and .map files use __ as the indicator. Xufang */
          /* NOTE(review): veslot holds MAX_WORD_LEN chars and the strcpy/strcat calls
             below are unbounded, so an over-long _slot would overrun it; _slot is
             also dereferenced without a NULL check. */
   1041   if(_slot[0] == '@') {
   1042     strcpy(veslot,SLOTNAME_INDICATOR);
   1043     strcat(veslot,_slot+1);
   1044     strcat(veslot,SLOTNAME_INDICATOR);
   1045   } else
   1046     strcpy(veslot, _slot);
   1048   slotID = wordmap_find_rule_index(impl->ilabels, veslot);
   1049   if (slotID == MAXwordID)
   1050   {
   1051     PLogError(L("ESR_NO_MATCH_ERROR: Could not find slotID in wordmap %s"), _slot);
   1052     return ESR_NO_MATCH_ERROR;
   1053   }
   1054   wdID = wordmap_find_index_in_rule(impl->ilabels, word, slotID);
   1055   if (wdID == MAXwordID)
   1056   {
   1057     PLogError(L("ESR_NO_MATCH_ERROR: Could not find wordID/slotID in wordmap %s/%d"), word, slotID);
   1058     return ESR_NO_MATCH_ERROR;
   1059   }
   1061   /* **this is an optimization step** */
   1062   /* Is word already added in this slot? if so, get the token pointer, else, token is NULL
   1063    *
   1064    * the assumption is that FST_AddWordToGrammar will tell us if this word was newly added in the FST, or
   1065    * if the word was added at least 1 iteration ago, meaning that I have already added it to my
   1066    * semgraph slot at some earlier point
   1067    */
   1068   if (newWordAddedToFST)
   1069     token = NULL;
   1070   else
   1071     token = arc_tokens_find_ilabel(impl->arc_token_list, impl->arcs_for_slot[slotID], wdID);
          /* FST_GROW_FACTOR expands textually, so "len * FST_GROW_FACTOR" is the
             integer expression len * 12 / 10 */
   1073 #define FST_GROW_FACTOR   12/10
   1074 #define FST_GROWARCS_MIN    100
   1075   if (token == NULL) /* new word to add to slot */
   1076   {
   1077     /* add the script if new  */
   1078     scriptID = wordmap_find_index(impl->scripts, script);
   1079     if (scriptID == MAXwordID)
   1080       scriptID = wordmap_add_word(impl->scripts, script);
   1081     if (scriptID == MAXwordID)
   1082     {
   1083       PLogError(L("ESR_OUT_OF_MEMORY: Could not add script to wordmap"));
   1084       return ESR_OUT_OF_MEMORY;
   1085     }
            /* remember the current head of the slot's list; the new token is pushed in front of it */
   1087     token = impl->arcs_for_slot[slotID];
   1088     tmp = arc_tokens_get_free(impl->arc_token_list, &(impl->arc_token_freelist));
   1089     if (tmp == NULL)
   1090       {
              /* NOTE(review): this #if opens inside the braces while its #endif
                 sits after the closing brace, so the braces only balance when
                 FST_GROW_FACTOR is defined. */
   1091 #if defined (FST_GROW_FACTOR)
              /* free list exhausted: enlarge the array; when the growth is
                 FST_GROWARCS_MIN entries or fewer, add FST_GROWARCS_MIN more */
   1092 	tmp_arc_token_list_len = impl->arc_token_list_len * FST_GROW_FACTOR;
   1093 	if(tmp_arc_token_list_len - impl->arc_token_list_len <=FST_GROWARCS_MIN)
   1094 	  tmp_arc_token_list_len+=FST_GROWARCS_MIN;
   1096 	tmp_arc_token_list= NEW_ARRAY(arc_token,tmp_arc_token_list_len, L("semgraph.wordgraph"));
   1097 	if(!tmp_arc_token_list) {
   1098 	  PLogError(L("ESR_OUT_OF_MEMORY: Could not extend allocation of semgraph.wordgraph"));
   1099 	  return ESR_OUT_OF_MEMORY;
   1100 	}
   1101 	memcpy(tmp_arc_token_list,impl->arc_token_list, impl->arc_token_list_len*sizeof(arc_token));
              /* re-point every slot head at the same position in the new array */
   1103 	for(i=0; i<MAX_NUM_SLOTS;i++)
   1104 	  {
   1105 	    if(impl->arcs_for_slot[i] != NULL) {
   1106 	      offset = impl->arcs_for_slot[i] - impl->arc_token_list;
   1107 	      impl->arcs_for_slot[i] = tmp_arc_token_list + offset;
   1108 	    }
   1109 	  }
   1110 	token = impl->arcs_for_slot[slotID];
   1112 	ASSERT( impl->arc_token_freelist == NULL);
              /* the free list now starts at the first newly added entry */
   1114 	impl->arc_token_freelist = tmp_arc_token_list + impl->arc_token_list_len;
   1116 	FREE(impl->arc_token_list);
   1117 	impl->arc_token_insert_start = tmp_arc_token_list + (impl->arc_token_insert_start - impl->arc_token_list); //Rabih fix
   1118 	impl->arc_token_list = tmp_arc_token_list;
              /* chain the new entries together; the last one ends the free list */
   1120 	for (i = impl->arc_token_list_len; i < tmp_arc_token_list_len - 1; i++)
   1121 	  {
   1122 	    impl->arc_token_list[i].first_next_arc = ARC_TOKEN_NULL;
   1123 	    impl->arc_token_list[i].next_token_index = ARC_TOKEN_LNK(impl->arc_token_list, (i + 1));
   1124 	  }
   1125 	impl->arc_token_list[i].first_next_arc = ARC_TOKEN_NULL;
   1126 	impl->arc_token_list[i].next_token_index = ARC_TOKEN_NULL;
   1128 	impl->arc_token_list_len = tmp_arc_token_list_len;
   1129 	tmp = arc_tokens_get_free(impl->arc_token_list, &(impl->arc_token_freelist));
   1130       }
   1131 #endif
   1132     if(tmp == NULL) {
   1133       PLogError(L("ESR_OUT_OF_MEMORY: Error adding more arcs to graph\n"));
   1134       return ESR_OUT_OF_MEMORY;
   1135     }
            /* push the new token at the head of the slot's list */
   1136     impl->arcs_for_slot[slotID] = tmp;
   1137     tmp->next_token_index = ARC_TOKEN_PTR2LNK(impl->arc_token_list, token);
   1138     tmp->ilabel = wdID;
   1139     tmp->olabel = (wordID)(impl->script_olabel_offset + scriptID);
   1140   }
   1141   else
   1142   {
            /* the word is already in the slot: recover the index of its current script */
   1143     old_scriptID = token->olabel - impl->script_olabel_offset;
   1145     if (!LSTRCMP(impl->scripts->words[old_scriptID], script))
   1146     {
   1147       /* nothing to do, we have the word, same meaning again so do nothing */
   1148     }
   1149     else
   1150     {
   1152       CHKLOG(rc, make_union_of_scripts(union_script, sizeof(union_script), impl->scripts->words[old_scriptID], script));
   1155       PLogMessage(L("Adding alternate meaning %s for word %s (%s) in slot %s\n"), script, word,
   1156                   impl->scripts->words[old_scriptID], impl->ilabels->words[slotID]);
              /* NOTE(review): no #if matching the #endif below is visible in this
                 listing -- confirm against the original file. */
   1157 #endif
   1158       /* add the union as if new (if not already there) */
   1159       scriptID = wordmap_find_index(impl->scripts, union_script);
   1160       if (scriptID == MAXwordID)
   1161         scriptID = wordmap_add_word(impl->scripts, union_script);
   1162       if (scriptID == MAXwordID)
   1163       {
   1164         PLogError(L("ESR_OUT_OF_MEMORY: Could not add script to wordmap"));
   1165         return ESR_OUT_OF_MEMORY;
   1166       }
   1168       /* make the olabel point to the union */
   1169       token->olabel = (wordID)(impl->script_olabel_offset + scriptID);
   1170     }
   1171   }
   1172   return ESR_SUCCESS;
   1173 CLEANUP:
   1174   return rc;
   1175 }
   1178 /**
   1179  * Default implementation.
   1180  */
   1181 ESR_ReturnCode SR_SemanticGraph_Reset(SR_SemanticGraph* self)
   1182 {
   1183   struct SR_SemanticGraphImpl_t *impl = (struct SR_SemanticGraphImpl_t*) self;
   1184   wordID slotid;
   1185   arc_token* tmp;
   1186   arc_token *tmp_arc_token_list;
   1188   wordmap_reset(impl->scopes_olabels);
   1189   wordmap_reset(impl->scripts);
   1190   wordmap_reset(impl->ilabels);   //Rabih: I added this
   1191   for (slotid = 1; slotid < impl->ilabels->num_slots; slotid++)
   1192   {
   1193     tmp = impl->arcs_for_slot[slotid];
   1194     arc_tokens_free_list(impl->arc_token_list, &(impl->arc_token_freelist), tmp);
   1195     impl->arcs_for_slot[slotid] = NULL;
   1196 #if defined(SANITY_CHECK)
   1197     int count;
   1198     for (count = 0, tmp = impl->arc_token_freelist; tmp != NULL;
   1199          tmp = ARC_TOKEN_PTR(impl->arc_token_list, tmp->next_token_index))
   1200     {
   1201       ASSERT(tmp->ilabel != 79324);
   1202       tmp->ilabel = 79324;
   1203       count++;
   1204     }
   1205     PLogError("after reset freelist size is %d", count);
   1206 #endif
   1207   }
   1209   // Rabih : Reset the arc_token_list
   1210   if(impl->ilabels->num_words == impl->ilabels->num_base_words)
   1211   {}
   1212   else{
   1213   impl->arc_token_list_len = (size_t)(impl->arc_token_insert_start - impl->arc_token_list);
   1214   tmp_arc_token_list= NEW_ARRAY(arc_token,impl->arc_token_list_len, L("semgraph.wordgraph"));
   1215   memcpy(tmp_arc_token_list,impl->arc_token_list, impl->arc_token_list_len*sizeof(arc_token));
   1217   impl->arc_token_freelist = NULL;
   1219   FREE(impl->arc_token_list);
   1220   impl->arc_token_list = tmp_arc_token_list;
   1221   }
   1222   return ESR_SUCCESS;
   1223 }
   1225 static ESR_ReturnCode serializeArcTokenInfoV2(SR_SemanticGraphImpl *impl,
   1226     PFile* fp)
   1227 {
   1228   int i;
   1229   asr_uint32_t idx;
   1230   arcID tmp[32];
   1232   if (pfwrite(&impl->arc_token_list_len, 2, 1, fp) != 1)
   1233     return ESR_WRITE_ERROR;
   1235   idx = PTR_TO_IDX(impl->arc_token_freelist, impl->arc_token_list);
   1237   if (pfwrite(&idx, 4, 1, fp) != 1)
   1238     return ESR_WRITE_ERROR;
   1240   idx = PTR_TO_IDX(impl->arc_token_insert_start, impl->arc_token_list);
   1242   if (pfwrite(&idx, 4, 1, fp) != 1)
   1243     return ESR_WRITE_ERROR;
   1245   idx = 0;
   1246   if (pfwrite(&idx, 4, 1, fp) != 1)
   1247     return ESR_WRITE_ERROR;
   1249   for (i = 0; i < impl->arc_token_list_len; ++i)
   1250   {
   1251     arc_token* token = &impl->arc_token_list[i];
   1252     tmp[0] = token->ilabel;
   1253     tmp[1] = token->olabel;
   1254     tmp[2] = ARC_TOKEN_IDX(impl->arc_token_list, token->first_next_arc);
   1255     tmp[3] = ARC_TOKEN_IDX(impl->arc_token_list, token->next_token_index);
   1256     if (pfwrite(tmp, sizeof(tmp[0]), 4, fp) != 4)
   1257       return ESR_WRITE_ERROR;
   1258   }
   1260   /* new, fixes load/save bug 2007 July 31
   1261 	todo: change 4 to sizeof(asr_uint32) */
   1262   if(1) {
   1263 	asr_uint32_t idx[MAX_NUM_SLOTS];
   1264 	for(i=0; i<MAX_NUM_SLOTS; i++)
   1265 		idx[i] = PTR_TO_IDX(impl->arcs_for_slot[i], impl->arc_token_list);
   1266 	if (pfwrite(&idx, 4, MAX_NUM_SLOTS, fp) != MAX_NUM_SLOTS)
   1267 			return ESR_WRITE_ERROR;
   1268   }
   1270   return ESR_SUCCESS;
   1271 }
   1273 static ESR_ReturnCode deserializeArcTokenInfoV2(SR_SemanticGraphImpl *impl,
   1274     PFile* fp)
   1275 {
   1276   int i;
   1277   asr_uint32_t idx;
   1278   ESR_ReturnCode rc = ESR_SUCCESS;
   1279   arcID tmp[32];
   1281   if (pfread(&impl->arc_token_list_len, 2, 1, fp) != 1)
   1282   {
   1283     rc = ESR_READ_ERROR;
   1284     PLogError(L("ESR_READ_ERROR: could not read arc_token_list_len"));
   1285     return rc;
   1286   }
   1288   impl->arc_token_list = NEW_ARRAY(arc_token,
   1289                                    impl->arc_token_list_len,
   1290                                    L("semgraph.wordgraph"));
   1292   if (impl->arc_token_list == NULL)
   1293   {
   1294     rc = ESR_OUT_OF_MEMORY;
   1295     PLogError(ESR_rc2str(rc));
   1296     return ESR_OUT_OF_MEMORY;
   1297   }
   1299   if (pfread(&idx, 4, 1, fp) != 1)
   1300   {
   1301     rc = ESR_READ_ERROR;
   1302     PLogError(ESR_rc2str(rc));
   1303     goto CLEANUP;
   1304   }
   1306   impl->arc_token_freelist = IDX_TO_PTR(idx, impl->arc_token_list);
   1308   if (pfread(&idx, 4, 1, fp) != 1)
   1309   {
   1310     rc = ESR_READ_ERROR;
   1311     PLogError(ESR_rc2str(rc));
   1312     goto CLEANUP;
   1313   }
   1315   impl->arc_token_insert_start = IDX_TO_PTR(idx, impl->arc_token_list);
   1316   // impl->arc_token_insert_start = impl->arc_token_list + impl->arc_token_list_len; // Rabih's fix
   1318   if (pfread(&idx, 4, 1, fp) != 1)
   1319   {
   1320     rc = ESR_READ_ERROR;
   1321     PLogError(ESR_rc2str(rc));
   1322     goto CLEANUP;
   1323   }
   1324   impl->arc_token_insert_end = 0;
   1326   for (i = 0; i < impl->arc_token_list_len; ++i)
   1327   {
   1328     arc_token* token = &impl->arc_token_list[i];
   1329     if (pfread(tmp, sizeof(tmp[0]), 4, fp) != 4)
   1330     {
   1331       rc = ESR_READ_ERROR;
   1332       goto CLEANUP;
   1333     }
   1334     token->ilabel = tmp[0];
   1335     token->olabel = tmp[1];
   1336     if (tmp[2] == MAXarcID)
   1337       token->first_next_arc = ARC_TOKEN_NULL;
   1338     else
   1339       token->first_next_arc = ARC_TOKEN_LNK(impl->arc_token_list, tmp[2]);
   1340     if (tmp[3] == MAXarcID)
   1341       token->next_token_index = ARC_TOKEN_NULL;
   1342     else
   1343       token->next_token_index = ARC_TOKEN_LNK(impl->arc_token_list, tmp[3]);
   1344   }
   1346   /* new, fixes load/save bug 2007 July 31
   1347 	todo: change 4 to sizeof(asr_uint32) */
   1348   if(1) {
   1349 		asr_uint32_t idx[MAX_NUM_SLOTS];
   1350 		if (pfread(&idx[0], 4, MAX_NUM_SLOTS, fp) != MAX_NUM_SLOTS) {
   1351 			rc = ESR_READ_ERROR;
   1352 			PLogError(ESR_rc2str(rc));
   1353 			goto CLEANUP;
   1354 		}
   1355 		for(i=0; i<MAX_NUM_SLOTS; i++)
   1356 			impl->arcs_for_slot[i] = IDX_TO_PTR(idx[i], impl->arc_token_list);
   1357    }
   1359   return ESR_SUCCESS;
   1361 CLEANUP:
   1362   FREE(impl->arc_token_list);
   1363   impl->arc_token_list =
   1364     impl->arc_token_freelist =
   1365       impl->arc_token_insert_start =
   1366         impl->arc_token_insert_end = NULL;
   1367   return rc;
   1368 }