Home | History | Annotate | Download | only in grxmlcompile
      1 /*---------------------------------------------------------------------------*
      2  *  grxmldoc.cpp  *
      3  *                                                                           *
 *  Copyright 2007, 2008 Nuance Communications, Inc.                               *
      5  *                                                                           *
      6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
      7  *  you may not use this file except in compliance with the License.         *
      8  *                                                                           *
      9  *  You may obtain a copy of the License at                                  *
     10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
     11  *                                                                           *
     12  *  Unless required by applicable law or agreed to in writing, software      *
     13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
     14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
     15  *  See the License for the specific language governing permissions and      *
     16  *  limitations under the License.                                           *
     17  *                                                                           *
     18  *---------------------------------------------------------------------------*/
     19 
     20 #include <assert.h>
     21 #include <stdlib.h>
     22 #include <fstream>
     23 #include <sstream>
     24 #include <iostream>
     25 #include <algorithm> // for std::sort
     26 #include "tinyxml.h"
     27 #include "grph.h"       // The word graph object and interface
     28 #include "sub_grph.h"	// The sub-graph object and interface
     29 #include "hashmap.h"
     30 #include "grxmldoc.h"
     31 #include "ESR_Session.h"
     32 //#include "LCHAR.h"
     33 
// Set to a non-zero value to compile in the DEBUG_PRINT tracing below.
#define GRXML_DEBUG 0
#define MAX_PATH_NAME 512

// Prints message x to stdout and terminates the process with exit code y.
// The expansion is a bare brace block, so call sites compile with or without
// a trailing semicolon (several in this file omit it).
#define FATAL_ERROR(x,y) { std::cout << (x) << std::endl; exit ((y)); }
// Prints message x to stdout; the expansion already ends in ';'.
#define WARNING(x) std::cout << (x) << std::endl;

#if GRXML_DEBUG
//#define DEBUG_PRINT(x) //
// Debug build: DEBUG_PRINT writes x to stdout, PRINT_EXPRESSION is disabled.
#define DEBUG_PRINT(x) std::cout << (x) << std::endl;
#define PRINT_EXPRESSION(x)
//#define PRINT_EXPRESSION(x) std::cout << (x) << std::endl;
#else
// Non-debug build: the trailing "//" is an ordinary comment that is stripped
// before the macro is defined, so both macros expand to nothing and their
// argument is never evaluated.
#define DEBUG_PRINT(x) //
#define PRINT_EXPRESSION(x) //

#endif
     50 
     51 using namespace std;
     52 
// Reports on stdout when string s is empty; t names the kind of string and is
// only used in the message. This is a diagnostic only: execution continues
// after the message is printed.
#define CHECK_NOT_EMPTY(s, t) { if (s.empty()) \
				{ \
				std::cout << "ERROR: Empty string of type "  << t <<std::endl; \
				} \
			     }
     58 
     59 int get_range(const std::string& s, int* minCnt, int* maxCnt)
     60 {
     61   std::string sval;
     62   unsigned int p1 =s.find("-");
     63   if ( p1 !=string::npos ) {
     64     sval.assign( s, 0, p1 );
     65     if(strspn(sval.c_str(),"0123456789")<1) return 1;
     66     *minCnt = atoi( sval.c_str() );
     67     sval.assign( s, p1+1, s.size() );
     68     *maxCnt = -1;    // 0== any?
     69     // If max is given then use BeginCount otherwise use BeginItemRepeat
     70     if (!sval.empty() ) {
     71       if(strspn(sval.c_str(),"0123456789")<1) return 1;
     72       *maxCnt = atoi( sval.c_str() );
     73     }
     74     return 0;
     75   }
     76   p1 = s.find("+");
     77   if( p1 != string::npos) {
     78     sval.assign( s, 0, p1 );
     79     if(strspn(sval.c_str(),"0123456789")<1) return 1;
     80     *minCnt = atoi( sval.c_str() );
     81     *maxCnt = -1;
     82     return 0;
     83   }
     84   if(strspn(s.c_str(),"0123456789")<1) return 1;
     85   *minCnt = *maxCnt = atoi( s.c_str());
     86   return 0;
     87 }
     88 
     89 GRXMLDoc::GRXMLDoc()
     90 {
     91     m_NodeKeyWords.insert(make_pair("grammar", NodeTypeGrammar));
     92     m_NodeKeyWords.insert(make_pair("rule", NodeTypeRule));
     93     m_NodeKeyWords.insert(make_pair("ruleref", NodeTypeRuleReference));
     94     m_NodeKeyWords.insert(make_pair("one-of", NodeTypeOneOf));
     95     m_NodeKeyWords.insert(make_pair("item", NodeTypeItem));
     96     m_NodeKeyWords.insert(make_pair("tag", NodeTypeTag));
     97     m_NodeKeyWords.insert(make_pair("count", NodeTypeCount));
     98     m_NodeKeyWords.insert(make_pair("meta", NodeTypeMeta));
     99     m_pGraph = 0;
    100     m_RuleAutoIndex = 0;
    101     m_TagAutoIndex = 0;
    102     m_LabelAutoIndex = 0;
    103     m_ExpandedRulesAutoIndex = 0;
    104     m_XMLFileName = "dummy.xml";
    105 }
    106 
    107 
    108 GRXMLDoc::~GRXMLDoc()
    109 {
    110     deleteRules();
    111     if (m_pGraph) {
    112         delete m_pGraph;
    113     }
    114 }
    115 
    116 
    117 bool GRXMLDoc::parseGrammar( XMLNode &node, std::string & xMLFileName )
    118 {
    119     m_XMLFileName = xMLFileName;
    120     // Set up the internally defined rules, etc.
    121     initializeLists();
    122     // The top level "document" node is given to this fn
    123     // Create the container for the word graph.
    124     if (m_pGraph) {
    125         delete m_pGraph;
    126     }
    127     m_pGraph = new Graph("XML grammar");
    128     SubGraph *p_SubGraph;
    129 
    130     parseNode( node, p_SubGraph, 1 );     // NB Subgraph pointed to will change in recursive fn.
    131 
    132     if (findSubGraph( m_RootRule, p_SubGraph )) {
    133 	m_pGraph->ExpandRules (p_SubGraph);
    134 	p_SubGraph->RemoveInternalConnections ();
    135 	//Print the root rule.
    136 	//printSubgraph( *p_SubGraph );
    137     }
    138     return true;
    139 }
    140 
    141 
    142 bool GRXMLDoc::parseNode( XMLNode &node, SubGraph *&p_SubGraph, const unsigned int level )
    143 {
    144     // We will create a new subgraph for each rule node.
    145     // The "current" subgraph is substituted with the new subgraph for all ops on child nodes.
    146     // After processing child nodes the original subgraph is reinstated
    147     // for final operations in the endNode() fn.
    148 
    149     // Initial processing of the current node before processing children
    150 #if 0 && GRXML_DEBUG
    151 	if(node.Type() == TiXmlNode::ELEMENT)
    152 		node.ToElement()->Print( stdout, level);
    153 	else if(node.Type() == TiXmlNode::DOCUMENT)
    154 		node.ToDocument()->Print( stdout, level);
    155 	else if(node.Type() == TiXmlNode::TEXT)
    156 		node.ToText()->Print( stdout, level);
    157 	else if(node.Type() == TiXmlNode::DECLARATION)
    158 		node.ToDeclaration()->Print( stdout, level);
    159 	else {
    160 		const char* text = node.Value();
    161 		if(!text) text = "__NULL__";
    162 		printf("processing node type %d text %s\n", node.Type(), text);
    163 	}
    164 #endif
    165     beginNode( node, p_SubGraph, level );
    166 
    167     SubGraph *p_LocalSubGraph;
    168     p_LocalSubGraph = p_SubGraph;
    169 	TiXmlNode* child;
    170 	for( child = node.FirstChild(); child; child = child->NextSibling() )
    171     {
    172 		parseNode ( *child, p_SubGraph, level+1 );
    173     }
    174     // Revert current node
    175     p_SubGraph = p_LocalSubGraph;
    176 
    177     // Finish processing current node
    178     endNode( node, p_SubGraph, level );
    179 
    180     return true;
    181 } // parseNode
    182 
    183 
    184 bool GRXMLDoc::beginNode( XMLNode &node, SubGraph *&p_SubGraph, const unsigned int level )
    185 {
    186     std::string name = node.Value();
    187     DEBUG_PRINT("Element = " + name);
    188 
    189     // XMLNode::Type type = node.getType();
    190     if ( node.Type() == TiXmlNode::TEXT) // isCData()
    191     {
    192       const char* cc_name = node.Parent()->Value();
    193       std::string str_name(cc_name);
    194       DEBUG_PRINT (std::string("CDATA ") + name);
    195       DEBUG_PRINT (std::string("CDATA ") + str_name);
    196 
    197       processCDATA( node, p_SubGraph );
    198     }
    199     else if ( node.Type()== TiXmlNode::ELEMENT /*isNode()*/ || node.NoChildren() /*isLeaf()*/)
    200       {
    201 	//printNode(node, level);
    202 	// Use enum value
    203 	KEYWDPAIR::iterator pos;
    204 	pos = m_NodeKeyWords.find( name );
    205 	KeywordValues nodeType = NodeTypeBadValue;
    206 	if ( pos != m_NodeKeyWords.end() )
    207 	{
    208 	    nodeType = (*pos).second;
    209 	    DEBUG_PRINT("nodeType=" + nodeType);
    210 	} else if(node.Type() == TiXmlNode::COMMENT) {
    211 		return true;
    212 	} else if(node.Type() == TiXmlNode::DECLARATION && name.length()==0) {
    213 		return true;
    214 	} else {
    215 	  FATAL_ERROR( std::string("Error: unknown tag ") + name, ESR_INVALID_ARGUMENT);
    216 	}
    217 
    218 	switch ( nodeType )
    219 	{
    220 	case NodeTypeGrammar:
    221 	    {
    222 		beginParseGrammarNode( node );
    223 	    }
    224 	    break;
    225 	case NodeTypeRule:
    226 	    {
    227 		// NB This fn creates a new subgraph.
    228 		beginParseRuleNode( node, p_SubGraph );
    229 	    }
    230 	    break;
    231 	    case NodeTypeRuleReference:
    232 	    {
    233 		// NB This fn creates a new subgraph.
    234 		beginRuleRef( node, p_SubGraph );
    235 	    }
    236 	    break;
    237 	    case NodeTypeOneOf:
    238 	    {
    239 		beginOneOf( node, p_SubGraph );
    240 	    }
    241 	    break;
    242 	    case NodeTypeItem:
    243 	    {
    244 		beginItem( node, p_SubGraph );
    245 	    }
    246 	    break;
    247 	    case NodeTypeTag:
    248 	    {
    249 		beginTag( node, p_SubGraph );
    250 	    }
    251 	    break;
    252 	    case NodeTypeCount:
    253 	    {
    254 		beginCount( node, p_SubGraph );
    255 	    }
    256 	    break;
    257 	    case NodeTypeMeta:
    258 	    {
    259 	        beginParseMetaNode( node );
    260 	    }
    261 	    break;
    262 	    case NodeTypeBadValue:
    263 	    default:
    264 		DEBUG_PRINT( "UNKNOWN node name: " + name );
    265 	    break;
    266 	}; // switch
    267     } //is a Node or Leaf
    268     else if ( node.Type() == TiXmlNode::TEXT) // isCData()
    269       {
    270 	DEBUG_PRINT (std::string("CDATA ") + name);
    271 	processCDATA( node, p_SubGraph );
    272     }
    273     return true;
    274 } // beginNode()
    275 
    276 
    277 bool GRXMLDoc::endNode( XMLNode &node, SubGraph *&p_SubGraph, const unsigned int level )
    278 {
    279     std::string name = node.Value();
    280     //XMLNode::Type type = node.getType();
    281 
    282     if ( node.Type()== TiXmlNode::ELEMENT /*isNode()*/ || node.NoChildren() )
    283     {
    284 	KEYWDPAIR::iterator pos;
    285 	pos = m_NodeKeyWords.find( name );
    286 	KeywordValues nodeType = NodeTypeBadValue;
    287 	if ( pos != m_NodeKeyWords.end() )
    288 	{
    289 	    nodeType = (*pos).second;
    290 	}  else if(node.Type() == TiXmlNode::COMMENT) {
    291 		return true;
    292 	} else if(node.Type() == TiXmlNode::DECLARATION && name.length()==0) {
    293 		return true;
    294 	} else if(node.Type() == TiXmlNode::TEXT) {
    295 
    296 	} else {
    297 	  FATAL_ERROR( std::string("Error: unknown tag ") + name, ESR_INVALID_ARGUMENT );
    298 	}
    299 
    300 	switch ( nodeType )
    301 	{
    302 	case NodeTypeGrammar:
    303 	{
    304 	    endParseGrammarNode( node );
    305 	}
    306 	break;
    307 	case NodeTypeRule:
    308 	{
    309 	    endParseRuleNode( node, p_SubGraph );
    310 	}
    311 	break;
    312 	case NodeTypeRuleReference:
    313 	{
    314 	    endRuleRef( node, p_SubGraph );
    315 	}
    316 	break;
    317 	case NodeTypeOneOf:
    318 	{
    319 	    endOneOf( node, p_SubGraph );
    320 	}
    321 	break;
    322 	case NodeTypeItem:
    323 	{
    324 	    endItem(node, p_SubGraph );
    325 	}
    326 	break;
    327 	case NodeTypeTag:
    328 	{
    329 	    endTag( node, p_SubGraph );
    330 	}
    331 	break;
    332 	case NodeTypeCount:
    333 	{
    334 	    endCount( node, p_SubGraph );
    335 	}
    336 	break;
    337         case NodeTypeMeta:
    338 	{
    339             endParseMetaNode( node );
    340 	}
    341 	break;
    342 	case NodeTypeBadValue:
    343 	default:
    344 	    DEBUG_PRINT( "UNKNOWN node name: ");
    345 	    DEBUG_PRINT( name.c_str() );
    346 	//Extend the
    347 	break;
    348 	}; // switch
    349     } //isNode() or isLeaf()
    350     else
    351     {
    352 	// Do nothing?
    353     }
    354     return true;
    355 } // endNode()
    356 
    357 
    358 bool GRXMLDoc::beginParseGrammarNode(XMLNode &node)
    359 {
    360 	const char* attr;
    361 #define GETATTR(nAmE) ((attr=node.ToElement()->Attribute(nAmE))!=NULL) ? attr:""
    362 	m_XMLMode      = GETATTR("mode");
    363 	m_XMLLanguage  = GETATTR("xml:lang");
    364     m_RootRule     = GETATTR("root");	// The root rule name
    365 
    366     DEBUG_PRINT("Root rule = " + m_RootRule);
    367 
    368     m_XMLTagFormat = GETATTR("tag-format");
    369     m_XMLVersion   = GETATTR("version");
    370     m_XMLBase      = GETATTR("xml:base");
    371     return true;
    372 }
    373 
    374 bool GRXMLDoc::beginParseMetaNode(XMLNode &node)
    375 {
    376   const char* attr;
    377   std::string meta_name  = GETATTR("name");
    378   std::string meta_value = GETATTR("content");
    379 
    380   if(meta_name == "word_penalty") {
    381     m_MetaKeyValPairs.insert(meta_name,meta_value);
    382     // m_MetaKeyValPairs.print();
    383   } else if(meta_name == "do_skip_interword_silence") {
    384     for(int j = 0; j<(int)meta_value.size(); j++){
    385       meta_value[j] = tolower(meta_value[j]); //lower();
    386     }
    387     if(meta_value!="true" && meta_value!="false")
    388       printf ("\nWarning: %s must be set to 'true' or 'false'; defaulting to 'false'\n", meta_name.c_str());
    389     else
    390       m_MetaKeyValPairs.insert(meta_name,meta_value);
    391   } else if(meta_name == "userdict_name") {
    392     printf ("\nWarning: ignoring unsupported meta %s %s\n", meta_name.c_str(), meta_value.c_str());
    393   } else {
    394     printf ("\nWarning: ignoring unsupported meta %s %s\n", meta_name.c_str(), meta_value.c_str());
    395   }
    396   return true;
    397 }
    398 
    399 
// Closing handler for the <grammar> element. Nothing is done here; it
// exists so that every begin handler has an end counterpart.
// Always returns true.
bool GRXMLDoc::endParseGrammarNode(XMLNode &node)
{
    // End parse operations
    return true;
}
    405 
    406 
    407 bool GRXMLDoc::beginParseRuleNode( XMLNode &node, SubGraph *&p_SubGraph)
    408 {
    409 	const char* attr;
    410     // Note: The subGraph may change if there are forward references. This
    411     // is fine as we revert to the previous one when finished parsing the current node.
    412     DEBUG_PRINT ( "---- Rule\n" );
    413     std::string ruleName = GETATTR("id" );
    414     std::string s_tag    = GETATTR("tag" );
    415     if( s_tag.length()>0) {
    416       FATAL_ERROR("Error: unsupported tag= syntax, use <tag> ... </tag>", 1)
    417     }
    418     CHECK_NOT_EMPTY( ruleName, "id" );
    419     // Rule name must be unique within scope of entire grammar.
    420     // Put rule on stack - for context
    421     m_RuleListStack.push( ruleName );
    422 
    423     // Check whether a ruleref placeholder exists for this rule.
    424     int index;
    425     bool foundRule = findRuleIndex( ruleName, index );
    426     if (foundRule) {
    427 	// Rule is already declared; it must have been forward referenced
    428 	// so swap the placeholder subgraph in.
    429 	// NB subgraph and rule name are already known to lists.
    430 	SubGraph *p_ExistingSubgraph;
    431 	if ( findSubGraph( ruleName, p_ExistingSubgraph ) ) {
    432 	    p_SubGraph = p_ExistingSubgraph;
    433 	}
    434 	else {
    435 	    FATAL_ERROR("ERROR! Subgraph without rule name entry found!", -1);
    436         }
    437     }
    438     else {
    439 	// Create a Word Graph node for each rule node
    440 	SubGraph *newGraph;
    441 	addRuleToList( ruleName, newGraph );
    442 	p_SubGraph = newGraph;
    443     }
    444 
    445     // Make a note of the scope or rules; public, etc - used in map file.
    446     findRuleIndex( ruleName, index );
    447     std::string ruleScope = GETATTR("scope" );
    448     if ( !ruleScope.empty() ) {
    449         m_RuleScope.insert(index, ruleScope);
    450     }
    451 
    452     // We must accommodate Rules that have CDATA without an <item> element.
    453     // We need to infer this element for all rules.
    454     m_pGraph->BeginItem( p_SubGraph );
    455 
    456     PRINT_EXPRESSION( ruleName + " = { " );
    457     return true;
    458 } // beginParseRuleNode()
    459 
    460 
    461 bool GRXMLDoc::endParseRuleNode( XMLNode &node, SubGraph *&p_SubGraph )
    462 {
    463     // The rule expression has been built as a subgraph and ID added to the rule list.
    464     // Finished editing subgraph
    465     DEBUG_PRINT ( "---- /Rule\n" );
    466     //m_pGraph->EndRule(&p_SubGraph);
    467     // Tell the world
    468     //std::string ruleName = attr.get( "id" );
    469     std::string ruleName = m_RuleListStack.top();
    470     m_RuleListStack.pop();
    471     //CHECK_NOT_EMPTY( ruleName, "id" );
    472     // Must be unique rule name within scope of entire grammar.
    473     // Check whether a ruleref placeholder exists for this rule.
    474     m_pGraph->addSubGraph ( p_SubGraph );
    475 
    476     // We must accommodate Rules that have CDATA without an <item> element.
    477     // We need to infer this element for all rules.
    478     m_pGraph->EndItem( p_SubGraph );
    479 
    480     PRINT_EXPRESSION( " }\n" );
    481     return true;
    482 }
    483 
    484 bool GRXMLDoc::processCDATA( XMLNode &node, SubGraph *&p_SubGraph )
    485 {
    486     // Note the Item's CDATA
    487     // Strip leading and trailing whitespace
    488     const char* cc_name = node.Parent()->Value();
    489     std::string str_name(cc_name); // = node.Parent()->ValueStr(); // getName
    490     // std::string name = node.Parent()->Value(); // getName
    491     //if ( name == "item" ) {
    492     if ( str_name != "tag" ) {
    493 
    494 	const char* const whitespace = " \t\r\n\v\f";
    495 	std::string cdata = node.Value(); // getCData()
    496 	std::string word; // Words are whitespace separated
    497 
    498 	cdata.erase(0, cdata.find_first_not_of(whitespace) );
    499 	cdata.erase(cdata.find_last_not_of(whitespace) + 1);
    500 #if GRXML_DEBUG
    501         std::cout << "/--" << cdata << "--/\n";
    502 #endif
    503 
    504 	std::string::size_type begIdx, endIdx;
    505 
    506         //search beginning of the first word
    507         begIdx = cdata.find_first_not_of(whitespace);
    508 
    509         //while beginning of a word found
    510 	while (begIdx != std::string::npos) {
    511             //search end of the actual word
    512             endIdx = cdata.find_first_of (whitespace, begIdx);
    513             if (endIdx == string::npos) {
    514                 //end of word is end of line
    515                 endIdx = cdata.length();
    516             }
    517             word.clear();
    518 	    // word.assign(cdata,begIdx,endIdx);
    519 	    word.append (cdata, begIdx, endIdx - begIdx);
    520 	    if ( !word.empty() )
    521 	    {
    522 #if GRXML_DEBUG
    523 		std::cout << " -->" << word << "<--\n";
    524 #endif
    525 		int index;
    526 		// If a slot then take note of rule name
    527 		if ( IsSlot( word ) ) {
    528 		  const char* xmlBasename;
    529 		  std::string ruleName = m_RuleListStack.top();
    530 		  m_SlotList.insert(index, ruleName);
    531 		  xmlBasename = strrchr(m_XMLFileName.c_str(),'/');
    532 		  xmlBasename = xmlBasename ? xmlBasename+1 : m_XMLFileName.c_str();
    533 		  word = (std::string)xmlBasename + "." + ruleName + "@" + word;
    534 		  addLabelToList( word );
    535 		  findLabelIndex( word, index );
    536 		} else {
    537 		  addLabelToList( word );
    538 		  findLabelIndex( word, index );
    539 		}
    540 		m_pGraph->AddLabel( p_SubGraph, index );
    541 	    }
    542 	    begIdx = cdata.find_first_not_of (whitespace, endIdx);
    543 
    544 	}
    545     } //tag
    546     else {
    547 	// Do nothing with CDATA for elements that are not items.
    548 	// In particular, do not strip whitespace from tag cdata.
    549 	// However, CPPDOM appears to remove linefeeds. May need to tidy up.
    550 
    551     }
    552     return true;
    553 } // cdata
    554 
    555 bool GRXMLDoc::beginItem( XMLNode &node, SubGraph *&p_SubGraph )
    556 {
    557 	const char* attr;
    558     DEBUG_PRINT ("---- Item:\n");
    559     // First check whethere there is a count/repeat
    560     std::string s     = GETATTR("repeat" );
    561     int minCnt=0,maxCnt=0;
    562     std::string s_tag = GETATTR("tag" );
    563     if( s_tag.length()>0) {
    564       FATAL_ERROR("Error: unsupported tag= syntax, use <tag> ... </tag>", 1)
    565     }
    566     if( s.length()>0 && get_range( s, &minCnt, &maxCnt) ) {
    567       FATAL_ERROR(std::string("error: while parsing range ") + s,1);
    568     }
    569     if ( !s.empty() ) {
    570       // RED FLAG: max should not be 0! A +ve number should have been given.
    571       if( maxCnt>0) {
    572 	m_pGraph->BeginCount( p_SubGraph, minCnt, maxCnt );
    573       }
    574       else {
    575 	// NB: BeginItemRepeat  can only use min of 0 or 1!
    576 	m_pGraph->BeginItemRepeat ( p_SubGraph, minCnt, -1);
    577       }
    578     }
    579     else {
    580 	m_pGraph->BeginItem( p_SubGraph );
    581     }
    582     return true;
    583 }
    584 
    585 
// Closing handler for an <item> element: ends the item on the current
// subgraph. Always returns true.
bool GRXMLDoc::endItem( XMLNode &node, SubGraph *&p_SubGraph )
{
    DEBUG_PRINT ( "---- /Item\n" );

    // What TODO if no tag for an item?

    m_pGraph->EndItem( p_SubGraph );
    return true;
}
    595 
    596 
    597 bool GRXMLDoc::beginRuleRef( XMLNode &node, SubGraph *&p_SubGraph )
    598 {
    599     // Extend word FST node with an entire FST subgraph.
    600     // Forward referencing of rules is supported.
    601     // NB Remove the leading # from the ruleref name!
    602     DEBUG_PRINT ( "---- Ruleref\n" );
    603 
    604 	const char* attr;
    605     std::string s_tag = GETATTR("tag" );
    606     if( s_tag.length()>0) {
    607       FATAL_ERROR("Error: unsupported tag= syntax, use <tag> ... </tag>", 1)
    608     }
    609     std::string s = GETATTR("uri" );
    610     if (s.empty())
    611     {
    612 	//
    613 	FATAL_ERROR( "ERROR! Ruleref specifies no uri name!", -1 );
    614     }
    615     // Remove the #:
    616     int p1 = s.find("#");
    617     if ( p1 !=0 ) {
    618 	FATAL_ERROR( "ERROR! bad ruleref name: '" + s + "'" + ". Rule reference must start with a '#'. External references are not supported.", -1 );
    619     }
    620     string ruleName;
    621     getRuleRefName( node, ruleName );
    622 
    623     //std::string parentRuleName = m_RuleListStack.top();
    624     //addRuleDependency( parentRuleName, ruleName );
    625 
    626     int index;
    627     bool foundRule = findRuleIndex( ruleName, index );
    628     if (!foundRule) {
    629 	// Forward reference; create a placeholder subgraph ptr.
    630 	//SubGraph *newGraph = new SubGraph( (char *) ruleName.c_str() );
    631 	// RED FLAG:  Remember to check fwd ref rule was filled in at end.
    632 	SubGraph *newGraph;
    633 	addRuleToList( ruleName, newGraph );
    634 	findRuleIndex( ruleName, index );
    635     }
    636     // We can now treat a forward-referenced graph as if it was defined.
    637     // We will add the subgraph when we have the tag - see endItem().
    638     m_pGraph->BeginRule( p_SubGraph );
    639     m_pGraph->AddRuleRef( p_SubGraph, index );
    640     m_pGraph->EndRule( p_SubGraph );
    641 
    642     return true;
    643 }
    644 
    645 
// Closing handler for a <ruleref> element. No work is done here; the
// reference itself is added in beginRuleRef(). Always returns true.
bool GRXMLDoc::endRuleRef(XMLNode &grmNode, SubGraph *&p_SubGraph )
{
    DEBUG_PRINT ( "---- /Ruleref\n" );
    // Does nothing
    // NB The tag is not under the ruleref element - it is in the current item element.
    // We now add the tag of the AddRuleRef as we see the tag element. See EndTag().

    return true;
}
    655 
    656 
// Opening handler for a <one-of> element: starts a one-of group on the
// current subgraph. Always returns true.
bool GRXMLDoc::beginOneOf(XMLNode &grmNode, SubGraph *&p_SubGraph)
{
    DEBUG_PRINT ( "----OneOf\n" );
    m_pGraph->BeginOneOf (p_SubGraph);
    return true;
}
    663 
    664 
// Closing handler for a <one-of> element: ends the one-of group on the
// current subgraph. Always returns true.
bool GRXMLDoc::endOneOf(XMLNode &grmNode, SubGraph *&p_SubGraph)
{
    DEBUG_PRINT ( "----/OneOf\n" );
    m_pGraph->EndOneOf (p_SubGraph);
    return true;
}
    671 
    672 
    673 bool GRXMLDoc::beginTag( XMLNode &node, SubGraph *&p_SubGraph )
    674 {
    675     DEBUG_PRINT ("---- Tag\n");
    676     std::string s = node.ToElement()->GetText(); // getCdata();
    677 #if GRXML_DEBUG
    678     std::cout << s;     // debug
    679 #endif
    680     // Store the semantic tag info.
    681     // NB Do not strip whitespace from tag cdata
    682     if ( !s.empty() )
    683     {
    684 	int index;
    685 	addTagToList( s );
    686 	findTagIndex( s, index );
    687 	m_pGraph->AddTag ( p_SubGraph, index );
    688     }
    689 
    690     return true;
    691 }
    692 
    693 
// Closing handler for a <tag> element. Nothing to do: the tag text is
// consumed in beginTag(). Always returns true.
bool GRXMLDoc::endTag( XMLNode &node, SubGraph *&p_SubGraph )
{
    DEBUG_PRINT ("---- /Tag\n");
    return true;
}
    699 
    700 
    701 bool GRXMLDoc::beginCount( XMLNode &node, SubGraph *&p_SubGraph )
    702 {
    703 	const char* attr;
    704     // Count of reps applies to the text elements in this count node
    705     DEBUG_PRINT ("---- Count\n");
    706     // Get number attr
    707     std::string s     = GETATTR("number");
    708     std::string s_tag = GETATTR("tag" );
    709     if( s_tag.length()>0) {
    710       FATAL_ERROR("Error: unsupported tag= syntax, use <tag> ... </tag>", 1)
    711     }
    712     if (s.empty()) {
    713 		return false;
    714     }
    715     // not  in subgraph but in graph?!
    716     //graph.BeginCount(n);
    717 
    718     int minCnt=-1, maxCnt=-1;
    719     if( get_range( s, &minCnt, &maxCnt) ) {
    720       FATAL_ERROR(std::string("error: while parsing range ") + s,1);
    721     }
    722     if ( s.c_str() == std::string("optional") )
    723     {
    724 	m_pGraph->BeginOptional( p_SubGraph );
    725     }
    726     else if ( minCnt>0 && maxCnt>0)
    727     {
    728 	m_pGraph->BeginCount( p_SubGraph, minCnt, maxCnt );
    729     }
    730     else if( minCnt>0 )
    731       {
    732 	m_pGraph->BeginItemRepeat ( p_SubGraph, minCnt, -1);
    733       }
    734     else { //
    735     	m_pGraph->BeginOptional ( p_SubGraph );
    736     }
    737 
    738     return true;
    739 }
    740 
    741 
// Closing handler for a <count> element: ends the count on the current
// subgraph. Always returns true.
bool GRXMLDoc::endCount( XMLNode &node, SubGraph *&p_SubGraph )
{
    DEBUG_PRINT ("---- /Count\n");
    m_pGraph->EndCount( p_SubGraph );
    return true;
}
    748 
// Closing handler for a <meta> element. Nothing to do: the name/content pair
// is consumed in beginParseMetaNode(). Always returns true.
bool GRXMLDoc::endParseMetaNode(XMLNode &node)
{
  // End parse operations
  return true;
}
    754 
    755 void GRXMLDoc::printNode(XMLNode &node, int level)
    756 {
    757     std::string name = node.Value();
    758     int type = node.Type();
    759     std::string c_data;
    760 
    761     for(int i=0;i<level;i++) std::cout << " ";
    762 
    763     char c = ' ';
    764     switch(type)
    765     {
    766     case TiXmlNode::ELEMENT:
    767 	// case XMLNode::xml_nt_node: // grammar, rule, one-of, item, count
    768 	 c = '+';
    769 	 break;
    770 	/* case TiXmlNode::TEXT:
    771 	// case XMLNode::xml_nt_leaf:
    772 	c = '-';
    773 	break; */
    774     case TiXmlNode::DOCUMENT:
    775     // case XMLNode::xml_nt_document:
    776 	c = '\\';
    777 	break;
    778     case TiXmlNode::TEXT:
    779     // case XMLNode::xml_nt_cdata:
    780 	c = '#';
    781 	c_data = node.Value(); // getCdata();
    782 	break;
    783 	case TiXmlNode::UNKNOWN:
    784 	case TiXmlNode::COMMENT:
    785 	case TiXmlNode::TYPECOUNT:
    786 	case TiXmlNode::DECLARATION:
    787 	default:
    788 		std::cout << "Error: not sure what to do here" << std::endl;
    789 		break;
    790     }
    791 	if(node.Type() == TiXmlNode::TEXT)  // isCData()
    792 	  std::cout << c << name.c_str() << "[" << c_data << "]" << std::endl;
    793 	//Extend the tag hashtable
    794     else
    795 	  std::cout << c << name.c_str() << std::endl;
    796 
    797 	if( node.Type() == TiXmlNode::ELEMENT) {
    798 
    799 		for(TiXmlAttribute* attr=node.ToElement()->FirstAttribute();
    800 			attr; attr=attr->Next() ) {
    801 
    802 		  // guru: added output of attributes
    803 			for (int i=0; i<level; i++)
    804 				std::cout << " ";
    805 			std::cout << "   ";
    806 			std::cout << attr->Name() << ": " << attr->Value() << std::endl;
    807 		}
    808 	}
    809 
    810 }
    811 
    812 /** Function: addRuleToList
    813     Extends list of SubGraphs with given subGraph
    814     and extends list of rule names too.
    815     TODO: Can we use one hash and use internal numeric index for rule IDs?
    816 */
    817 
    818 
    819 bool GRXMLDoc::addRuleToList(std::string const & ruleName, SubGraph *&p_SubGraph)
    820 {
    821     int index;
    822     if ( findRuleIndex ( ruleName, index ) ) {
    823 	FATAL_ERROR("ERROR! Rule name " + ruleName + " is already defined!", -1 );
    824     }
    825 
    826     addLabelToList( m_XMLFileName + "@" + ruleName);
    827     findLabelIndex( m_XMLFileName + "@" + ruleName, index );
    828 #if GRXML_DEBUG
    829     std::cout << "Rule " << ruleName << std::endl;
    830 #endif
    831     // Create the new subgraph and update lists
    832     m_RuleList.insert( ruleName, index );
    833     p_SubGraph = new SubGraph( (char *) ruleName.c_str(), index );
    834 
    835     bool success = m_SubgraphList.insert( ruleName, p_SubGraph );
    836     if (!success) {
    837 	FATAL_ERROR("ERROR! subgraph for " + ruleName + " is already defined!", -1 );
    838     }
    839 #if ADD_BRACES
    840     addLabelToList( "{" );
    841     std::stringstream  ss;
    842     ss << "}(" << index << ")";
    843     addLabelToList( ss.str());
    844 #endif
    845     return success;
    846 }
    847 
    848 
    849 bool GRXMLDoc::deleteRules()
    850 {
    851     // Delete all allocated subgraphs.
    852     // The rule strings are part of the hashtables and get deleted by them.
    853     int index;
    854     SubGraph *p_SubGraph;
    855     std::string ruleName;
    856     while ( !m_RuleList.isEmpty() ) {
    857 	m_RuleList.getFirst( &ruleName, &index );
    858 	m_RuleList.remove( ruleName );
    859 	if (m_SubgraphList.getValue( ruleName, &p_SubGraph ) ) {
    860 	    delete p_SubGraph;
    861 	}
    862 	else {
    863 	    FATAL_ERROR("No subgraph for rule " + ruleName + "! Mismatched rules and subgraph hashtables!", -1);
    864 	}
    865     }
    866     m_SubgraphList.clear();
    867     m_RuleList.clear();
    868     m_LabelList.clear();
    869     m_TagList.clear();
    870     return true;
    871 }
    872 
// Looks up the subgraph stored under rule name s in m_SubgraphList.
// Returns the result of getValue(); p_SubGraph receives the subgraph
// (presumably only on success - confirm against the hashmap implementation).
bool GRXMLDoc::findSubGraph(std::string & s, SubGraph *&p_SubGraph)
{
    return m_SubgraphList.getValue(s, &p_SubGraph);
}
    877 
    878 bool GRXMLDoc::findRule(int i, std::string &s )
    879 {
    880     return m_RuleList.getIndex( i, &s );
    881 }
    882 
    883 bool GRXMLDoc::findTag(int i, std::string &s )
    884 {
    885     return m_TagList.getValue( i, &s );
    886 }
    887 
    888 bool GRXMLDoc::findLabel(int i, std::string &s )
    889 {
    890     return m_LabelList.getValue( i, &s );
    891 }
    892 
    893 bool GRXMLDoc::findSubGraphIndex( SubGraph *p_SubGraph, std::string &s )
    894 {
    895     return m_SubgraphList.getIndex( p_SubGraph, &s );
    896 }
    897 
    898 bool GRXMLDoc::findRuleIndex( std::string s, int &i )
    899 {
    900     return m_RuleList.getValue( s, &i );
    901 }
    902 bool GRXMLDoc::findTagIndex( std::string s, int &i )
    903 {
    904     return m_TagList.getIndex( s, &i );
    905 }
    906 bool GRXMLDoc::findLabelIndex( std::string s, int &i )
    907 {
    908     return m_LabelList.getIndex( s, &i );
    909 }
    910 bool GRXMLDoc::findMeta(const std::string & sn, std::string &s)
    911 {
    912     return m_MetaKeyValPairs.getValue( sn, &s );
    913 }
    914 bool GRXMLDoc::setMeta(const std::string & sn, const std::string &s)
    915 {
    916   std::string tmp;
    917   if(findMeta(sn,tmp))
    918     m_MetaKeyValPairs.remove(sn);
    919   return m_MetaKeyValPairs.insert(sn,s);
    920 }
    921 
    922 bool GRXMLDoc::addTagToList( std::string const& s )
    923 {
    924     bool success = true;
    925     // Make values unique
    926     int index;
    927     if ( !findTagIndex( s, index ) )
    928 	success = m_TagList.insert( m_TagAutoIndex++, s );
    929     return success;
    930 }
    931 
    932 
    933 bool GRXMLDoc::addLabelToList( std::string const& s )
    934 {
    935   // TODO: Labels should be unique. Change key.
    936   int index;
    937   bool bRes = m_LabelList.getIndex( s, &index );
    938   if(bRes == true) {
    939     return false; // exists
    940   }
    941   bRes = m_LabelList.insert( m_LabelAutoIndex++, s );
    942   return  bRes;
    943 }
    944 
    945 void GRXMLDoc::printLists()
    946 {
    947     m_SubgraphList.print();
    948     m_RuleList.print();
    949     m_TagList.print();
    950     m_LabelList.print();
    951 }
    952 
    953 
    954 void GRXMLDoc::printSubgraphs()
    955 {
    956     SubGraph *p_SubGraph;
    957     std::string rule;
    958     int index;
    959     if ( m_RuleList.getFirst( &rule, &index) ) {
    960 	if ( findSubGraph( rule, p_SubGraph ) ) {
    961 	    DEBUG_PRINT("============ Rule: " + rule + "============");
    962 	    printSubgraph( *p_SubGraph );
    963 	    while ( m_RuleList.getNext( &rule, &index) ) {
    964 		if ( findSubGraph( rule, p_SubGraph ) ) {
    965 		    printSubgraph( *p_SubGraph );
    966 		}
    967 	    }
    968 	}
    969     }
    970 }
    971 
    972 
    973 void GRXMLDoc::printSubgraph( SubGraph &p_SubGraph )
    974 {
    975     p_SubGraph.PrintWithLabels( *this );
    976 }
    977 
    978 
    979 bool GRXMLDoc::getRuleRefName(XMLNode &node, std::string &ruleName)
    980 {
    981   const char* attr;
    982   std::string s = GETATTR("uri" );
    983   if (s.empty()) {
    984     FATAL_ERROR( "ERROR! Ruleref specifies no uri name!", -1 );
    985   }
    986   // Remove the #:
    987   int p1 = s.find("#");
    988   if ( p1 !=0 ) {
    989     FATAL_ERROR( "ERROR! bad ruleref name: '" + s + "'", -1 );
    990   }
    991   ruleName.assign( s, 1, s.size() );
    992   return true;
    993 }
    994 
    995 void GRXMLDoc::initializeLists()
    996 {
    997   m_SubgraphList.setName("Subgraphs");
    998   m_RuleList.setName("Rules");
    999   m_TagList.setName("Tags");
   1000   m_LabelList.setName("Labels");
   1001 
   1002   /* Predefined rules. NB Labels are also created for each rule added.
   1003   // The required order for these labels in the .map output file is:
   1004   //     0   eps
   1005   //     next come slots
   1006   //     pau and pau2
   1007   //     everything else
   1008   // We will add all these now in case they are referenced and we will
   1009   // reindex after we have parsed the grammar -- when we have the list
   1010   // of slots. This re-indexing is for the output files .map and .P.txt.
   1011   //
   1012   */
   1013     addLabelToList( "eps" );
   1014 
   1015     addLabelToList( "-pau-" );
   1016     addLabelToList( "-pau2-" );
   1017 }
   1018 
   1019 void GRXMLDoc::writeMapFile( std::string & fileName )
   1020 {
   1021     // We need to re-index in order to put the labels in correct order:
   1022     // 1. eps
   1023     // 2. all slots
   1024     // 3. all rules
   1025     // 4. -pau- words
   1026     // 5. remaining labels
   1027     ofstream outfile;
   1028     int index, origIndex;
   1029     std::string label;
   1030     std::string slotRuleName;
   1031     std::string scope; // For rules
   1032     HashMap<int,std::string> orderedList;
   1033     int orderedIndex=0;
   1034     // 1. eps
   1035     orderedList.insert( orderedIndex++, "eps" );
   1036 
   1037     // 2. slots
   1038     if ( m_LabelList.getFirst( &origIndex, &label ) ) {
   1039 	if ( IsSlot( label ) ) {
   1040 	    orderedList.insert( orderedIndex++, label );
   1041 	}
   1042 	while (m_LabelList.getNext( &origIndex, &label ) ) {
   1043 	    if ( IsSlot( label ) ) {
   1044 		orderedList.insert( orderedIndex++, label );
   1045 	    }
   1046 	}
   1047     }
   1048 
   1049     // 3.  Now rules, or anything with @
   1050     if ( m_LabelList.getFirst( &origIndex, &label ) ) {
   1051 	do {
   1052 #if GRXML_DEBUG
   1053 	    std::cout << label << " "<< label.find_first_of ("@") << std::endl;
   1054 #endif
   1055             if (!IsSlot(label) && label.find_first_of ("@") != string::npos) {
   1056 #if GRXML_DEBUG
   1057 		std::cout << "    Adding " << label << std::endl;
   1058 #endif
   1059 		orderedList.insert( orderedIndex++, label );
   1060 	    }
   1061 	} while (m_LabelList.getNext( &origIndex, &label ) );
   1062     }
   1063 
   1064     // 4. pau
   1065     orderedList.insert( orderedIndex++, "-pau-" );
   1066     orderedList.insert( orderedIndex++, "-pau2-" );
   1067 
   1068     // 5. Remaining stuff. NB We depend upon the label not
   1069     //    being added twice.
   1070     if ( m_LabelList.getFirst( &origIndex, &label ) ) {
   1071 	if ( !orderedList.getIndex( label, &index ) ) {
   1072 	  orderedList.insert( orderedIndex++, label );
   1073 	}
   1074 	while (m_LabelList.getNext( &origIndex, &label ) ) {
   1075 	    if ( !orderedList.getIndex( label, &index ) ) {
   1076 	      orderedList.insert( orderedIndex++, label );
   1077 	    }
   1078 	}
   1079     }
   1080     outfile.open ( fileName.c_str() );
   1081 
   1082     bool bRes = orderedList.getFirst( &index, &label );
   1083     do {
   1084       if(!bRes) break;
   1085       // Look up scope using original index
   1086       m_LabelList.getIndex( label, &origIndex );
   1087       if (m_RuleScope.getValue(origIndex, &scope) )
   1088 	label = scope + ":" + label;
   1089       outfile << label << " " << index << std::endl;
   1090       bRes = orderedList.getNext( &index, &label );
   1091     } while(bRes);
   1092 
   1093     outfile.close();
   1094 }
   1095 
   1096 
   1097 void GRXMLDoc::writeScriptFile( std::string & fileName )
   1098 {
   1099     ofstream outfile;
   1100     int index;
   1101     std::string label;
   1102     outfile.open ( fileName.c_str() );
   1103     if ( m_TagList.getFirst( &index, &label ) ) {
   1104     	outfile << index << " " << label << std::endl;
   1105     }
   1106     while (m_TagList.getNext( &index, &label ) ) {
   1107     	outfile << index << " " << label << std::endl;
   1108     }
   1109     outfile.close();
   1110 
   1111     //m_LabelList.writeFile( fileName );
   1112 }
   1113 
   1114 void GRXMLDoc::writeParamsFile( std::string & fileName )
   1115 {
   1116   std::string wtw;
   1117   ofstream outfile;
   1118   bool bRes;
   1119 
   1120   outfile.open(fileName.c_str());
   1121 
   1122   std::string metaname = "word_penalty";
   1123   bRes = findMeta(metaname, wtw);
   1124   if(bRes)
   1125     outfile << metaname.c_str() << "\t=\t" << wtw.c_str() << std::endl;
   1126 
   1127   // outfile << "locale"  << "\t=\t" << m_XMLLanguage << std::endl;
   1128   outfile.close();
   1129 }
   1130 
   1131 void GRXMLDoc::writeGraphFiles( std::string& prefix, bool bDoWriteRecogGraphs)
   1132 {
   1133     SubGraph *p_SubGraph;
   1134     SubGraph *p_SemGraph;
   1135     std::string fileName;
   1136     if ( !findSubGraph( m_RootRule, p_SubGraph ) ) {
   1137 	FATAL_ERROR ("ERROR: writeGraphFiles - no root rule "+ m_RootRule + " defined. No file created", -1 );
   1138     }
   1139 
   1140     //  Create .P.txt
   1141     printf ("\nCreating semantic graph file\n");
   1142     p_SemGraph = new SubGraph( (char *) "Main", -1);
   1143     m_pGraph->BeginRule( p_SemGraph );
   1144     m_pGraph->AddRuleRef( p_SemGraph, p_SubGraph->getRuleId());
   1145     m_pGraph->EndRule( p_SemGraph );
   1146     m_pGraph->ExpandRules (p_SemGraph);
   1147     p_SemGraph->RemoveInternalConnections ();
   1148 
   1149     p_SemGraph->AddTerminalConnections ();
   1150     p_SemGraph->ReduceArcsByEquivalence();
   1151     p_SemGraph->RemoveUnreachedConnections (-1, -1);
   1152     p_SemGraph->DeterminizeArcs();
   1153     p_SemGraph->RemoveUnreachedConnections (-1, -1);
   1154     p_SemGraph->ReduceArcsByEquivalence();
   1155     p_SemGraph->RemoveUnreachedConnections (-1, -1);
   1156     fileName = prefix + ".P.txt";
   1157     p_SemGraph->WriteForwardGraphWithSemantic( fileName, *this );
   1158     delete p_SemGraph;
   1159 
   1160     fileName = prefix + ".omap";
   1161     this->WriteOLabels(fileName);
   1162 }
   1163 
   1164 void GRXMLDoc::sortLabels()
   1165 {
   1166     // We need to re-index in order to put the labels in correct order:
   1167     int index=0, origIndex;
   1168     std::string label;
   1169     std::string slotRuleName;
   1170     std::string scope; // For rules
   1171     std::vector <std::string> orderedList;
   1172     if ( m_LabelList.getFirst( &origIndex, &label ) ) {
   1173         // Look up scope using original index
   1174         orderedList.push_back( label );
   1175         while (m_LabelList.getNext( &origIndex, &label ) ) {
   1176             orderedList.push_back( label );
   1177         }
   1178     }
   1179     std::sort(orderedList.begin(), orderedList.end() );
   1180     m_SortedLabelList.clear();
   1181     index=0;
   1182     for (std::vector<std::string>::const_iterator citer = orderedList.begin();
   1183      citer != orderedList.end(); ++citer) {
   1184         label = *citer;
   1185         m_LabelList.getIndex( label, &origIndex );
   1186         m_SortedLabelList.insert( index, label );
   1187         index++;
   1188         // std::cout <<"Sorted: " << index <<" " << label <<std::endl;
   1189     }
   1190     return;
   1191 }
   1192 
   1193 bool GRXMLDoc::findSortedLabel(int i, std::string &s )
   1194 {
   1195     if (m_SortedLabelList.isEmpty() ) {
   1196         sortLabels(); // Create the sorted label list.
   1197     }
   1198     return m_SortedLabelList.getValue( i, &s );
   1199 }
   1200 
   1201 bool GRXMLDoc::findSortedLabelIndex( int i, int &sortedIndex )
   1202 {
   1203     std::string s;
   1204     if (m_SortedLabelList.isEmpty() ) {
   1205         sortLabels(); // Create the sorted label list.
   1206     }
   1207     if ( m_LabelList.getValue( i, &s ) ) {
   1208         if ( m_SortedLabelList.getIndex(s, &sortedIndex )) {
   1209             return true;
   1210         }
   1211     }
   1212     return false;
   1213 }
   1214 
   1215 void GRXMLDoc::addOLabelToOList( std::string &s)
   1216 {
   1217     m_OutputPtxtLabels.insert( s, 0);
   1218 }
   1219 
   1220 bool GRXMLDoc::WriteOLabels(const std::string& fileName)
   1221 {
   1222   HashMap<int,std::string> invMap;
   1223   int count = 0;
   1224   int max_script_label = 0;
   1225   int scriptID = 0;
   1226   std::map<std::string, int>::iterator iter;
   1227   bool bFound;
   1228   int tmp;
   1229 
   1230   std::string strIndex = "eps";
   1231   bFound = m_OutputPtxtLabels.getValue(strIndex, &tmp);
   1232   if(bFound)
   1233     m_OutputPtxtLabels.remove(strIndex);
   1234   m_OutputPtxtLabels.insert(strIndex, count);
   1235   invMap.insert( count, strIndex);
   1236   count++;
   1237 
   1238   strIndex = "{";
   1239   bFound = m_OutputPtxtLabels.getValue(strIndex, &tmp);
   1240   if(bFound)
   1241     m_OutputPtxtLabels.remove(strIndex);
   1242   m_OutputPtxtLabels.insert(strIndex, count);
   1243   invMap.insert( count, strIndex);
   1244   count++;
   1245 
   1246   iter = m_OutputPtxtLabels.begin();
   1247   for( ; iter!=m_OutputPtxtLabels.end(); iter++) {
   1248     const char* label = iter->first.c_str();
   1249     if( !strncmp(label,SCRIPT_LABEL_PREFIX, SCRIPT_LABEL_PREFIX_LEN)
   1250 	&& strspn(label+SCRIPT_LABEL_PREFIX_LEN,"0123456789")==strlen(label+SCRIPT_LABEL_PREFIX_LEN) ) {
   1251       scriptID = atoi(label+SCRIPT_LABEL_PREFIX_LEN);
   1252       if(max_script_label < scriptID)
   1253 	max_script_label = scriptID;
   1254     }/* else if( !strncmp(label,SCRIPT_LABEL_PREFIX, SCRIPT_LABEL_PREFIX_LEN)) {
   1255       invMap.insert(count, iter->first);
   1256       iter->second = count;
   1257       count++;
   1258       }*/
   1259     else if(!invMap.getIndex((iter->first), &tmp)){
   1260       invMap.insert(count, iter->first);
   1261       iter->second = count;
   1262       count++;
   1263     }
   1264   }
   1265 
   1266   cout << "found max_script_label " << max_script_label << endl;
   1267   for(int j=0; j<=max_script_label; j++) {
   1268     std::stringstream ss;
   1269     ss << SCRIPT_LABEL_PREFIX << j;
   1270     if(!invMap.getIndex( ss.str(), &tmp)) {
   1271       invMap.insert( count++, ss.str());
   1272     }
   1273   }
   1274 
   1275   std::ofstream outfile(fileName.c_str());
   1276   std::string outscript;
   1277   if(!outfile) {
   1278     FATAL_ERROR( "Error: opening the omap file for output", 1);
   1279     WARNING( "Error: opening the omap file for output");
   1280     return 1;
   1281   }
   1282   for(int i=0; i<count; i++) {
   1283     outscript = "";
   1284     invMap.getValue(i,&outscript);
   1285     if(outscript.length() == 0) {
   1286       cout << "error: internal error while making .omap " << i << endl;
   1287       FATAL_ERROR("error",1);
   1288     }
   1289     outfile << outscript.c_str() << " " << i << std::endl;
   1290   }
   1291   outfile.close();
   1292   return 0;
   1293 }
   1294