Home | History | Annotate | Download | only in grxmlcompile
      1 /*---------------------------------------------------------------------------*
      2  *  grxmldoc.h  *
      3  *                                                                           *
      4  *  Copyright 2007, 2008 Nuance Communications, Inc.                               *
      5  *                                                                           *
      6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
      7  *  you may not use this file except in compliance with the License.         *
      8  *                                                                           *
      9  *  You may obtain a copy of the License at                                  *
     10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
     11  *                                                                           *
     12  *  Unless required by applicable law or agreed to in writing, software      *
     13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
     14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
     15  *  See the License for the specific language governing permissions and      *
     16  *  limitations under the License.                                           *
     17  *                                                                           *
     18  *---------------------------------------------------------------------------*/
     19 
     20 
     21 #ifndef __grxmldoc_h__
     22 #define  __grxmldoc_h__
     23 
     24 // #define MEMTRACE // Uses mtrace() to detect leaks
     25 
     26 #include "hashmap.h"
     27 #include "tinyxml.h"
     28 #include <stack>
     29 #include "vocab.h"
     30 
     31 #define SCRIPT_LABEL_PREFIX "_" // NOTE(review): presumably the prefix that marks script labels - confirm at the use sites.
     32 #define SCRIPT_LABEL_PREFIX_LEN 1 // Character count of SCRIPT_LABEL_PREFIX; the two values are kept in sync by hand.
     33 class Node; // Forward declarations; none of these types is defined in this header.
     34 template <typename T1, typename T2> class HashMap;
     35 class Graph;
     36 class SubGraph;
     37 
     38 class GRXMLDoc // GRXML grammar document: turns a DOM tree into a word graph plus rule/tag/label lists and writes the output files.
     39 {
     40 public:
     41     typedef TiXmlNode XMLNode; // The DOM node type used by every parsing function below.
     42     // Keyword identifiers for XML node names; KEYWDPAIR maps a name string to its identifier for string comparison.
     43     enum KeywordValues {NodeTypeGrammar, NodeTypeRule, NodeTypeRuleReference, NodeTypeOneOf, NodeTypeItem, NodeTypeTag, NodeTypeCount, NodeTypeMeta, NodeTypeBadValue};
     44     typedef  std::map<std::string, KeywordValues> KEYWDPAIR;
     45     // Per-item record; pointers to these records are kept on m_ItemVarsStack.
     46     typedef struct {
     47 	bool hasRuleRef;
     48 	std::string RuleRefName;
     49 	int tagID;
     50     } ItemData;
     51 
     52     GRXMLDoc();
     53     ~GRXMLDoc();
     54 
     55     // Optional use of vocabulary and acoustic-model files; this group is compiled out when OPENFSTSDK is defined.
     56    // TODO: Rearrange access to voc and models
     57 #ifndef OPENFSTSDK
     58     void initialize_SR(char* parfile);
     59     void shutdown_SR();
     60     Vocabulary *getVocabulary() { return m_pVocab;} // Accessor for m_pVocab.
     61     AcousticModel* getModel() { return m_pModel;} // Accessor for m_pModel.
     62     int addPhonemeToList( std::string const& s );
     63     bool findPhoneme( int i, std::string & s );
     64     bool getHMMSequence (int centre, int left, int right, std::vector<int> & modelSequence);
     65 #endif
     66 
     67     //  Lookup functions. NOTE(review): the find* functions presumably return true and fill the non-const reference argument on success - confirm in the implementation.
     68     bool findSubGraph(std::string & s, SubGraph *&p_SubGraph);
     69     bool findRule(int i, std::string &s );
     70     bool findTag(int i, std::string &s );
     71     bool findLabel(int i, std::string &s );
     72     bool findSubGraphIndex( SubGraph *p_SubGraph, std::string &s );
     73     bool findRuleIndex( std::string s, int &i );
     74     bool findTagIndex( std::string s, int &i );
     75     bool findLabelIndex( std::string s, int &i );
     76     bool findSortedLabel(int i, std::string &s );
     77     bool findSortedLabelIndex( int i, int &sortedIndex );
     78     bool findMeta(const std::string & sn, std::string &s);
     79     bool setMeta(const std::string & sn, const std::string &s);
     80     void sortLabels();
     81     void addOLabelToOList( std::string & s);
     82     bool WriteOLabels(const std::string& fileName);
     83 
     84     // Take DOM object and create word graph. Creates SubGraph, rule, tag and label lists.
     85     bool parseGrammar( XMLNode &node, std::string & xMLFileName );
     86 
     87     // Generate output files
     88     void writeMapFile( std::string & fileName );
     89     void writeScriptFile( std::string & fileName );
     90     void writeGraphFiles( std::string & fileName, bool bDoWriteRecogGraphs );
     91     void writeParamsFile( std::string & fileName );
     92     void printLists();
     93     void printSubgraphs();
     94     // Internal helpers follow: begin*/end* pairs for each node type, plus maintenance of the rule, tag and label lists.
     95 protected:
     96     void initializeLists();
     97     bool parseNode( XMLNode &node, SubGraph *&p_SubGraph, const unsigned int level );
     98     bool beginNode( XMLNode &node, SubGraph *&p_SubGraph, const unsigned int level );
     99     bool endNode( XMLNode &node, SubGraph *&p_SubGraph, const unsigned int level );
    100     bool beginParseGrammarNode( XMLNode &node );
    101     bool endParseGrammarNode( XMLNode &node );
    102     bool beginParseMetaNode( XMLNode &node );
    103     bool endParseMetaNode( XMLNode &node );
    104     bool beginParseRuleNode( XMLNode &node, SubGraph *&p_SubGraph);
    105     bool endParseRuleNode( XMLNode &node, SubGraph *&p_SubGraph );
    106     bool beginItem( XMLNode &node, SubGraph *&p_SubGraph );
    107     bool endItem( XMLNode &node, SubGraph *&p_SubGraph );
    108     bool processCDATA( XMLNode &node, SubGraph *&p_SubGraph );
    109     bool beginOneOf( XMLNode &node, SubGraph *&p_SubGraph );
    110     bool endOneOf( XMLNode &node, SubGraph *&p_SubGraph );
    111     bool beginRuleRef( XMLNode &grmNode, SubGraph *&p_SubGraph );
    112     bool endRuleRef(XMLNode &node, SubGraph *&p_SubGraph );
    113     bool fixRuleRef( SubGraph *&p_SubGraph );
    114     bool getRuleRefName(XMLNode &node, std::string &ruleName);
    115     bool extendAltExpression( XMLNode &node, int level );
    116     bool beginTag( XMLNode &node, SubGraph *&p_SubGraph );
    117     bool endTag( XMLNode &node, SubGraph *&p_SubGraph );
    118     bool beginCount( XMLNode &node, SubGraph *&p_SubGraph );
    119     bool endCount( XMLNode &node, SubGraph *&p_SubGraph );
    120     void printNode( XMLNode &node, int level );
    121     bool addRuleToList(std::string const& ruleName, SubGraph *&p_SubGraph);
    122 
    123     bool deleteRules();
    124     bool addTagToList( std::string const& s );
    125     bool addLabelToList( std::string const& s );
    126     void printSubgraph( SubGraph &p_SubGraph );
    127 
    128 private:
    129 
    130     Graph *m_pGraph;	// The top-level container object for the word graph;
    131     KEYWDPAIR  m_NodeKeyWords; // Node-name string -> KeywordValues lookup table.
    132     // The unique attributes of the GRXML doc
    133     std::string m_XMLMode;
    134     std::string m_XMLLanguage;
    135     std::string m_RootRule;
    136     std::string m_XMLTagFormat;
    137     std::string m_XMLVersion;
    138     std::string m_XMLBase;
    139     std::string m_XMLFileName;
    140 
    141     //  We store indices for all labels used in the word graph.
    142     //  Store all these labels in the m_LabelList table, which is auto-indexed.
    143     //  We need a list of the rule names so that we can distinguish them from other labels.
    144     //  Store these rule names in the m_RuleList table with an index equal to the label index for the rule.
    145     //  Thus, when we need the index of a rule, we go straight to m_RuleList
    146     //	and when we need the label of a rule or any other item we use m_LabelList.
    147 
    148     HashMap<std::string,SubGraph*> m_SubgraphList; // Keyed by string, holds SubGraph pointers.
    149     HashMap<int,std::string> m_TagList;	// <item tag = ...
    150     HashMap<int,std::string> m_LabelList; // Stores all network label IDs, including rule names
    151     HashMap<int,std::string> m_SortedLabelList; // Used to sort the labels (the original comment was cut off here)
    152     HashMap<int, std::string> m_PhonemeList;    // Stores triphones
    153     HashMap<std::string,int> m_RuleList; // Stores rule name and index used in the LabelList. Use to distinguish which are rules.
    154     HashMap<int, std::string> m_RuleScope;
    155     HashMap<int, std::string> m_SlotList;
    156     HashMap<std::string, std::string> m_MetaKeyValPairs; // Meta name/value strings; stores the word-penalty value
    157     HashMap<std::string, int> m_OutputPtxtLabels;
    158 
    159     std::stack<ItemData*> m_ItemVarsStack; // NOTE(review): holds raw ItemData pointers; who frees them is not visible in this header.
    160     std::stack<std::string> m_RuleListStack;
    161     int m_RuleAutoIndex;
    162     int m_TagAutoIndex;
    163     int m_LabelAutoIndex;
    164     int m_PhonemeAutoIndex;
    165     int m_ExpandedRulesAutoIndex;
    166     int m_TagID; // Use to stash tag index for items.
    167     // Note the subgraph list does not have an auto-index as it is string-indexed.
    168     // All these lists also have an internal numeric index which can be used.
    169 
    170 #ifndef OPENFSTSDK
    171     Vocabulary *m_pVocab;
    172     AcousticModel *m_pModel;
    173 #endif
    174 
    175 };
    176 
    177 #endif // __grxmldoc_h__
    178 
    179 
    180 
    181