Home | History | Annotate | Download | only in grxmlcompile
      1 /* FILE:		sub_grph.h
      2  *  DATE MODIFIED:	31-Aug-07
      3  *  DESCRIPTION:	Part of the  SREC graph compiler project source files.
      4  *
      5  *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
      6  *                                                                           *
      7  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
      8  *  you may not use this file except in compliance with the License.         *
      9  *                                                                           *
     10  *  You may obtain a copy of the License at                                  *
     11  *      http://www.apache.org/licenses/LICENSE-2.0                           *
     12  *                                                                           *
     13  *  Unless required by applicable law or agreed to in writing, software      *
     14  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
     15  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
     16  *  See the License for the specific language governing permissions and      *
     17  *  limitations under the License.                                           *
     18  *                                                                           *
     19  *---------------------------------------------------------------------------*/
     20 
     21 #ifndef __sub_grph_h__
     22 #define __sub_grph_h__
     23 
     24 #include "netw_arc.h"
     25 
     26 #include "vocab.h"
     27 
     28 //  TODO:
     29 //  Guards various stages 1.  Compiling  2.  Processing
     30 
     31 #define SCOPE_ROOT         0
     32 #define SCOPE_ITEM        -1
     33 #define SCOPE_RULE        -2
     34 #define SCOPE_ONEOF       -3
     35 #define SCOPE_COUNT       -4
     36 #define SCOPE_OPT         -5
     37 #define SCOPE_REPEAT      -6
     38 
     39 #define NONE_LABEL        -100000           //  Null transition
     40 #define TAG_LABEL         -100001           //  Tag transition
     41 #define WB_LABEL          -100002           //  Word boundary transition
     42 #define TERMINAL_LABEL    -100003           //  Terminal transition
     43 #define BEGINSCOPE_LABEL  -100004           //  Start of scope
     44 #define ENDSCOPE_LABEL    -100005           //  End of scope
     45 #define BEGINRULE_LABEL   -100006           //  Start of rule
     46 #define ENDRULE_LABEL     -100007           //  End of rule
     47 #define DISCARD_LABEL     -100010           //  Discard (internal)
     48 #define INITIAL_LABEL     -100011           //  Initial silence
     49 #define FINAL_LABEL       -100012           //  Final silence
     50 
     51 #define MAXNUM             100000           //  A large number
     52 #define BLKSIZE             10000           //  Buffer size
     53 #define MAXITS                  5           //  Maximum equivalence reduction iterations
     54 
     55 // #define MAX(X,Y)        ((X) > (Y) ? (X) : (Y))
     56 // #define MIN(X,Y)        ((X) > (Y) ? (X) : (Y))
     57 
     58 // General functions
     59 bool IsSlot (std::string label);
     60 
     61 
     62 class DetCache {
     63     public:
     64 
     65         DetCache () { num= 0; dataPack= 0; };
     66 
     67         void AddEntry (int comb, int first, int second) {
     68             if (num == 0)
     69                 dataPack= new int [3*BLKSIZE];
     70             else if ((num%BLKSIZE) == 0) {
     71                 int *newPack = new int [num+3*BLKSIZE];
     72                 for (int ii= 0; ii < (3*num); ii++)
     73                     newPack[ii]= dataPack[ii];
     74                 delete [] dataPack;
     75                 dataPack= newPack;
     76             }
     77             dataPack[3*num]= first;
     78             dataPack[3*num+1]= second;
     79             dataPack[3*num+2]= comb;
     80 	    num++;
     81             return;
     82         };
     83 
     84         int QueryEntry (int first, int second) {
     85             if (!dataPack)
     86                 return -1;
     87             for (int ii= 0; ii < num; ii++)
     88                 if (dataPack[3*ii] == first && dataPack[3*ii+1] == second)
     89                     return (dataPack[3*ii+2]);
     90             return -1;
     91         }
     92 
     93         ~DetCache () { if (dataPack) delete [] dataPack; };
     94 
     95     private:
     96         int num;
     97         int *dataPack;
     98 };
     99 
    100 
    101 class SubGraph
    102 {
    103 public:
    104 
    105     SubGraph (char *name, int ruleNo)
    106     {
    107         int count= strlen(name);
    108         title= new char [count+1];
    109         strcpy (title, name);
    110 	// printf ("New Rule %s\n", title);
    111         ruleId= ruleNo;
    112         popOp= 0;
    113         numArc= 0;
    114         numVertex= 0;
    115         lastScope= SCOPE_ROOT;
    116         startId= NewVertexId();
    117         lastId= startId;
    118         endId= lastId;
    119         forwardList= 0;
    120         backwardList= 0;
    121         sortNum= 0;
    122         vertexProp= 0;
    123         sizeProp= 0;
    124         return;
    125     };
    126 
    127     SubGraph (SubGraph *baseG);
    128 
    129     ~SubGraph ()
    130     {
    131         delete [] title;
    132         for (int ii= 0; ii < numArc; ii++)
    133             delete arc[ii];
    134         if (numArc >0)
    135             delete [] arc;
    136         if (forwardList)
    137             delete [] forwardList;
    138         if (backwardList)
    139             delete [] backwardList;
    140         return;
    141     }
    142 
    143     void getName (char *name, int maxlen)
    144     {
    145         strncpy (name, title, maxlen);
    146     }
    147 
    148     int getRuleId () {
    149         return ruleId;
    150     }
    151 
    152     /**  Creates a copy of the graph */
    153     void CopyFastArcs (SubGraph *subG, int startLoc, int endLoc, int offset, int headId, int newHeadId, int tailId, int newTailId);
    154 
    155     /**  Graph construction routines */
    156     void BeginScope (int scopeType, int arg1, int arg2);
    157     void EndScope ();
    158     int  AddItem (int inputLabel, int tagLabel);
    159     int  AddTag (int tagLabel);
    160     void ExpandRules (SubGraph **subList, int *lookupList, int numSubs);
    161 
    162     /**  Reverse subgraph */
    163     void ReverseGraphForOutput ();
    164     void SortLanguage ();
    165     void SortLanguageForMin ();
    166     void SortLanguageReverse ();
    167     void UpdateSortForLanguage ();
    168 
    169     void RemoveRuleConnections ();
    170     void RemoveInternalConnections ();
    171     void RemoveUnreachedConnections (int startPoint, int endPoint);
    172     void RemoveUnreachedConnectionsDebug (int startPoint, int endPoint);
    173     void RemoveTagConnections (int startPoint, int endPoint);
    174     void AddTerminalConnections ();
    175 
    176     void Print ();
    177     void PrintText ();
    178     void PrintWithLabels( GRXMLDoc &p_Doc );
    179     void RemapForSortedOutput ( GRXMLDoc & doc );
    180 
    181     void WriteForwardGraphWithSemantic ( std::string & fileName, GRXMLDoc &p_Doc );
    182     void WriteForwardGraphFile ( std::string & fileName, GRXMLDoc &p_Doc );
    183     void WriteFile ( std::string & fileName, GRXMLDoc &p_Doc );
    184     void WritePhonemeGraphFile( std::string & fileName, GRXMLDoc &p_Doc );
    185     void WriteHMMGraphFile( std::string & fileName, GRXMLDoc &p_Doc );
    186 
    187     void DeterminizeArcs ();
    188     int IsDeterminized (int currId);
    189     void ReduceArcsByEquivalence ();
    190 
    191     void ExpandPhonemes ( GRXMLDoc & doc );
    192     void AddLeftContexts ();
    193     void AddRightContexts ();
    194     void ExpandToHMMs ( GRXMLDoc & doc );
    195     void ExpandIntraWordSilence ( GRXMLDoc & doc );
    196     void ClearOutputs ();
    197     void ShiftOutputsToLeft ();
    198     void FinalProcessing ( GRXMLDoc & doc );
    199 
    200     void DebugPrintDirective (char *dirrData);
    201     void DebugPrintLabel (int labId);
    202 
    203 private:
    204 
    205     int        NewVertexId ();
    206     int        numVertex;
    207 
    208     void       AllocateSpaceForArc ();
    209     NUANArc        *CreateArc (int iLabel, int oLabel, int from, int to);
    210     NUANArc        *InheritArc (NUANArc *arcBase, int id);
    211     NUANArc        *InheritReverseArc (NUANArc *arcBase, int id);
    212     NUANArc        *CreateCopyWithOutput (NUANArc *arcBase, int id);
    213     NUANArc        *InheritReverseArcWithTag (NUANArc *arcBase, int id, int tagId);
    214     int        numArc;
    215     NUANArc        **arc;
    216     int        sortNum;
    217     int        sortRevNum;
    218     int        *forwardList;
    219     int        *backwardList;
    220 
    221     int  ConnectLastScope (int beginScopeId, int endScopeId);
    222     int  CloseScope ();
    223     void UpdateVertexCount (int startLoc);
    224 
    225     int FindFromIndex (int element);
    226     int FindToIndex (int element);
    227     void PullUpBegins (int currId, int baseId, int endId, int procLabel, int *nodeList, int currNum, int maxNum);
    228     void ProcessBegins (int currId, int endId, int procLabel, int *nodeList, int currNum, int *visitMark, int maxNum);
    229     void PullUpEnds (int currId, int baseId, int initialId, int procLabel, int *nodeList, int currNum, int maxNum);
    230     void ProcessEnds (int currId, int initialId, int procLabel, int *nodeList, int currNum, int *visitMark, int maxNum);
    231     bool PullUpTags (int currId, int baseId, int initialId, int outTag, int *nodeList, int currNum, int maxNum);
    232     void ProcessTags (int currId, int initialId, int *nodeList, int currNum, int *visitMark, int maxNum);
    233     void ClearDuplicateArcs ();
    234 
    235     void RemoveRuleStarts (int startId, int endId);
    236     void RemoveRuleEnds (int startId, int endId);
    237     void RemoveNulls (int startPoint, int endPoint);
    238     void RemoveForwardConnections (int startId, int endId);
    239     void RemoveBackwardConnections (int startId, int endId);
    240     void ClearLabelledConnections (int labItem);
    241     void RemoveUnvisitedArcs (int initialId, int finalId);
    242     void RemoveMarkedNodes (int *forwardDepth, int *reverseDepth);
    243     void RemoveDiscardedArcs ();
    244     void RenumberNodes ();
    245 
    246     bool EquivalenceTestForward (int firstId, int secondId, int *equivMap);
    247     void ForwardDepthData (int startId, int *depthMap, int depth);
    248     void ReverseDepthData (int startId, int *depthMap, int depth);
    249     void ReverseDepthOnTerminal (int *depthMap);
    250 
    251     void MapGraphVertices (int *equivMap);
    252     void IdentifyEquivalence (int *depthMap, int *equivMap);
    253     void CheckForChangeAndResort (int currId, int *mapList);
    254 
    255     void SortLanguageAtIndices (int begIx, int endIx);
    256     void SortLanguageAtIndicesForMin (int begIx, int endIx);
    257     void SortLanguageAtSortIndices (int begIx, int endIx);
    258     bool CheckSort ();
    259     bool CheckSortReverse ();
    260     void RemoveDuplicatesAtNode (int rixBegin, int rixEnd);
    261     void MergeVertices (int newId, int firstId, int secondId);
    262     void DeterminizeAtVertex (int baseId, DetCache *cache);
    263     int  PairwiseDeterminize (int baseId, int firstId, int fixStart, int secondId, int sixStart, DetCache *cache);
    264     void ClearRuleIds ();
    265     void ViewNode (int currId);
    266 
    267     void ReverseMarkArcs ();
    268     void ReverseMarkOutput (int currId, int initialId, int outId);
    269     void MarkNodesByOutputAndClearArcs ();
    270     void ExpandWordBoundaries ( GRXMLDoc & doc );
    271     void AddInitialFinalSilences ( GRXMLDoc & doc );
    272 
    273     void UpdateVisitationCache (int startNo);
    274     void ClearVisitationCache ();
    275     void SetupVisitationCache ();
    276     void DecVisitationCache (int currId);
    277     void IncVisitationCache (int currId);
    278     int VisitationConsistencyCheck ();
    279 
    280     void CreateNodeProperty () {
    281         vertexProp= new int [BLKSIZE];
    282         vertexMinned= new bool [BLKSIZE];
    283         sizeProp= BLKSIZE;
    284         for (int ii= 0; ii < sizeProp; ii++) {
    285             vertexProp[ii]= 0;
    286             vertexMinned[ii]= false;
    287 	}
    288         return;
    289     }
    290 
    291     void IncNodeProperty (int vertNo) {
    292         if (vertNo >= sizeProp) {
    293             int newSize= (vertNo/BLKSIZE + 1)*BLKSIZE;
    294             int *newPack = new int [newSize];
    295             bool *newMinned = new bool [newSize];
    296             int ii;
    297             for (ii= 0; ii < sizeProp; ii++) {
    298                 newPack[ii]= vertexProp[ii];
    299                 newMinned[ii]= vertexMinned[ii];
    300 	    }
    301             for (ii= sizeProp; ii < newSize; ii++) {
    302                 newPack[ii]= 0;
    303                 newMinned[ii]= false;
    304 	    }
    305             delete [] vertexProp;
    306             delete [] vertexMinned;
    307             vertexProp= newPack;
    308             vertexMinned= newMinned;
    309 	    sizeProp= newSize;
    310         }
    311         vertexProp[vertNo]++;
    312         return;
    313     }
    314 
    315     void DecNodeProperty (int vertNo) {
    316         if (vertNo < sizeProp)
    317             vertexProp[vertNo]--;
    318         return;
    319     }
    320 
    321     void SetNodeProperty (int vertNo, int value) {
    322         if (vertNo < sizeProp)
    323             vertexProp[vertNo]= value;
    324         return;
    325     }
    326 
    327     void SetMinProperty (int vertNo) {
    328         if (vertNo < sizeProp)
    329             vertexMinned[vertNo]= true;
    330         return;
    331     }
    332 
    333     int QueryNodeProperty (int vertNo) {
    334         if (vertNo < sizeProp)
    335             return (vertexProp[vertNo]);
    336         return -1;
    337     }
    338 
    339     int QueryMinProperty (int vertNo) {
    340         if (vertNo < sizeProp)
    341             return (vertexMinned[vertNo]);
    342         return false;
    343     }
    344 
    345     void DeleteNodeProperty () {
    346         delete [] vertexProp;
    347         delete [] vertexMinned;
    348         vertexProp= 0;
    349         vertexMinned= 0;
    350     }
    351 
    352 
    353     /*  Stacking of operations and related data
    354     */
    355     char    *title;
    356     int     ruleId;
    357     int     lastScope;
    358     int     startId;
    359     int     endId;
    360     int     prevStartId;
    361     int     prevEndId;
    362     int     lastId;
    363     int     arg1;
    364     int     arg2;
    365     int     arcLoc;
    366     int     opStack[100000];      // TODO: remove hard coded number
    367     int     popOp;
    368     void    pushScope ();
    369     void    popScope ();
    370     int     silenceId;
    371     int     intraId;
    372     int     *vertexProp;          //  Vertex properties
    373     bool    *vertexMinned;          //  Vertex properties
    374     int     sizeProp;
    375 };
    376 
    377 #endif // __sub_grph_h__
    378