Home | History | Annotate | Download | only in common
      1 /*
      2 ***************************************************************************
      3 *   Copyright (C) 2002-2008 International Business Machines Corporation   *
      4 *   and others. All rights reserved.                                      *
      5 ***************************************************************************
      6 */
      7 
      8 //
      9 //  File:  rbbinode.cpp
     10 //
     11 //         Implementation of class RBBINode, which represents a node in the
     12 //         tree generated when parsing the Rules Based Break Iterator rules.
     13 //
     14 //         This "Class" is actually closer to a struct.
     15 //         Code using it is expected to directly access fields much of the time.
     16 //
     17 
     18 #include "unicode/utypes.h"
     19 
     20 #if !UCONFIG_NO_BREAK_ITERATION
     21 
     22 #include "unicode/unistr.h"
     23 #include "unicode/uniset.h"
     24 #include "unicode/uchar.h"
     25 #include "unicode/parsepos.h"
     26 #include "uvector.h"
     27 
     28 #include "rbbirb.h"
     29 #include "rbbinode.h"
     30 
     31 #include "uassert.h"
     32 
     33 
     34 U_NAMESPACE_BEGIN
     35 
     36 #ifdef RBBI_DEBUG
     37 static int  gLastSerial = 0;
     38 #endif
     39 
     40 
     41 //-------------------------------------------------------------------------
     42 //
     43 //    Constructor.   Just set the fields to reasonable default values.
     44 //
     45 //-------------------------------------------------------------------------
     46 RBBINode::RBBINode(NodeType t) : UMemory() {
     47 #ifdef RBBI_DEBUG
     48     fSerialNum    = ++gLastSerial;
     49 #endif
     50     fType         = t;
     51     fParent       = NULL;
     52     fLeftChild    = NULL;
     53     fRightChild   = NULL;
     54     fInputSet     = NULL;
     55     fFirstPos     = 0;
     56     fLastPos      = 0;
     57     fNullable     = FALSE;
     58     fLookAheadEnd = FALSE;
     59     fVal          = 0;
     60     fPrecedence   = precZero;
     61 
     62     UErrorCode     status = U_ZERO_ERROR;
     63     fFirstPosSet  = new UVector(status);  // TODO - get a real status from somewhere
     64     fLastPosSet   = new UVector(status);
     65     fFollowPos    = new UVector(status);
     66     if      (t==opCat)    {fPrecedence = precOpCat;}
     67     else if (t==opOr)     {fPrecedence = precOpOr;}
     68     else if (t==opStart)  {fPrecedence = precStart;}
     69     else if (t==opLParen) {fPrecedence = precLParen;}
     70 
     71 }
     72 
     73 
     74 RBBINode::RBBINode(const RBBINode &other) : UMemory(other) {
     75 #ifdef RBBI_DEBUG
     76     fSerialNum   = ++gLastSerial;
     77 #endif
     78     fType        = other.fType;
     79     fParent      = NULL;
     80     fLeftChild   = NULL;
     81     fRightChild  = NULL;
     82     fInputSet    = other.fInputSet;
     83     fPrecedence  = other.fPrecedence;
     84     fText        = other.fText;
     85     fFirstPos    = other.fFirstPos;
     86     fLastPos     = other.fLastPos;
     87     fNullable    = other.fNullable;
     88     fVal         = other.fVal;
     89     UErrorCode     status = U_ZERO_ERROR;
     90     fFirstPosSet = new UVector(status);   // TODO - get a real status from somewhere
     91     fLastPosSet  = new UVector(status);
     92     fFollowPos   = new UVector(status);
     93 }
     94 
     95 
     96 //-------------------------------------------------------------------------
     97 //
     98 //    Destructor.   Deletes both this node AND any child nodes,
     99 //                  except in the case of variable reference nodes.  For
    100 //                  these, the l. child points back to the definition, which
    101 //                  is common for all references to the variable, meaning
    102 //                  it can't be deleted here.
    103 //
    104 //-------------------------------------------------------------------------
    105 RBBINode::~RBBINode() {
    106     // printf("deleting node %8x   serial %4d\n", this, this->fSerialNum);
    107     delete fInputSet;
    108     fInputSet = NULL;
    109 
    110     switch (this->fType) {
    111     case varRef:
    112     case setRef:
    113         // for these node types, multiple instances point to the same "children"
    114         // Storage ownership of children handled elsewhere.  Don't delete here.
    115         break;
    116 
    117     default:
    118         delete        fLeftChild;
    119         fLeftChild =   NULL;
    120         delete        fRightChild;
    121         fRightChild = NULL;
    122     }
    123 
    124 
    125     delete fFirstPosSet;
    126     delete fLastPosSet;
    127     delete fFollowPos;
    128 
    129 }
    130 
    131 
    132 //-------------------------------------------------------------------------
    133 //
    134 //    cloneTree     Make a copy of the subtree rooted at this node.
    135 //                  Discard any variable references encountered along the way,
    136 //                  and replace with copies of the variable's definitions.
    137 //                  Used to replicate the expression underneath variable
    138 //                  references in preparation for generating the DFA tables.
    139 //
    140 //-------------------------------------------------------------------------
    141 RBBINode *RBBINode::cloneTree() {
    142     RBBINode    *n;
    143 
    144     if (fType == RBBINode::varRef) {
    145         // If the current node is a variable reference, skip over it
    146         //   and clone the definition of the variable instead.
    147         n = fLeftChild->cloneTree();
    148     } else if (fType == RBBINode::uset) {
    149         n = this;
    150     } else {
    151         n = new RBBINode(*this);
    152         // Check for null pointer.
    153         if (n != NULL) {
    154             if (fLeftChild != NULL) {
    155                 n->fLeftChild          = fLeftChild->cloneTree();
    156                 n->fLeftChild->fParent = n;
    157             }
    158             if (fRightChild != NULL) {
    159                 n->fRightChild          = fRightChild->cloneTree();
    160                 n->fRightChild->fParent = n;
    161             }
    162         }
    163     }
    164     return n;
    165 }
    166 
    167 
    168 
    169 //-------------------------------------------------------------------------
    170 //
    171 //   flattenVariables   Walk a parse tree, replacing any variable
    172 //                      references with a copy of the variable's definition.
    173 //                      Aside from variables, the tree is not changed.
    174 //
    175 //                      Return the root of the tree.  If the root was not a variable
    176 //                      reference, it remains unchanged - the root we started with
    177 //                      is the root we return.  If, however, the root was a variable
    178 //                      reference, the root of the newly cloned replacement tree will
    179 //                      be returned, and the original tree deleted.
    180 //
    181 //                      This function works by recursively walking the tree
    182 //                      without doing anything until a variable reference is
    183 //                      found, then calling cloneTree() at that point.  Any
    184 //                      nested references are handled by cloneTree(), not here.
    185 //
    186 //-------------------------------------------------------------------------
    187 RBBINode *RBBINode::flattenVariables() {
    188     if (fType == varRef) {
    189         RBBINode *retNode = fLeftChild->cloneTree();
    190         delete this;
    191         return retNode;
    192     }
    193 
    194     if (fLeftChild != NULL) {
    195         fLeftChild = fLeftChild->flattenVariables();
    196         fLeftChild->fParent  = this;
    197     }
    198     if (fRightChild != NULL) {
    199         fRightChild = fRightChild->flattenVariables();
    200         fRightChild->fParent = this;
    201     }
    202     return this;
    203 }
    204 
    205 
    206 //-------------------------------------------------------------------------
    207 //
    208 //  flattenSets    Walk the parse tree, replacing any nodes of type setRef
    209 //                 with a copy of the expression tree for the set.  A set's
    210 //                 equivalent expression tree is precomputed and saved as
    211 //                 the left child of the uset node.
    212 //
    213 //-------------------------------------------------------------------------
    214 void RBBINode::flattenSets() {
    215     U_ASSERT(fType != setRef);
    216 
    217     if (fLeftChild != NULL) {
    218         if (fLeftChild->fType==setRef) {
    219             RBBINode *setRefNode = fLeftChild;
    220             RBBINode *usetNode   = setRefNode->fLeftChild;
    221             RBBINode *replTree   = usetNode->fLeftChild;
    222             fLeftChild           = replTree->cloneTree();
    223             fLeftChild->fParent  = this;
    224             delete setRefNode;
    225         } else {
    226             fLeftChild->flattenSets();
    227         }
    228     }
    229 
    230     if (fRightChild != NULL) {
    231         if (fRightChild->fType==setRef) {
    232             RBBINode *setRefNode = fRightChild;
    233             RBBINode *usetNode   = setRefNode->fLeftChild;
    234             RBBINode *replTree   = usetNode->fLeftChild;
    235             fRightChild           = replTree->cloneTree();
    236             fRightChild->fParent  = this;
    237             delete setRefNode;
    238         } else {
    239             fRightChild->flattenSets();
    240         }
    241     }
    242 }
    243 
    244 
    245 
    246 //-------------------------------------------------------------------------
    247 //
    248 //   findNodes()     Locate all the nodes of the specified type, starting
    249 //                   at the specified root.
    250 //
    251 //-------------------------------------------------------------------------
    252 void   RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) {
    253     /* test for buffer overflows */
    254     if (U_FAILURE(status)) {
    255         return;
    256     }
    257     if (fType == kind) {
    258         dest->addElement(this, status);
    259     }
    260     if (fLeftChild != NULL) {
    261         fLeftChild->findNodes(dest, kind, status);
    262     }
    263     if (fRightChild != NULL) {
    264         fRightChild->findNodes(dest, kind, status);
    265     }
    266 }
    267 
    268 
    269 //-------------------------------------------------------------------------
    270 //
    271 //    print.         Print out a single node, for debugging.
    272 //
    273 //-------------------------------------------------------------------------
    274 #ifdef RBBI_DEBUG
    275 void RBBINode::printNode() {
    276     static const char * const nodeTypeNames[] = {
    277                 "setRef",
    278                 "uset",
    279                 "varRef",
    280                 "leafChar",
    281                 "lookAhead",
    282                 "tag",
    283                 "endMark",
    284                 "opStart",
    285                 "opCat",
    286                 "opOr",
    287                 "opStar",
    288                 "opPlus",
    289                 "opQuestion",
    290                 "opBreak",
    291                 "opReverse",
    292                 "opLParen"
    293     };
    294 
    295     if (this==NULL) {
    296         RBBIDebugPrintf("%10p", (void *)this);
    297     } else {
    298         RBBIDebugPrintf("%10p  %12s  %10p  %10p  %10p      %4d     %6d   %d ",
    299             (void *)this, nodeTypeNames[fType], (void *)fParent, (void *)fLeftChild, (void *)fRightChild,
    300             fSerialNum, fFirstPos, fVal);
    301         if (fType == varRef) {
    302             RBBI_DEBUG_printUnicodeString(fText);
    303         }
    304     }
    305     RBBIDebugPrintf("\n");
    306 }
    307 #endif
    308 
    309 
    310 #ifdef RBBI_DEBUG
    311 U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth)
    312 {
    313     int i;
    314     for (i=0; i<s.length(); i++) {
    315         RBBIDebugPrintf("%c", s.charAt(i));
    316         // putc(s.charAt(i), stdout);
    317     }
    318     for (i=s.length(); i<minWidth; i++) {
    319         RBBIDebugPrintf(" ");
    320     }
    321 }
    322 #endif
    323 
    324 
    325 //-------------------------------------------------------------------------
    326 //
    327 //    print.         Print out the tree of nodes rooted at "this"
    328 //
    329 //-------------------------------------------------------------------------
    330 #ifdef RBBI_DEBUG
    331 void RBBINode::printTree(UBool printHeading) {
    332     if (printHeading) {
    333         RBBIDebugPrintf( "-------------------------------------------------------------------\n"
    334                          "    Address       type         Parent   LeftChild  RightChild    serial  position value\n"
    335               );
    336     }
    337     this->printNode();
    338     if (this != NULL) {
    339         // Only dump the definition under a variable reference if asked to.
    340         // Unconditinally dump children of all other node types.
    341         if (fType != varRef) {
    342             if (fLeftChild != NULL) {
    343                 fLeftChild->printTree(FALSE);
    344             }
    345 
    346             if (fRightChild != NULL) {
    347                 fRightChild->printTree(FALSE);
    348             }
    349         }
    350     }
    351 }
    352 #endif
    353 
    354 
    355 
    356 U_NAMESPACE_END
    357 
    358 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
    359