Home | History | Annotate | Download | only in common
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ***************************************************************************
      5 *   Copyright (C) 2002-2016 International Business Machines Corporation   *
      6 *   and others. All rights reserved.                                      *
      7 ***************************************************************************
      8 */
      9 
     10 //
     11 //  File:  rbbinode.cpp
     12 //
     13 //         Implementation of class RBBINode, which represents a node in the
     14 //         tree generated when parsing the Rules Based Break Iterator rules.
     15 //
     16 //         This "Class" is actually closer to a struct.
     17 //         Code using it is expected to directly access fields much of the time.
     18 //
     19 
     20 #include "unicode/utypes.h"
     21 
     22 #if !UCONFIG_NO_BREAK_ITERATION
     23 
     24 #include "unicode/unistr.h"
     25 #include "unicode/uniset.h"
     26 #include "unicode/uchar.h"
     27 #include "unicode/parsepos.h"
     28 
     29 #include "cstr.h"
     30 #include "uvector.h"
     31 
     32 #include "rbbirb.h"
     33 #include "rbbinode.h"
     34 
     35 #include "uassert.h"
     36 
     37 
     38 U_NAMESPACE_BEGIN
     39 
     40 #ifdef RBBI_DEBUG
     41 static int  gLastSerial = 0;
     42 #endif
     43 
     44 
     45 //-------------------------------------------------------------------------
     46 //
     47 //    Constructor.   Just set the fields to reasonable default values.
     48 //
     49 //-------------------------------------------------------------------------
     50 RBBINode::RBBINode(NodeType t) : UMemory() {
     51 #ifdef RBBI_DEBUG
     52     fSerialNum    = ++gLastSerial;
     53 #endif
     54     fType         = t;
     55     fParent       = NULL;
     56     fLeftChild    = NULL;
     57     fRightChild   = NULL;
     58     fInputSet     = NULL;
     59     fFirstPos     = 0;
     60     fLastPos      = 0;
     61     fNullable     = FALSE;
     62     fLookAheadEnd = FALSE;
     63     fRuleRoot     = FALSE;
     64     fChainIn      = FALSE;
     65     fVal          = 0;
     66     fPrecedence   = precZero;
     67 
     68     UErrorCode     status = U_ZERO_ERROR;
     69     fFirstPosSet  = new UVector(status);  // TODO - get a real status from somewhere
     70     fLastPosSet   = new UVector(status);
     71     fFollowPos    = new UVector(status);
     72     if      (t==opCat)    {fPrecedence = precOpCat;}
     73     else if (t==opOr)     {fPrecedence = precOpOr;}
     74     else if (t==opStart)  {fPrecedence = precStart;}
     75     else if (t==opLParen) {fPrecedence = precLParen;}
     76 
     77 }
     78 
     79 
     80 RBBINode::RBBINode(const RBBINode &other) : UMemory(other) {
     81 #ifdef RBBI_DEBUG
     82     fSerialNum   = ++gLastSerial;
     83 #endif
     84     fType        = other.fType;
     85     fParent      = NULL;
     86     fLeftChild   = NULL;
     87     fRightChild  = NULL;
     88     fInputSet    = other.fInputSet;
     89     fPrecedence  = other.fPrecedence;
     90     fText        = other.fText;
     91     fFirstPos    = other.fFirstPos;
     92     fLastPos     = other.fLastPos;
     93     fNullable    = other.fNullable;
     94     fVal         = other.fVal;
     95     fRuleRoot    = FALSE;
     96     fChainIn     = other.fChainIn;
     97     UErrorCode     status = U_ZERO_ERROR;
     98     fFirstPosSet = new UVector(status);   // TODO - get a real status from somewhere
     99     fLastPosSet  = new UVector(status);
    100     fFollowPos   = new UVector(status);
    101 }
    102 
    103 
    104 //-------------------------------------------------------------------------
    105 //
    106 //    Destructor.   Deletes both this node AND any child nodes,
    107 //                  except in the case of variable reference nodes.  For
    108 //                  these, the l. child points back to the definition, which
    109 //                  is common for all references to the variable, meaning
    110 //                  it can't be deleted here.
    111 //
    112 //-------------------------------------------------------------------------
    113 RBBINode::~RBBINode() {
    114     // printf("deleting node %8x   serial %4d\n", this, this->fSerialNum);
    115     delete fInputSet;
    116     fInputSet = NULL;
    117 
    118     switch (this->fType) {
    119     case varRef:
    120     case setRef:
    121         // for these node types, multiple instances point to the same "children"
    122         // Storage ownership of children handled elsewhere.  Don't delete here.
    123         break;
    124 
    125     default:
    126         delete        fLeftChild;
    127         fLeftChild =   NULL;
    128         delete        fRightChild;
    129         fRightChild = NULL;
    130     }
    131 
    132 
    133     delete fFirstPosSet;
    134     delete fLastPosSet;
    135     delete fFollowPos;
    136 
    137 }
    138 
    139 
    140 //-------------------------------------------------------------------------
    141 //
    142 //    cloneTree     Make a copy of the subtree rooted at this node.
    143 //                  Discard any variable references encountered along the way,
    144 //                  and replace with copies of the variable's definitions.
    145 //                  Used to replicate the expression underneath variable
    146 //                  references in preparation for generating the DFA tables.
    147 //
    148 //-------------------------------------------------------------------------
    149 RBBINode *RBBINode::cloneTree() {
    150     RBBINode    *n;
    151 
    152     if (fType == RBBINode::varRef) {
    153         // If the current node is a variable reference, skip over it
    154         //   and clone the definition of the variable instead.
    155         n = fLeftChild->cloneTree();
    156     } else if (fType == RBBINode::uset) {
    157         n = this;
    158     } else {
    159         n = new RBBINode(*this);
    160         // Check for null pointer.
    161         if (n != NULL) {
    162             if (fLeftChild != NULL) {
    163                 n->fLeftChild          = fLeftChild->cloneTree();
    164                 n->fLeftChild->fParent = n;
    165             }
    166             if (fRightChild != NULL) {
    167                 n->fRightChild          = fRightChild->cloneTree();
    168                 n->fRightChild->fParent = n;
    169             }
    170         }
    171     }
    172     return n;
    173 }
    174 
    175 
    176 
    177 //-------------------------------------------------------------------------
    178 //
    179 //   flattenVariables   Walk a parse tree, replacing any variable
    180 //                      references with a copy of the variable's definition.
    181 //                      Aside from variables, the tree is not changed.
    182 //
    183 //                      Return the root of the tree.  If the root was not a variable
    184 //                      reference, it remains unchanged - the root we started with
    185 //                      is the root we return.  If, however, the root was a variable
    186 //                      reference, the root of the newly cloned replacement tree will
    187 //                      be returned, and the original tree deleted.
    188 //
    189 //                      This function works by recursively walking the tree
    190 //                      without doing anything until a variable reference is
    191 //                      found, then calling cloneTree() at that point.  Any
    192 //                      nested references are handled by cloneTree(), not here.
    193 //
    194 //-------------------------------------------------------------------------
    195 RBBINode *RBBINode::flattenVariables() {
    196     if (fType == varRef) {
    197         RBBINode *retNode  = fLeftChild->cloneTree();
    198         if (retNode != NULL) {
    199             retNode->fRuleRoot = this->fRuleRoot;
    200             retNode->fChainIn  = this->fChainIn;
    201         }
    202         delete this;   // TODO: undefined behavior. Fix.
    203         return retNode;
    204     }
    205 
    206     if (fLeftChild != NULL) {
    207         fLeftChild = fLeftChild->flattenVariables();
    208         fLeftChild->fParent  = this;
    209     }
    210     if (fRightChild != NULL) {
    211         fRightChild = fRightChild->flattenVariables();
    212         fRightChild->fParent = this;
    213     }
    214     return this;
    215 }
    216 
    217 
    218 //-------------------------------------------------------------------------
    219 //
    220 //  flattenSets    Walk the parse tree, replacing any nodes of type setRef
    221 //                 with a copy of the expression tree for the set.  A set's
    222 //                 equivalent expression tree is precomputed and saved as
    223 //                 the left child of the uset node.
    224 //
    225 //-------------------------------------------------------------------------
    226 void RBBINode::flattenSets() {
    227     U_ASSERT(fType != setRef);
    228 
    229     if (fLeftChild != NULL) {
    230         if (fLeftChild->fType==setRef) {
    231             RBBINode *setRefNode = fLeftChild;
    232             RBBINode *usetNode   = setRefNode->fLeftChild;
    233             RBBINode *replTree   = usetNode->fLeftChild;
    234             fLeftChild           = replTree->cloneTree();
    235             fLeftChild->fParent  = this;
    236             delete setRefNode;
    237         } else {
    238             fLeftChild->flattenSets();
    239         }
    240     }
    241 
    242     if (fRightChild != NULL) {
    243         if (fRightChild->fType==setRef) {
    244             RBBINode *setRefNode = fRightChild;
    245             RBBINode *usetNode   = setRefNode->fLeftChild;
    246             RBBINode *replTree   = usetNode->fLeftChild;
    247             fRightChild           = replTree->cloneTree();
    248             fRightChild->fParent  = this;
    249             delete setRefNode;
    250         } else {
    251             fRightChild->flattenSets();
    252         }
    253     }
    254 }
    255 
    256 
    257 
    258 //-------------------------------------------------------------------------
    259 //
    260 //   findNodes()     Locate all the nodes of the specified type, starting
    261 //                   at the specified root.
    262 //
    263 //-------------------------------------------------------------------------
    264 void   RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) {
    265     /* test for buffer overflows */
    266     if (U_FAILURE(status)) {
    267         return;
    268     }
    269     if (fType == kind) {
    270         dest->addElement(this, status);
    271     }
    272     if (fLeftChild != NULL) {
    273         fLeftChild->findNodes(dest, kind, status);
    274     }
    275     if (fRightChild != NULL) {
    276         fRightChild->findNodes(dest, kind, status);
    277     }
    278 }
    279 
    280 
    281 //-------------------------------------------------------------------------
    282 //
    283 //    print.         Print out a single node, for debugging.
    284 //
    285 //-------------------------------------------------------------------------
    286 #ifdef RBBI_DEBUG
    287 
    288 static int32_t serial(const RBBINode *node) {
    289     return (node == NULL? -1 : node->fSerialNum);
    290 }
    291 
    292 
    293 void RBBINode::printNode(const RBBINode *node) {
    294     static const char * const nodeTypeNames[] = {
    295                 "setRef",
    296                 "uset",
    297                 "varRef",
    298                 "leafChar",
    299                 "lookAhead",
    300                 "tag",
    301                 "endMark",
    302                 "opStart",
    303                 "opCat",
    304                 "opOr",
    305                 "opStar",
    306                 "opPlus",
    307                 "opQuestion",
    308                 "opBreak",
    309                 "opReverse",
    310                 "opLParen"
    311     };
    312 
    313     if (node==NULL) {
    314         RBBIDebugPrintf("%10p", (void *)node);
    315     } else {
    316         RBBIDebugPrintf("%10p %5d %12s %c%c  %5d       %5d     %5d       %6d     %d ",
    317             (void *)node, node->fSerialNum, nodeTypeNames[node->fType],
    318             node->fRuleRoot?'R':' ', node->fChainIn?'C':' ',
    319             serial(node->fLeftChild), serial(node->fRightChild), serial(node->fParent),
    320             node->fFirstPos, node->fVal);
    321         if (node->fType == varRef) {
    322             RBBI_DEBUG_printUnicodeString(node->fText);
    323         }
    324     }
    325     RBBIDebugPrintf("\n");
    326 }
    327 #endif
    328 
    329 
    330 #ifdef RBBI_DEBUG
    331 U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth) {
    332     RBBIDebugPrintf("%*s", minWidth, CStr(s)());
    333 }
    334 #endif
    335 
    336 
    337 //-------------------------------------------------------------------------
    338 //
    339 //    print.         Print out the tree of nodes rooted at "this"
    340 //
    341 //-------------------------------------------------------------------------
    342 #ifdef RBBI_DEBUG
    343 void RBBINode::printNodeHeader() {
    344     RBBIDebugPrintf(" Address   serial        type     LeftChild  RightChild   Parent   position value\n");
    345 }
    346 
    347 void RBBINode::printTree(const RBBINode *node, UBool printHeading) {
    348     if (printHeading) {
    349         printNodeHeader();
    350     }
    351     printNode(node);
    352     if (node != NULL) {
    353         // Only dump the definition under a variable reference if asked to.
    354         // Unconditinally dump children of all other node types.
    355         if (node->fType != varRef) {
    356             if (node->fLeftChild != NULL) {
    357                 printTree(node->fLeftChild, FALSE);
    358             }
    359 
    360             if (node->fRightChild != NULL) {
    361                 printTree(node->fRightChild, FALSE);
    362             }
    363         }
    364     }
    365 }
    366 #endif
    367 
    368 
    369 
    370 U_NAMESPACE_END
    371 
    372 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
    373