1 /* 2 *************************************************************************** 3 * Copyright (C) 2002-2008 International Business Machines Corporation * 4 * and others. All rights reserved. * 5 *************************************************************************** 6 */ 7 8 // 9 // File: rbbinode.cpp 10 // 11 // Implementation of class RBBINode, which represents a node in the 12 // tree generated when parsing the Rules Based Break Iterator rules. 13 // 14 // This "Class" is actually closer to a struct. 15 // Code using it is expected to directly access fields much of the time. 16 // 17 18 #include "unicode/utypes.h" 19 20 #if !UCONFIG_NO_BREAK_ITERATION 21 22 #include "unicode/unistr.h" 23 #include "unicode/uniset.h" 24 #include "unicode/uchar.h" 25 #include "unicode/parsepos.h" 26 #include "uvector.h" 27 28 #include "rbbirb.h" 29 #include "rbbinode.h" 30 31 #include "uassert.h" 32 33 34 U_NAMESPACE_BEGIN 35 36 #ifdef RBBI_DEBUG 37 static int gLastSerial = 0; 38 #endif 39 40 41 //------------------------------------------------------------------------- 42 // 43 // Constructor. Just set the fields to reasonable default values. 44 // 45 //------------------------------------------------------------------------- 46 RBBINode::RBBINode(NodeType t) : UMemory() { 47 #ifdef RBBI_DEBUG 48 fSerialNum = ++gLastSerial; 49 #endif 50 fType = t; 51 fParent = NULL; 52 fLeftChild = NULL; 53 fRightChild = NULL; 54 fInputSet = NULL; 55 fFirstPos = 0; 56 fLastPos = 0; 57 fNullable = FALSE; 58 fLookAheadEnd = FALSE; 59 fVal = 0; 60 fPrecedence = precZero; 61 62 UErrorCode status = U_ZERO_ERROR; 63 fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere 64 fLastPosSet = new UVector(status); 65 fFollowPos = new UVector(status); 66 if (t==opCat) {fPrecedence = precOpCat;} 67 else if (t==opOr) {fPrecedence = precOpOr;} 68 else if (t==opStart) {fPrecedence = precStart;} 69 else if (t==opLParen) {fPrecedence = precLParen;} 70 71 } 72 73 74 RBBINode::RBBINode(const RBBINode &other) : UMemory(other) { 75 #ifdef RBBI_DEBUG 76 fSerialNum = ++gLastSerial; 77 #endif 78 fType = other.fType; 79 fParent = NULL; 80 fLeftChild = NULL; 81 fRightChild = NULL; 82 fInputSet = other.fInputSet; 83 fPrecedence = other.fPrecedence; 84 fText = other.fText; 85 fFirstPos = other.fFirstPos; 86 fLastPos = other.fLastPos; 87 fNullable = other.fNullable; 88 fVal = other.fVal; 89 UErrorCode status = U_ZERO_ERROR; 90 fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere 91 fLastPosSet = new UVector(status); 92 fFollowPos = new UVector(status); 93 } 94 95 96 //------------------------------------------------------------------------- 97 // 98 // Destructor. Deletes both this node AND any child nodes, 99 // except in the case of variable reference nodes. For 100 // these, the l. child points back to the definition, which 101 // is common for all references to the variable, meaning 102 // it can't be deleted here. 103 // 104 //------------------------------------------------------------------------- 105 RBBINode::~RBBINode() { 106 // printf("deleting node %8x serial %4d\n", this, this->fSerialNum); 107 delete fInputSet; 108 fInputSet = NULL; 109 110 switch (this->fType) { 111 case varRef: 112 case setRef: 113 // for these node types, multiple instances point to the same "children" 114 // Storage ownership of children handled elsewhere. Don't delete here. 115 break; 116 117 default: 118 delete fLeftChild; 119 fLeftChild = NULL; 120 delete fRightChild; 121 fRightChild = NULL; 122 } 123 124 125 delete fFirstPosSet; 126 delete fLastPosSet; 127 delete fFollowPos; 128 129 } 130 131 132 //------------------------------------------------------------------------- 133 // 134 // cloneTree Make a copy of the subtree rooted at this node. 135 // Discard any variable references encountered along the way, 136 // and replace with copies of the variable's definitions. 137 // Used to replicate the expression underneath variable 138 // references in preparation for generating the DFA tables. 139 // 140 //------------------------------------------------------------------------- 141 RBBINode *RBBINode::cloneTree() { 142 RBBINode *n; 143 144 if (fType == RBBINode::varRef) { 145 // If the current node is a variable reference, skip over it 146 // and clone the definition of the variable instead. 147 n = fLeftChild->cloneTree(); 148 } else if (fType == RBBINode::uset) { 149 n = this; 150 } else { 151 n = new RBBINode(*this); 152 // Check for null pointer. 153 if (n != NULL) { 154 if (fLeftChild != NULL) { 155 n->fLeftChild = fLeftChild->cloneTree(); 156 n->fLeftChild->fParent = n; 157 } 158 if (fRightChild != NULL) { 159 n->fRightChild = fRightChild->cloneTree(); 160 n->fRightChild->fParent = n; 161 } 162 } 163 } 164 return n; 165 } 166 167 168 169 //------------------------------------------------------------------------- 170 // 171 // flattenVariables Walk a parse tree, replacing any variable 172 // references with a copy of the variable's definition. 173 // Aside from variables, the tree is not changed. 174 // 175 // Return the root of the tree. If the root was not a variable 176 // reference, it remains unchanged - the root we started with 177 // is the root we return. If, however, the root was a variable 178 // reference, the root of the newly cloned replacement tree will 179 // be returned, and the original tree deleted. 180 // 181 // This function works by recursively walking the tree 182 // without doing anything until a variable reference is 183 // found, then calling cloneTree() at that point. Any 184 // nested references are handled by cloneTree(), not here. 185 // 186 //------------------------------------------------------------------------- 187 RBBINode *RBBINode::flattenVariables() { 188 if (fType == varRef) { 189 RBBINode *retNode = fLeftChild->cloneTree(); 190 delete this; 191 return retNode; 192 } 193 194 if (fLeftChild != NULL) { 195 fLeftChild = fLeftChild->flattenVariables(); 196 fLeftChild->fParent = this; 197 } 198 if (fRightChild != NULL) { 199 fRightChild = fRightChild->flattenVariables(); 200 fRightChild->fParent = this; 201 } 202 return this; 203 } 204 205 206 //------------------------------------------------------------------------- 207 // 208 // flattenSets Walk the parse tree, replacing any nodes of type setRef 209 // with a copy of the expression tree for the set. A set's 210 // equivalent expression tree is precomputed and saved as 211 // the left child of the uset node. 212 // 213 //------------------------------------------------------------------------- 214 void RBBINode::flattenSets() { 215 U_ASSERT(fType != setRef); 216 217 if (fLeftChild != NULL) { 218 if (fLeftChild->fType==setRef) { 219 RBBINode *setRefNode = fLeftChild; 220 RBBINode *usetNode = setRefNode->fLeftChild; 221 RBBINode *replTree = usetNode->fLeftChild; 222 fLeftChild = replTree->cloneTree(); 223 fLeftChild->fParent = this; 224 delete setRefNode; 225 } else { 226 fLeftChild->flattenSets(); 227 } 228 } 229 230 if (fRightChild != NULL) { 231 if (fRightChild->fType==setRef) { 232 RBBINode *setRefNode = fRightChild; 233 RBBINode *usetNode = setRefNode->fLeftChild; 234 RBBINode *replTree = usetNode->fLeftChild; 235 fRightChild = replTree->cloneTree(); 236 fRightChild->fParent = this; 237 delete setRefNode; 238 } else { 239 fRightChild->flattenSets(); 240 } 241 } 242 } 243 244 245 246 //------------------------------------------------------------------------- 247 // 248 // findNodes() Locate all the nodes of the specified type, starting 249 // at the specified root. 250 // 251 //------------------------------------------------------------------------- 252 void RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) { 253 /* test for buffer overflows */ 254 if (U_FAILURE(status)) { 255 return; 256 } 257 if (fType == kind) { 258 dest->addElement(this, status); 259 } 260 if (fLeftChild != NULL) { 261 fLeftChild->findNodes(dest, kind, status); 262 } 263 if (fRightChild != NULL) { 264 fRightChild->findNodes(dest, kind, status); 265 } 266 } 267 268 269 //------------------------------------------------------------------------- 270 // 271 // print. Print out a single node, for debugging. 272 // 273 //------------------------------------------------------------------------- 274 #ifdef RBBI_DEBUG 275 void RBBINode::printNode() { 276 static const char * const nodeTypeNames[] = { 277 "setRef", 278 "uset", 279 "varRef", 280 "leafChar", 281 "lookAhead", 282 "tag", 283 "endMark", 284 "opStart", 285 "opCat", 286 "opOr", 287 "opStar", 288 "opPlus", 289 "opQuestion", 290 "opBreak", 291 "opReverse", 292 "opLParen" 293 }; 294 295 if (this==NULL) { 296 RBBIDebugPrintf("%10p", (void *)this); 297 } else { 298 RBBIDebugPrintf("%10p %12s %10p %10p %10p %4d %6d %d ", 299 (void *)this, nodeTypeNames[fType], (void *)fParent, (void *)fLeftChild, (void *)fRightChild, 300 fSerialNum, fFirstPos, fVal); 301 if (fType == varRef) { 302 RBBI_DEBUG_printUnicodeString(fText); 303 } 304 } 305 RBBIDebugPrintf("\n"); 306 } 307 #endif 308 309 310 #ifdef RBBI_DEBUG 311 U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth) 312 { 313 int i; 314 for (i=0; i<s.length(); i++) { 315 RBBIDebugPrintf("%c", s.charAt(i)); 316 // putc(s.charAt(i), stdout); 317 } 318 for (i=s.length(); i<minWidth; i++) { 319 RBBIDebugPrintf(" "); 320 } 321 } 322 #endif 323 324 325 //------------------------------------------------------------------------- 326 // 327 // print. Print out the tree of nodes rooted at "this" 328 // 329 //------------------------------------------------------------------------- 330 #ifdef RBBI_DEBUG 331 void RBBINode::printTree(UBool printHeading) { 332 if (printHeading) { 333 RBBIDebugPrintf( "-------------------------------------------------------------------\n" 334 " Address type Parent LeftChild RightChild serial position value\n" 335 ); 336 } 337 this->printNode(); 338 if (this != NULL) { 339 // Only dump the definition under a variable reference if asked to. 340 // Unconditinally dump children of all other node types. 341 if (fType != varRef) { 342 if (fLeftChild != NULL) { 343 fLeftChild->printTree(FALSE); 344 } 345 346 if (fRightChild != NULL) { 347 fRightChild->printTree(FALSE); 348 } 349 } 350 } 351 } 352 #endif 353 354 355 356 U_NAMESPACE_END 357 358 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 359