1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /******************************************************************** 4 * COPYRIGHT: 5 * Copyright (c) 2001-2016, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ********************************************************************/ 8 9 #ifndef RBBINODE_H 10 #define RBBINODE_H 11 12 #include "unicode/utypes.h" 13 #include "unicode/unistr.h" 14 #include "unicode/uobject.h" 15 16 // 17 // class RBBINode 18 // 19 // Represents a node in the parse tree generated when reading 20 // a rule file. 21 // 22 23 U_NAMESPACE_BEGIN 24 25 class UnicodeSet; 26 class UVector; 27 28 class RBBINode : public UMemory { 29 public: 30 enum NodeType { 31 setRef, 32 uset, 33 varRef, 34 leafChar, 35 lookAhead, 36 tag, 37 endMark, 38 opStart, 39 opCat, 40 opOr, 41 opStar, 42 opPlus, 43 opQuestion, 44 opBreak, 45 opReverse, 46 opLParen 47 }; 48 49 enum OpPrecedence { 50 precZero, 51 precStart, 52 precLParen, 53 precOpOr, 54 precOpCat 55 }; 56 57 NodeType fType; 58 RBBINode *fParent; 59 RBBINode *fLeftChild; 60 RBBINode *fRightChild; 61 UnicodeSet *fInputSet; // For uset nodes only. 62 OpPrecedence fPrecedence; // For binary ops only. 63 64 UnicodeString fText; // Text corresponding to this node. 65 // May be lazily evaluated when (if) needed 66 // for some node types. 67 int fFirstPos; // Position in the rule source string of the 68 // first text associated with the node. 69 // If there's a left child, this will be the same 70 // as that child's left pos. 71 int fLastPos; // Last position in the rule source string 72 // of any text associated with this node. 73 // If there's a right child, this will be the same 74 // as that child's last postion. 75 76 UBool fNullable; // See Aho. 77 int32_t fVal; // For leafChar nodes, the value. 78 // Values are the character category, 79 // corresponds to columns in the final 80 // state transition table. 81 82 UBool fLookAheadEnd; // For endMark nodes, set TRUE if 83 // marking the end of a look-ahead rule. 84 85 UBool fRuleRoot; // True if this node is the root of a rule. 86 UBool fChainIn; // True if chaining into this rule is allowed 87 // (no '^' present). 88 89 UVector *fFirstPosSet; 90 UVector *fLastPosSet; // TODO: rename fFirstPos & fLastPos to avoid confusion. 91 UVector *fFollowPos; 92 93 94 RBBINode(NodeType t); 95 RBBINode(const RBBINode &other); 96 ~RBBINode(); 97 98 RBBINode *cloneTree(); 99 RBBINode *flattenVariables(); 100 void flattenSets(); 101 void findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status); 102 103 #ifdef RBBI_DEBUG 104 static void printNodeHeader(); 105 static void printNode(const RBBINode *n); 106 static void printTree(const RBBINode *n, UBool withHeading); 107 #endif 108 109 private: 110 RBBINode &operator = (const RBBINode &other); // No defs. 111 UBool operator == (const RBBINode &other); // Private, so these functions won't accidently be used. 112 113 #ifdef RBBI_DEBUG 114 public: 115 int fSerialNum; // Debugging aids. 116 #endif 117 }; 118 119 #ifdef RBBI_DEBUG 120 U_CFUNC void 121 RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth=0); 122 #endif 123 124 U_NAMESPACE_END 125 126 #endif 127 128