1 /* 2 ******************************************************************************* 3 * Copyright (C) 2010-2012, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * file name: bytestriebuilder.h 7 * encoding: US-ASCII 8 * tab size: 8 (not used) 9 * indentation:4 10 * 11 * created on: 2010sep25 12 * created by: Markus W. Scherer 13 */ 14 15 /** 16 * \file 17 * \brief C++ API: Builder for icu::BytesTrie 18 */ 19 20 #ifndef __BYTESTRIEBUILDER_H__ 21 #define __BYTESTRIEBUILDER_H__ 22 23 #include "unicode/utypes.h" 24 #include "unicode/bytestrie.h" 25 #include "unicode/stringpiece.h" 26 #include "unicode/stringtriebuilder.h" 27 28 U_NAMESPACE_BEGIN 29 30 class BytesTrieElement; 31 class CharString; 32 33 /** 34 * Builder class for BytesTrie. 35 * 36 * This class is not intended for public subclassing. 37 * @stable ICU 4.8 38 */ 39 class U_COMMON_API BytesTrieBuilder : public StringTrieBuilder { 40 public: 41 /** 42 * Constructs an empty builder. 43 * @param errorCode Standard ICU error code. 44 * @stable ICU 4.8 45 */ 46 BytesTrieBuilder(UErrorCode &errorCode); 47 48 /** 49 * Destructor. 50 * @stable ICU 4.8 51 */ 52 virtual ~BytesTrieBuilder(); 53 54 /** 55 * Adds a (byte sequence, value) pair. 56 * The byte sequence must be unique. 57 * The bytes will be copied; the builder does not keep 58 * a reference to the input StringPiece or its data(). 59 * @param s The input byte sequence. 60 * @param value The value associated with this byte sequence. 61 * @param errorCode Standard ICU error code. Its input value must 62 * pass the U_SUCCESS() test, or else the function returns 63 * immediately. Check for U_FAILURE() on output or use with 64 * function chaining. (See User Guide for details.) 65 * @return *this 66 * @stable ICU 4.8 67 */ 68 BytesTrieBuilder &add(const StringPiece &s, int32_t value, UErrorCode &errorCode); 69 70 /** 71 * Builds a BytesTrie for the add()ed data. 72 * Once built, no further data can be add()ed until clear() is called. 73 * 74 * This method passes ownership of the builder's internal result array to the new trie object. 75 * Another call to any build() variant will re-serialize the trie. 76 * After clear() has been called, a new array will be used as well. 77 * @param buildOption Build option, see UStringTrieBuildOption. 78 * @param errorCode Standard ICU error code. Its input value must 79 * pass the U_SUCCESS() test, or else the function returns 80 * immediately. Check for U_FAILURE() on output or use with 81 * function chaining. (See User Guide for details.) 82 * @return A new BytesTrie for the add()ed data. 83 * @stable ICU 4.8 84 */ 85 BytesTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode); 86 87 /** 88 * Builds a BytesTrie for the add()ed data and byte-serializes it. 89 * Once built, no further data can be add()ed until clear() is called. 90 * 91 * Multiple calls to buildStringPiece() return StringPieces referring to the 92 * builder's same byte array, without rebuilding. 93 * If buildStringPiece() is called after build(), the trie will be 94 * re-serialized into a new array. 95 * If build() is called after buildStringPiece(), the trie object will become 96 * the owner of the previously returned array. 97 * After clear() has been called, a new array will be used as well. 98 * @param buildOption Build option, see UStringTrieBuildOption. 99 * @param errorCode Standard ICU error code. Its input value must 100 * pass the U_SUCCESS() test, or else the function returns 101 * immediately. Check for U_FAILURE() on output or use with 102 * function chaining. (See User Guide for details.) 103 * @return A StringPiece which refers to the byte-serialized BytesTrie for the add()ed data. 104 * @stable ICU 4.8 105 */ 106 StringPiece buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode); 107 108 /** 109 * Removes all (byte sequence, value) pairs. 110 * New data can then be add()ed and a new trie can be built. 111 * @return *this 112 * @stable ICU 4.8 113 */ 114 BytesTrieBuilder &clear(); 115 116 private: 117 BytesTrieBuilder(const BytesTrieBuilder &other); // no copy constructor 118 BytesTrieBuilder &operator=(const BytesTrieBuilder &other); // no assignment operator 119 120 void buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode); 121 122 virtual int32_t getElementStringLength(int32_t i) const; 123 virtual UChar getElementUnit(int32_t i, int32_t byteIndex) const; 124 virtual int32_t getElementValue(int32_t i) const; 125 126 virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const; 127 128 virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const; 129 virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const; 130 virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const; 131 132 virtual UBool matchNodesCanHaveValues() const { return FALSE; } 133 134 virtual int32_t getMaxBranchLinearSubNodeLength() const { return BytesTrie::kMaxBranchLinearSubNodeLength; } 135 virtual int32_t getMinLinearMatch() const { return BytesTrie::kMinLinearMatch; } 136 virtual int32_t getMaxLinearMatchLength() const { return BytesTrie::kMaxLinearMatchLength; } 137 138 #ifndef U_HIDE_INTERNAL_API 139 /** 140 * @internal 141 */ 142 class BTLinearMatchNode : public LinearMatchNode { 143 public: 144 BTLinearMatchNode(const char *units, int32_t len, Node *nextNode); 145 virtual UBool operator==(const Node &other) const; 146 virtual void write(StringTrieBuilder &builder); 147 private: 148 const char *s; 149 }; 150 #endif /* U_HIDE_INTERNAL_API */ 151 152 virtual Node *createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length, 153 Node *nextNode) const; 154 155 UBool ensureCapacity(int32_t length); 156 virtual int32_t write(int32_t byte); 157 int32_t write(const char *b, int32_t length); 158 virtual int32_t writeElementUnits(int32_t i, int32_t byteIndex, int32_t length); 159 virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal); 160 virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node); 161 virtual int32_t writeDeltaTo(int32_t jumpTarget); 162 163 CharString *strings; // Pointer not object so we need not #include internal charstr.h. 164 BytesTrieElement *elements; 165 int32_t elementsCapacity; 166 int32_t elementsLength; 167 168 // Byte serialization of the trie. 169 // Grows from the back: bytesLength measures from the end of the buffer! 170 char *bytes; 171 int32_t bytesCapacity; 172 int32_t bytesLength; 173 }; 174 175 U_NAMESPACE_END 176 177 #endif // __BYTESTRIEBUILDER_H__ 178