1 /** 2 ******************************************************************************* 3 * Copyright (C) 2006, International Business Machines Corporation and others. * 4 * All Rights Reserved. * 5 ******************************************************************************* 6 */ 7 8 #ifndef TRIEDICT_H 9 #define TRIEDICT_H 10 11 #include "unicode/utypes.h" 12 #include "unicode/uobject.h" 13 #include "unicode/utext.h" 14 15 struct UEnumeration; 16 struct UDataSwapper; 17 struct UDataMemory; 18 19 /** 20 * <p>UDataSwapFn function for use in swapping a compact dictionary.</p> 21 * 22 * @param ds Pointer to UDataSwapper containing global data about the 23 * transformation and function pointers for handling primitive 24 * types. 25 * @param inData Pointer to the input data to be transformed or examined. 26 * @param length Length of the data, counting bytes. May be -1 for preflighting. 27 * If length>=0, then transform the data. 28 * If length==-1, then only determine the length of the data. 29 * The length cannot be determined from the data itself for all 30 * types of data (e.g., not for simple arrays of integers). 31 * @param outData Pointer to the output data buffer. 32 * If length>=0 (transformation), then the output buffer must 33 * have a capacity of at least length. 34 * If length==-1, then outData will not be used and can be NULL. 35 * @param pErrorCode ICU UErrorCode parameter, must not be NULL and must 36 * fulfill U_SUCCESS on input. 37 * @return The actual length of the data. 38 * 39 * @see UDataSwapper 40 */ 41 42 U_CAPI int32_t U_EXPORT2 43 triedict_swap(const UDataSwapper *ds, 44 const void *inData, int32_t length, void *outData, 45 UErrorCode *pErrorCode); 46 47 U_NAMESPACE_BEGIN 48 49 class StringEnumeration; 50 51 /******************************************************************* 52 * TrieWordDictionary 53 */ 54 55 /** 56 * <p>TrieWordDictionary is an abstract class that represents a word 57 * dictionary based on a trie. The base protocol is read-only. 58 * Subclasses may allow writing.</p> 59 */ 60 class U_COMMON_API TrieWordDictionary : public UMemory { 61 public: 62 63 /** 64 * <p>Default constructor.</p> 65 * 66 */ 67 TrieWordDictionary(); 68 69 /** 70 * <p>Virtual destructor.</p> 71 */ 72 virtual ~TrieWordDictionary(); 73 74 /** 75 * <p>Returns true if the dictionary contains values associated with each word.</p> 76 */ 77 virtual UBool getValued() const = 0; 78 79 /** 80 * <p>Find dictionary words that match the text.</p> 81 * 82 * @param text A UText representing the text. The 83 * iterator is left after the longest prefix match in the dictionary. 84 * @param maxLength The maximum number of code units to match. 85 * @param lengths An array that is filled with the lengths of words that matched. 86 * @param count Filled with the number of elements output in lengths. 87 * @param limit The size of the lengths array; this limits the number of words output. 88 * @param values An array that is filled with the values associated with the matched words. 89 * @return The number of characters in text that were matched. 90 */ 91 virtual int32_t matches( UText *text, 92 int32_t maxLength, 93 int32_t *lengths, 94 int &count, 95 int limit, 96 uint16_t *values = NULL) const = 0; 97 98 /** 99 * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p> 100 * 101 * @param status A status code recording the success of the call. 102 * @return A StringEnumeration that will iterate through the whole dictionary. 103 * The caller is responsible for closing it. The order is unspecified. 104 */ 105 virtual StringEnumeration *openWords( UErrorCode &status ) const = 0; 106 107 }; 108 109 /******************************************************************* 110 * MutableTrieDictionary 111 */ 112 113 /** 114 * <p>MutableTrieDictionary is a TrieWordDictionary that allows words to be 115 * added.</p> 116 */ 117 118 struct TernaryNode; // Forwards declaration 119 120 class U_COMMON_API MutableTrieDictionary : public TrieWordDictionary { 121 private: 122 /** 123 * The root node of the trie 124 * @internal 125 */ 126 127 TernaryNode *fTrie; 128 129 /** 130 * A UText for internal use 131 * @internal 132 */ 133 134 UText *fIter; 135 136 /** 137 * A UText for internal use 138 * @internal 139 */ 140 UBool fValued; 141 142 friend class CompactTrieDictionary; // For fast conversion 143 144 public: 145 146 /** 147 * <p>Constructor.</p> 148 * 149 * @param median A UChar around which to balance the trie. Ideally, it should 150 * begin at least one word that is near the median of the set in the dictionary 151 * @param status A status code recording the success of the call. 152 * @param containsValue True if the dictionary stores values associated with each word. 153 */ 154 MutableTrieDictionary( UChar median, UErrorCode &status, UBool containsValue = FALSE ); 155 156 /** 157 * <p>Virtual destructor.</p> 158 */ 159 virtual ~MutableTrieDictionary(); 160 161 /** 162 * Indicate whether the MutableTrieDictionary stores values associated with each word 163 */ 164 void setValued(UBool valued){ 165 fValued = valued; 166 } 167 168 /** 169 * <p>Returns true if the dictionary contains values associated with each word.</p> 170 */ 171 virtual UBool getValued() const { 172 return fValued; 173 } 174 175 /** 176 * <p>Find dictionary words that match the text.</p> 177 * 178 * @param text A UText representing the text. The 179 * iterator is left after the longest prefix match in the dictionary. 180 * @param maxLength The maximum number of code units to match. 181 * @param lengths An array that is filled with the lengths of words that matched. 182 * @param count Filled with the number of elements output in lengths. 183 * @param limit The size of the lengths array; this limits the number of words output. 184 * @param values An array that is filled with the values associated with the matched words. 185 * @return The number of characters in text that were matched. 186 */ 187 virtual int32_t matches( UText *text, 188 int32_t maxLength, 189 int32_t *lengths, 190 int &count, 191 int limit, 192 uint16_t *values = NULL) const; 193 194 /** 195 * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p> 196 * 197 * @param status A status code recording the success of the call. 198 * @return A StringEnumeration that will iterate through the whole dictionary. 199 * The caller is responsible for closing it. The order is unspecified. 200 */ 201 virtual StringEnumeration *openWords( UErrorCode &status ) const; 202 203 /** 204 * <p>Add one word to the dictionary with an optional associated value.</p> 205 * 206 * @param word A UChar buffer containing the word. 207 * @param length The length of the word. 208 * @param status The resultant status. 209 * @param value The nonzero value associated with this word. 210 */ 211 virtual void addWord( const UChar *word, 212 int32_t length, 213 UErrorCode &status, 214 uint16_t value = 0); 215 216 #if 0 217 /** 218 * <p>Add all strings from a UEnumeration to the dictionary.</p> 219 * 220 * @param words A UEnumeration that will return the desired words. 221 * @param status The resultant status 222 */ 223 virtual void addWords( UEnumeration *words, UErrorCode &status ); 224 #endif 225 226 protected: 227 /** 228 * <p>Search the dictionary for matches.</p> 229 * 230 * @param text A UText representing the text. The 231 * iterator is left after the longest prefix match in the dictionary. 232 * @param maxLength The maximum number of code units to match. 233 * @param lengths An array that is filled with the lengths of words that matched. 234 * @param count Filled with the number of elements output in lengths. 235 * @param limit The size of the lengths array; this limits the number of words output. 236 * @param parent The parent of the current node. 237 * @param pMatched The returned parent node matched the input/ 238 * @param values An array that is filled with the values associated with the matched words. 239 * @return The number of characters in text that were matched. 240 */ 241 virtual int32_t search( UText *text, 242 int32_t maxLength, 243 int32_t *lengths, 244 int &count, 245 int limit, 246 TernaryNode *&parent, 247 UBool &pMatched, 248 uint16_t *values = NULL) const; 249 250 private: 251 /** 252 * <p>Private constructor. The root node it not allocated.</p> 253 * 254 * @param status A status code recording the success of the call. 255 * @param containsValues True if the dictionary will store a value associated 256 * with each word added. 257 */ 258 MutableTrieDictionary( UErrorCode &status, UBool containsValues = false ); 259 }; 260 261 /******************************************************************* 262 * CompactTrieDictionary 263 */ 264 265 //forward declarations 266 struct CompactTrieHeader; 267 struct CompactTrieInfo; 268 269 /** 270 * <p>CompactTrieDictionary is a TrieWordDictionary that has been compacted 271 * to save space.</p> 272 */ 273 class U_COMMON_API CompactTrieDictionary : public TrieWordDictionary { 274 private: 275 /** 276 * The header of the CompactTrieDictionary which contains all info 277 */ 278 279 CompactTrieInfo *fInfo; 280 281 /** 282 * A UBool indicating whether or not we own the fData. 283 */ 284 UBool fOwnData; 285 286 UDataMemory *fUData; 287 public: 288 /** 289 * <p>Construct a dictionary from a UDataMemory.</p> 290 * 291 * @param data A pointer to a UDataMemory, which is adopted 292 * @param status A status code giving the result of the constructor 293 */ 294 CompactTrieDictionary(UDataMemory *dataObj, UErrorCode &status); 295 296 /** 297 * <p>Construct a dictionary from raw saved data.</p> 298 * 299 * @param data A pointer to the raw data, which is still owned by the caller 300 * @param status A status code giving the result of the constructor 301 */ 302 CompactTrieDictionary(const void *dataObj, UErrorCode &status); 303 304 /** 305 * <p>Construct a dictionary from a MutableTrieDictionary.</p> 306 * 307 * @param dict The dictionary to use as input. 308 * @param status A status code recording the success of the call. 309 */ 310 CompactTrieDictionary( const MutableTrieDictionary &dict, UErrorCode &status ); 311 312 /** 313 * <p>Virtual destructor.</p> 314 */ 315 virtual ~CompactTrieDictionary(); 316 317 /** 318 * <p>Returns true if the dictionary contains values associated with each word.</p> 319 */ 320 virtual UBool getValued() const; 321 322 /** 323 * <p>Find dictionary words that match the text.</p> 324 * 325 * @param text A UText representing the text. The 326 * iterator is left after the longest prefix match in the dictionary. 327 * @param maxLength The maximum number of code units to match. 328 * @param lengths An array that is filled with the lengths of words that matched. 329 * @param count Filled with the number of elements output in lengths. 330 * @param limit The size of the lengths array; this limits the number of words output. 331 * @param values An array that is filled with the values associated with the matched words. 332 * @return The number of characters in text that were matched. 333 */ 334 virtual int32_t matches( UText *text, 335 int32_t maxLength, 336 int32_t *lengths, 337 int &count, 338 int limit, 339 uint16_t *values = NULL) const; 340 341 /** 342 * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p> 343 * 344 * @param status A status code recording the success of the call. 345 * @return A StringEnumeration that will iterate through the whole dictionary. 346 * The caller is responsible for closing it. The order is unspecified. 347 */ 348 virtual StringEnumeration *openWords( UErrorCode &status ) const; 349 350 /** 351 * <p>Return the size of the compact data.</p> 352 * 353 * @return The size of the dictionary's compact data. 354 */ 355 virtual uint32_t dataSize() const; 356 357 /** 358 * <p>Return a void * pointer to the (unmanaged) compact data, platform-endian.</p> 359 * 360 * @return The data for the compact dictionary, suitable for passing to the 361 * constructor. 362 */ 363 virtual const void *data() const; 364 365 /** 366 * <p>Return a MutableTrieDictionary clone of this dictionary.</p> 367 * 368 * @param status A status code recording the success of the call. 369 * @return A MutableTrieDictionary with the same data as this dictionary 370 */ 371 virtual MutableTrieDictionary *cloneMutable( UErrorCode &status ) const; 372 373 private: 374 375 /** 376 * <p>Convert a MutableTrieDictionary into a compact data blob.</p> 377 * 378 * @param dict The dictionary to convert. 379 * @param status A status code recording the success of the call. 380 * @return A single data blob starting with a CompactTrieHeader. 381 */ 382 static CompactTrieHeader *compactMutableTrieDictionary( const MutableTrieDictionary &dict, 383 UErrorCode &status ); 384 385 }; 386 387 U_NAMESPACE_END 388 389 /* TRIEDICT_H */ 390 #endif 391