Home | History | Annotate | Download | only in common
      1 /**
      2  *******************************************************************************
      3  * Copyright (C) 2006, International Business Machines Corporation and others. *
      4  * All Rights Reserved.                                                        *
      5  *******************************************************************************
      6  */
      7 
      8 #ifndef TRIEDICT_H
      9 #define TRIEDICT_H
     10 
     11 #include "unicode/utypes.h"
     12 #include "unicode/uobject.h"
     13 #include "unicode/utext.h"
     14 
     15 struct UEnumeration;
     16 struct UDataSwapper;
     17 struct UDataMemory;
     18 
     19  /**
     20   * <p>UDataSwapFn function for use in swapping a compact dictionary.</p>
     21   *
     22   * @param ds Pointer to UDataSwapper containing global data about the
     23   *           transformation and function pointers for handling primitive
     24   *           types.
     25   * @param inData Pointer to the input data to be transformed or examined.
     26   * @param length Length of the data, counting bytes. May be -1 for preflighting.
     27   *               If length>=0, then transform the data.
     28   *               If length==-1, then only determine the length of the data.
     29   *               The length cannot be determined from the data itself for all
     30   *               types of data (e.g., not for simple arrays of integers).
     31   * @param outData Pointer to the output data buffer.
     32   *                If length>=0 (transformation), then the output buffer must
     33   *                have a capacity of at least length.
     34   *                If length==-1, then outData will not be used and can be NULL.
     35   * @param pErrorCode ICU UErrorCode parameter, must not be NULL and must
     36   *                   fulfill U_SUCCESS on input.
     37   * @return The actual length of the data.
     38   *
     39   * @see UDataSwapper
     40   */
     41 
     42 U_CAPI int32_t U_EXPORT2
     43 triedict_swap(const UDataSwapper *ds,
     44             const void *inData, int32_t length, void *outData,
     45             UErrorCode *pErrorCode);
     46 
     47 U_NAMESPACE_BEGIN
     48 
     49 class StringEnumeration;
     50 
     51 /*******************************************************************
     52  * TrieWordDictionary
     53  */
     54 
     55 /**
     56  * <p>TrieWordDictionary is an abstract class that represents a word
     57  * dictionary based on a trie. The base protocol is read-only.
     58  * Subclasses may allow writing.</p>
     59  */
     60 class U_COMMON_API TrieWordDictionary : public UMemory {
     61  public:
     62 
     63   /**
     64    * <p>Default constructor.</p>
     65    *
     66    */
     67   TrieWordDictionary();
     68 
     69   /**
     70    * <p>Virtual destructor.</p>
     71    */
     72   virtual ~TrieWordDictionary();
     73 
     74   /**
     75    * <p>Returns true if the dictionary contains values associated with each word.</p>
     76    */
     77   virtual UBool getValued() const = 0;
     78 
     79  /**
     80   * <p>Find dictionary words that match the text.</p>
     81   *
     82   * @param text A UText representing the text. The
     83   * iterator is left after the longest prefix match in the dictionary.
     84   * @param maxLength The maximum number of code units to match.
     85   * @param lengths An array that is filled with the lengths of words that matched.
     86   * @param count Filled with the number of elements output in lengths.
     87   * @param limit The size of the lengths array; this limits the number of words output.
     88   * @param values An array that is filled with the values associated with the matched words.
     89   * @return The number of characters in text that were matched.
     90   */
     91   virtual int32_t matches( UText *text,
     92                               int32_t maxLength,
     93                               int32_t *lengths,
     94                               int &count,
     95                               int limit,
     96                               uint16_t *values = NULL) const = 0;
     97 
     98   /**
     99    * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
    100    *
    101    * @param status A status code recording the success of the call.
    102    * @return A StringEnumeration that will iterate through the whole dictionary.
    103    * The caller is responsible for closing it. The order is unspecified.
    104    */
    105   virtual StringEnumeration *openWords( UErrorCode &status ) const = 0;
    106 
    107 };
    108 
    109 /*******************************************************************
    110  * MutableTrieDictionary
    111  */
    112 
    113 /**
    114  * <p>MutableTrieDictionary is a TrieWordDictionary that allows words to be
    115  * added.</p>
    116  */
    117 
    118 struct TernaryNode;             // Forwards declaration
    119 
    120 class U_COMMON_API MutableTrieDictionary : public TrieWordDictionary {
    121  private:
    122     /**
    123      * The root node of the trie
    124      * @internal
    125      */
    126 
    127   TernaryNode               *fTrie;
    128 
    129     /**
    130      * A UText for internal use
    131      * @internal
    132      */
    133 
    134   UText    *fIter;
    135 
    136     /**
    137      * A UText for internal use
    138      * @internal
    139      */
    140   UBool fValued;
    141 
    142   friend class CompactTrieDictionary;   // For fast conversion
    143 
    144  public:
    145 
    146  /**
    147   * <p>Constructor.</p>
    148   *
    149   * @param median A UChar around which to balance the trie. Ideally, it should
    150   * begin at least one word that is near the median of the set in the dictionary
    151   * @param status A status code recording the success of the call.
    152   * @param containsValue True if the dictionary stores values associated with each word.
    153   */
    154   MutableTrieDictionary( UChar median, UErrorCode &status, UBool containsValue = FALSE );
    155 
    156   /**
    157    * <p>Virtual destructor.</p>
    158    */
    159   virtual ~MutableTrieDictionary();
    160 
    161   /**
    162    * Indicate whether the MutableTrieDictionary stores values associated with each word
    163    */
    164   void setValued(UBool valued){
    165       fValued = valued;
    166   }
    167 
    168   /**
    169    * <p>Returns true if the dictionary contains values associated with each word.</p>
    170    */
    171   virtual UBool getValued() const {
    172       return fValued;
    173   }
    174 
    175  /**
    176   * <p>Find dictionary words that match the text.</p>
    177   *
    178   * @param text A UText representing the text. The
    179   * iterator is left after the longest prefix match in the dictionary.
    180   * @param maxLength The maximum number of code units to match.
    181   * @param lengths An array that is filled with the lengths of words that matched.
    182   * @param count Filled with the number of elements output in lengths.
    183   * @param limit The size of the lengths array; this limits the number of words output.
    184   * @param values An array that is filled with the values associated with the matched words.
    185   * @return The number of characters in text that were matched.
    186   */
    187   virtual int32_t matches( UText *text,
    188                               int32_t maxLength,
    189                               int32_t *lengths,
    190                               int &count,
    191                               int limit,
    192                               uint16_t *values = NULL) const;
    193 
    194   /**
    195    * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
    196    *
    197    * @param status A status code recording the success of the call.
    198    * @return A StringEnumeration that will iterate through the whole dictionary.
    199    * The caller is responsible for closing it. The order is unspecified.
    200    */
    201   virtual StringEnumeration *openWords( UErrorCode &status ) const;
    202 
    203  /**
    204   * <p>Add one word to the dictionary with an optional associated value.</p>
    205   *
    206   * @param word A UChar buffer containing the word.
    207   * @param length The length of the word.
    208   * @param status The resultant status.
    209   * @param value The nonzero value associated with this word.
    210   */
    211   virtual void addWord( const UChar *word,
    212                         int32_t length,
    213                         UErrorCode &status,
    214                         uint16_t value = 0);
    215 
    216 #if 0
    217  /**
    218   * <p>Add all strings from a UEnumeration to the dictionary.</p>
    219   *
    220   * @param words A UEnumeration that will return the desired words.
    221   * @param status The resultant status
    222   */
    223   virtual void addWords( UEnumeration *words, UErrorCode &status );
    224 #endif
    225 
    226 protected:
    227  /**
    228   * <p>Search the dictionary for matches.</p>
    229   *
    230   * @param text A UText representing the text. The
    231   * iterator is left after the longest prefix match in the dictionary.
    232   * @param maxLength The maximum number of code units to match.
    233   * @param lengths An array that is filled with the lengths of words that matched.
    234   * @param count Filled with the number of elements output in lengths.
    235   * @param limit The size of the lengths array; this limits the number of words output.
    236   * @param parent The parent of the current node.
    237   * @param pMatched The returned parent node matched the input/
    238   * @param values An array that is filled with the values associated with the matched words.
    239   * @return The number of characters in text that were matched.
    240   */
    241   virtual int32_t search( UText *text,
    242                               int32_t maxLength,
    243                               int32_t *lengths,
    244                               int &count,
    245                               int limit,
    246                               TernaryNode *&parent,
    247                               UBool &pMatched,
    248                               uint16_t *values = NULL) const;
    249 
    250 private:
    251  /**
    252   * <p>Private constructor. The root node it not allocated.</p>
    253   *
    254   * @param status A status code recording the success of the call.
    255   * @param containsValues True if the dictionary will store a value associated
    256   * with each word added.
    257   */
    258   MutableTrieDictionary( UErrorCode &status, UBool containsValues = false );
    259 };
    260 
    261 /*******************************************************************
    262  * CompactTrieDictionary
    263  */
    264 
    265 //forward declarations
    266 struct CompactTrieHeader;
    267 struct CompactTrieInfo;
    268 
    269 /**
    270  * <p>CompactTrieDictionary is a TrieWordDictionary that has been compacted
    271  * to save space.</p>
    272  */
    273 class U_COMMON_API CompactTrieDictionary : public TrieWordDictionary {
    274  private:
    275   /**
    276    * The header of the CompactTrieDictionary which contains all info
    277    */
    278 
    279   CompactTrieInfo                 *fInfo;
    280 
    281   /**
    282    * A UBool indicating whether or not we own the fData.
    283    */
    284   UBool                     fOwnData;
    285 
    286   UDataMemory              *fUData;
    287  public:
    288   /**
    289    * <p>Construct a dictionary from a UDataMemory.</p>
    290    *
    291    * @param data A pointer to a UDataMemory, which is adopted
    292    * @param status A status code giving the result of the constructor
    293    */
    294   CompactTrieDictionary(UDataMemory *dataObj, UErrorCode &status);
    295 
    296   /**
    297    * <p>Construct a dictionary from raw saved data.</p>
    298    *
    299    * @param data A pointer to the raw data, which is still owned by the caller
    300    * @param status A status code giving the result of the constructor
    301    */
    302   CompactTrieDictionary(const void *dataObj, UErrorCode &status);
    303 
    304   /**
    305    * <p>Construct a dictionary from a MutableTrieDictionary.</p>
    306    *
    307    * @param dict The dictionary to use as input.
    308    * @param status A status code recording the success of the call.
    309    */
    310   CompactTrieDictionary( const MutableTrieDictionary &dict, UErrorCode &status );
    311 
    312   /**
    313    * <p>Virtual destructor.</p>
    314    */
    315   virtual ~CompactTrieDictionary();
    316 
    317   /**
    318    * <p>Returns true if the dictionary contains values associated with each word.</p>
    319    */
    320   virtual UBool getValued() const;
    321 
    322  /**
    323   * <p>Find dictionary words that match the text.</p>
    324   *
    325   * @param text A UText representing the text. The
    326   * iterator is left after the longest prefix match in the dictionary.
    327   * @param maxLength The maximum number of code units to match.
    328   * @param lengths An array that is filled with the lengths of words that matched.
    329   * @param count Filled with the number of elements output in lengths.
    330   * @param limit The size of the lengths array; this limits the number of words output.
    331   * @param values An array that is filled with the values associated with the matched words.
    332   * @return The number of characters in text that were matched.
    333   */
    334   virtual int32_t matches( UText *text,
    335                               int32_t maxLength,
    336                               int32_t *lengths,
    337                               int &count,
    338                               int limit,
    339                               uint16_t *values = NULL) const;
    340 
    341   /**
    342    * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
    343    *
    344    * @param status A status code recording the success of the call.
    345    * @return A StringEnumeration that will iterate through the whole dictionary.
    346    * The caller is responsible for closing it. The order is unspecified.
    347    */
    348   virtual StringEnumeration *openWords( UErrorCode &status ) const;
    349 
    350  /**
    351   * <p>Return the size of the compact data.</p>
    352   *
    353   * @return The size of the dictionary's compact data.
    354   */
    355   virtual uint32_t dataSize() const;
    356 
    357  /**
    358   * <p>Return a void * pointer to the (unmanaged) compact data, platform-endian.</p>
    359   *
    360   * @return The data for the compact dictionary, suitable for passing to the
    361   * constructor.
    362   */
    363   virtual const void *data() const;
    364 
    365  /**
    366   * <p>Return a MutableTrieDictionary clone of this dictionary.</p>
    367   *
    368   * @param status A status code recording the success of the call.
    369   * @return A MutableTrieDictionary with the same data as this dictionary
    370   */
    371   virtual MutableTrieDictionary *cloneMutable( UErrorCode &status ) const;
    372 
    373  private:
    374 
    375   /**
    376    * <p>Convert a MutableTrieDictionary into a compact data blob.</p>
    377    *
    378    * @param dict The dictionary to convert.
    379    * @param status A status code recording the success of the call.
    380    * @return A single data blob starting with a CompactTrieHeader.
    381    */
    382   static CompactTrieHeader *compactMutableTrieDictionary( const MutableTrieDictionary &dict,
    383                                                         UErrorCode &status );
    384 
    385 };
    386 
    387 U_NAMESPACE_END
    388 
    389 /* TRIEDICT_H */
    390 #endif
    391