Home | History | Annotate | Download | only in common
      1 /**
      2  *******************************************************************************
      3  * Copyright (C) 2006, International Business Machines Corporation and others. *
      4  * All Rights Reserved.                                                        *
      5  *******************************************************************************
      6  */
      7 
      8 #ifndef TRIEDICT_H
      9 #define TRIEDICT_H
     10 
     11 #include "unicode/utypes.h"
     12 #include "unicode/uobject.h"
     13 #include "unicode/utext.h"
     14 
     15 struct UEnumeration;
     16 struct UDataSwapper;
     17 struct UDataMemory;
     18 
     19  /**
     20   * <p>UDataSwapFn function for use in swapping a compact dictionary.</p>
     21   *
     22   * @param ds Pointer to UDataSwapper containing global data about the
     23   *           transformation and function pointers for handling primitive
     24   *           types.
     25   * @param inData Pointer to the input data to be transformed or examined.
     26   * @param length Length of the data, counting bytes. May be -1 for preflighting.
     27   *               If length>=0, then transform the data.
     28   *               If length==-1, then only determine the length of the data.
     29   *               The length cannot be determined from the data itself for all
     30   *               types of data (e.g., not for simple arrays of integers).
     31   * @param outData Pointer to the output data buffer.
     32   *                If length>=0 (transformation), then the output buffer must
     33   *                have a capacity of at least length.
     34   *                If length==-1, then outData will not be used and can be NULL.
     35   * @param pErrorCode ICU UErrorCode parameter, must not be NULL and must
     36   *                   fulfill U_SUCCESS on input.
     37   * @return The actual length of the data.
     38   *
     39   * @see UDataSwapper
     40   */
     41 
     42 U_CAPI int32_t U_EXPORT2
     43 triedict_swap(const UDataSwapper *ds,
     44             const void *inData, int32_t length, void *outData,
     45             UErrorCode *pErrorCode);
     46 
     47 U_NAMESPACE_BEGIN
     48 
     49 class StringEnumeration;
     50 struct CompactTrieHeader;
     51 
     52 /*******************************************************************
     53  * TrieWordDictionary
     54  */
     55 
     56 /**
     57  * <p>TrieWordDictionary is an abstract class that represents a word
     58  * dictionary based on a trie. The base protocol is read-only.
     59  * Subclasses may allow writing.</p>
     60  */
     61 class U_COMMON_API TrieWordDictionary : public UMemory {
     62  public:
     63 
     64   /**
     65    * <p>Default constructor.</p>
     66    *
     67    */
     68   TrieWordDictionary();
     69 
     70   /**
     71    * <p>Virtual destructor.</p>
     72    */
     73   virtual ~TrieWordDictionary();
     74 
     75  /**
     76   * <p>Find dictionary words that match the text.</p>
     77   *
     78   * @param text A UText representing the text. The
     79   * iterator is left after the longest prefix match in the dictionary.
     80   * @param start The current position in text.
     81   * @param maxLength The maximum number of code units to match.
     82   * @param lengths An array that is filled with the lengths of words that matched.
     83   * @param count Filled with the number of elements output in lengths.
     84   * @param limit The size of the lengths array; this limits the number of words output.
     85   * @return The number of characters in text that were matched.
     86   */
     87   virtual int32_t matches( UText *text,
     88                               int32_t maxLength,
     89                               int32_t *lengths,
     90                               int &count,
     91                               int limit ) const = 0;
     92 
     93   /**
     94    * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
     95    *
     96    * @param status A status code recording the success of the call.
     97    * @return A StringEnumeration that will iterate through the whole dictionary.
     98    * The caller is responsible for closing it. The order is unspecified.
     99    */
    100   virtual StringEnumeration *openWords( UErrorCode &status ) const = 0;
    101 
    102 };
    103 
    104 /*******************************************************************
    105  * MutableTrieDictionary
    106  */
    107 
    108 /**
    109  * <p>MutableTrieDictionary is a TrieWordDictionary that allows words to be
    110  * added.</p>
    111  */
    112 
    113 struct TernaryNode;             // Forwards declaration
    114 
    115 class U_COMMON_API MutableTrieDictionary : public TrieWordDictionary {
    116  private:
    117     /**
    118      * The root node of the trie
    119      * @internal
    120      */
    121 
    122   TernaryNode               *fTrie;
    123 
    124     /**
    125      * A UText for internal use
    126      * @internal
    127      */
    128 
    129   UText    *fIter;
    130 
    131   friend class CompactTrieDictionary;   // For fast conversion
    132 
    133  public:
    134 
    135  /**
    136   * <p>Constructor.</p>
    137   *
    138   * @param median A UChar around which to balance the trie. Ideally, it should
    139   * begin at least one word that is near the median of the set in the dictionary
    140   * @param status A status code recording the success of the call.
    141   */
    142   MutableTrieDictionary( UChar median, UErrorCode &status );
    143 
    144   /**
    145    * <p>Virtual destructor.</p>
    146    */
    147   virtual ~MutableTrieDictionary();
    148 
    149  /**
    150   * <p>Find dictionary words that match the text.</p>
    151   *
    152   * @param text A UText representing the text. The
    153   * iterator is left after the longest prefix match in the dictionary.
    154   * @param maxLength The maximum number of code units to match.
    155   * @param lengths An array that is filled with the lengths of words that matched.
    156   * @param count Filled with the number of elements output in lengths.
    157   * @param limit The size of the lengths array; this limits the number of words output.
    158   * @return The number of characters in text that were matched.
    159   */
    160   virtual int32_t matches( UText *text,
    161                               int32_t maxLength,
    162                               int32_t *lengths,
    163                               int &count,
    164                               int limit ) const;
    165 
    166   /**
    167    * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
    168    *
    169    * @param status A status code recording the success of the call.
    170    * @return A StringEnumeration that will iterate through the whole dictionary.
    171    * The caller is responsible for closing it. The order is unspecified.
    172    */
    173   virtual StringEnumeration *openWords( UErrorCode &status ) const;
    174 
    175  /**
    176   * <p>Add one word to the dictionary.</p>
    177   *
    178   * @param word A UChar buffer containing the word.
    179   * @param length The length of the word.
    180   * @param status The resultant status
    181   */
    182   virtual void addWord( const UChar *word,
    183                         int32_t length,
    184                         UErrorCode &status);
    185 
    186 #if 0
    187  /**
    188   * <p>Add all strings from a UEnumeration to the dictionary.</p>
    189   *
    190   * @param words A UEnumeration that will return the desired words.
    191   * @param status The resultant status
    192   */
    193   virtual void addWords( UEnumeration *words, UErrorCode &status );
    194 #endif
    195 
    196 protected:
    197  /**
    198   * <p>Search the dictionary for matches.</p>
    199   *
    200   * @param text A UText representing the text. The
    201   * iterator is left after the longest prefix match in the dictionary.
    202   * @param maxLength The maximum number of code units to match.
    203   * @param lengths An array that is filled with the lengths of words that matched.
    204   * @param count Filled with the number of elements output in lengths.
    205   * @param limit The size of the lengths array; this limits the number of words output.
    206   * @param parent The parent of the current node
    207   * @param pMatched The returned parent node matched the input
    208   * @return The number of characters in text that were matched.
    209   */
    210   virtual int32_t search( UText *text,
    211                               int32_t maxLength,
    212                               int32_t *lengths,
    213                               int &count,
    214                               int limit,
    215                               TernaryNode *&parent,
    216                               UBool &pMatched ) const;
    217 
    218 private:
    219  /**
    220   * <p>Private constructor. The root node it not allocated.</p>
    221   *
    222   * @param status A status code recording the success of the call.
    223   */
    224   MutableTrieDictionary( UErrorCode &status );
    225 };
    226 
    227 /*******************************************************************
    228  * CompactTrieDictionary
    229  */
    230 
    231 /**
    232  * <p>CompactTrieDictionary is a TrieWordDictionary that has been compacted
    233  * to save space.</p>
    234  */
    235 class U_COMMON_API CompactTrieDictionary : public TrieWordDictionary {
    236  private:
    237     /**
    238      * The root node of the trie
    239      */
    240 
    241   const CompactTrieHeader   *fData;
    242 
    243     /**
    244      * A UBool indicating whether or not we own the fData.
    245      */
    246 
    247   UBool                     fOwnData;
    248 
    249     UDataMemory              *fUData;
    250  public:
    251   /**
    252    * <p>Construct a dictionary from a UDataMemory.</p>
    253    *
    254    * @param data A pointer to a UDataMemory, which is adopted
    255    * @param status A status code giving the result of the constructor
    256    */
    257   CompactTrieDictionary(UDataMemory *dataObj, UErrorCode &status);
    258 
    259   /**
    260    * <p>Construct a dictionary from raw saved data.</p>
    261    *
    262    * @param data A pointer to the raw data, which is still owned by the caller
    263    * @param status A status code giving the result of the constructor
    264    */
    265   CompactTrieDictionary(const void *dataObj, UErrorCode &status);
    266 
    267   /**
    268    * <p>Construct a dictionary from a MutableTrieDictionary.</p>
    269    *
    270    * @param dict The dictionary to use as input.
    271    * @param status A status code recording the success of the call.
    272    */
    273   CompactTrieDictionary( const MutableTrieDictionary &dict, UErrorCode &status );
    274 
    275   /**
    276    * <p>Virtual destructor.</p>
    277    */
    278   virtual ~CompactTrieDictionary();
    279 
    280  /**
    281   * <p>Find dictionary words that match the text.</p>
    282   *
    283   * @param text A UText representing the text. The
    284   * iterator is left after the longest prefix match in the dictionary.
    285   * @param maxLength The maximum number of code units to match.
    286   * @param lengths An array that is filled with the lengths of words that matched.
    287   * @param count Filled with the number of elements output in lengths.
    288   * @param limit The size of the lengths array; this limits the number of words output.
    289   * @return The number of characters in text that were matched.
    290   */
    291   virtual int32_t matches( UText *text,
    292                               int32_t rangeEnd,
    293                               int32_t *lengths,
    294                               int &count,
    295                               int limit ) const;
    296 
    297   /**
    298    * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
    299    *
    300    * @param status A status code recording the success of the call.
    301    * @return A StringEnumeration that will iterate through the whole dictionary.
    302    * The caller is responsible for closing it. The order is unspecified.
    303    */
    304   virtual StringEnumeration *openWords( UErrorCode &status ) const;
    305 
    306  /**
    307   * <p>Return the size of the compact data.</p>
    308   *
    309   * @return The size of the dictionary's compact data.
    310   */
    311   virtual uint32_t dataSize() const;
    312 
    313  /**
    314   * <p>Return a void * pointer to the compact data, platform-endian.</p>
    315   *
    316   * @return The data for the compact dictionary, suitable for passing to the
    317   * constructor.
    318   */
    319   virtual const void *data() const;
    320 
    321  /**
    322   * <p>Return a MutableTrieDictionary clone of this dictionary.</p>
    323   *
    324   * @param status A status code recording the success of the call.
    325   * @return A MutableTrieDictionary with the same data as this dictionary
    326   */
    327   virtual MutableTrieDictionary *cloneMutable( UErrorCode &status ) const;
    328 
    329  private:
    330 
    331   /**
    332    * <p>Convert a MutableTrieDictionary into a compact data blob.</p>
    333    *
    334    * @param dict The dictionary to convert.
    335    * @param status A status code recording the success of the call.
    336    * @return A single data blob starting with a CompactTrieHeader.
    337    */
    338   static CompactTrieHeader *compactMutableTrieDictionary( const MutableTrieDictionary &dict,
    339                                                         UErrorCode &status );
    340 
    341 };
    342 
    343 U_NAMESPACE_END
    344 
    345     /* TRIEDICT_H */
    346 #endif
    347