Home | History | Annotate | Download | only in unicode
      1 /*
      2  ******************************************************************************
      3  *   Copyright (C) 1996-2010, International Business Machines                 *
      4  *   Corporation and others.  All Rights Reserved.                            *
      5  ******************************************************************************
      6  */
      7 
      8 /**
      9  * \file
     10  * \brief C++ API: Collation data used to compute minLengthInChars.
     11  * \internal
     12  */
     13 
     14 #ifndef COLL_DATA_H
     15 #define COLL_DATA_H
     16 
     17 #include "unicode/utypes.h"
     18 
     19 #if !UCONFIG_NO_COLLATION
     20 
     21 #include "unicode/uobject.h"
     22 #include "unicode/ucol.h"
     23 
     24 U_NAMESPACE_BEGIN
     25 
     26 /**
     27  * The size, in <code>char</code>s, of the internal buffer for the Collator's short description string.
     28  * @internal ICU 4.0.1 technology preview
     29  */
     30 #define KEY_BUFFER_SIZE 64
     31 
     32  /**
     33   * The size, in CEs, of the internal CE buffer in a <code>CEList</code> object.
     34   * @internal ICU 4.0.1 technology preview
     35   */
     36 #define CELIST_BUFFER_SIZE 4
     37 
     38 /**
     39  * \def INSTRUMENT_CELIST
     40  * Define this to enable the <code>CEList</code> objects to collect
     41  * statistics. It is left undefined (commented out) here.
     42  * @internal ICU 4.0.1 technology preview
     43  */
     44 //#define INSTRUMENT_CELIST
     45 
     46  /**
     47   * The size of the initial list in a <code>StringList</code> object.
     48   * @internal ICU 4.0.1 technology preview
     49   */
     50 #define STRING_LIST_BUFFER_SIZE 16
     51 
     52 /**
     53  * \def INSTRUMENT_STRING_LIST
     54  * Define this to enable the <code>StringList</code> objects to
     55  * collect statistics. It is left undefined (commented out) here.
     56  * @internal ICU 4.0.1 technology preview
     57  */
     58 //#define INSTRUMENT_STRING_LIST
     59 
     60  /**
     61   * This object holds the list of CEs generated from a particular
     62   * <code>UnicodeString</code> by a given collator.
     63   *
     64   * @internal ICU 4.0.1 technology preview
     65   */
     66 class U_I18N_API CEList : public UObject
     67 {
     68 public:
     69     /**
     70      * Construct a <code>CEList</code> object.
     71      *
     72      * @param coll - the Collator used to collect the CEs.
     73      * @param string - the string for which to collect the CEs.
     74      * @param status - will be set if any errors occur.
     75      *
     76      * Note: if on return, status is set to an error code,
     77      * the only safe thing to do with this object is to call
     78      * the destructor.
     79      *
     80      * @internal ICU 4.0.1 technology preview
     81      */
     82     CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status);
     83 
     84     /**
     85      * The destructor.
     86      * @internal ICU 4.0.1 technology preview
     87      */
     88     ~CEList();
     89 
     90     /**
     91      * Return the number of CEs in the list.
     92      *
     93      * @return the number of CEs in the list.
     94      *
     95      * @internal ICU 4.0.1 technology preview
     96      */
     97     int32_t size() const;
     98 
     99     /**
    100      * Get a particular CE from the list.
    101      *
    102      * @param index - the index of the CE to return
    103      *
    104      * @return the CE, or <code>0</code> if <code>index</code> is out of range
    105      *
    106      * @internal ICU 4.0.1 technology preview
    107      */
    108     uint32_t get(int32_t index) const;
    109 
    110     /**
    111      * Check if the CEs in another <code>CEList</code> match the
    112      * suffix of this list starting at a given offset.
    113      *
    114      * @param offset - the offset of the suffix
    115      * @param other - the other <code>CEList</code>
    116      *
    117      * @return <code>TRUE</code> if the CEs match, <code>FALSE</code> otherwise.
    118      *
    119      * @internal ICU 4.0.1 technology preview
    120      */
    121     UBool matchesAt(int32_t offset, const CEList *other) const;
    122 
    123     /**
    124      * The index operator.
    125      *
    126      * @param index - the index (NOTE(review): out-of-range behavior is not documented here - confirm in the implementation)
    127      *
    128      * @return a reference to the given CE in the list; the reference is non-const even though the operator is <code>const</code>
    129      *
    130      * @internal ICU 4.0.1 technology preview
    131      */
    132     uint32_t &operator[](int32_t index) const;
    133 
    134     /**
    135      * UObject glue...
    136      * @internal ICU 4.0.1 technology preview
    137      */
    138     virtual UClassID getDynamicClassID() const;
    139     /**
    140      * UObject glue...
    141      * @internal ICU 4.0.1 technology preview
    142      */
    143     static UClassID getStaticClassID();
    144 
    145 private:
    146     void add(uint32_t ce, UErrorCode &status); // adds a CE; NOTE(review): presumably appends and grows storage as needed - confirm in the implementation
    147 
    148     uint32_t ceBuffer[CELIST_BUFFER_SIZE]; // fixed-size internal CE buffer
    149     uint32_t *ces;     // NOTE(review): presumably points at ceBuffer or at larger allocated storage - confirm
    150     int32_t listMax;   // NOTE(review): presumably the current capacity of the list - confirm
    151     int32_t listSize;  // NOTE(review): presumably the number of CEs currently held - confirm
    152 
    153 #ifdef INSTRUMENT_CELIST
    154     static int32_t _active;         // statistics counters, only declared when INSTRUMENT_CELIST is defined
    155     static int32_t _histogram[10];
    156 #endif
    157 };
    158 
    159 /**
    160  * StringList
    161  *
    162  * This object holds a list of <code>UnicodeString</code> objects.
    163  *
    164  * @internal ICU 4.0.1 technology preview
    165  */
    166 class U_I18N_API StringList : public UObject
    167 {
    168 public:
    169     /**
    170      * Construct an empty <code>StringList</code>
    171      *
    172      * @param status - will be set if any errors occur.
    173      *
    174      * Note: if on return, status is set to an error code,
    175      * the only safe thing to do with this object is to call
    176      * the destructor.
    177      *
    178      * @internal ICU 4.0.1 technology preview
    179      */
    180     StringList(UErrorCode &status);
    181 
    182     /**
    183      * The destructor.
    184      *
    185      * @internal ICU 4.0.1 technology preview
    186      */
    187     ~StringList();
    188 
    189     /**
    190      * Add a string to the list.
    191      *
    192      * @param string - the string to add
    193      * @param status - will be set if any errors occur.
    194      *
    195      * @internal ICU 4.0.1 technology preview
    196      */
    197     void add(const UnicodeString *string, UErrorCode &status);
    198 
    199     /**
    200      * Add an array of <code>UChar</code>s to the list.
    201      *
    202      * @param chars - the address of the array of <code>UChar</code>s
    203      * @param count - the number of <code>UChar</code>s in the array
    204      * @param status - will be set if any errors occur.
    205      *
    206      * @internal ICU 4.0.1 technology preview
    207      */
    208     void add(const UChar *chars, int32_t count, UErrorCode &status);
    209 
    210     /**
    211      * Get a particular string from the list.
    212      *
    213      * @param index - the index of the string
    214      *
    215      * @return a pointer to the <code>UnicodeString</code> or <code>NULL</code>
    216      *         if <code>index</code> is out of bounds.
    217      *
    218      * @internal ICU 4.0.1 technology preview
    219      */
    220     const UnicodeString *get(int32_t index) const;
    221 
    222     /**
    223      * Get the number of strings in the list.
    224      *
    225      * @return the number of strings in the list.
    226      *
    227      * @internal ICU 4.0.1 technology preview
    228      */
    229     int32_t size() const;
    230 
    231     /**
    232      * the UObject glue...
    233      * @internal ICU 4.0.1 technology preview
    234      */
    235     virtual UClassID getDynamicClassID() const;
    236     /**
    237      * the UObject glue...
    238      * @internal ICU 4.0.1 technology preview
    239      */
    240     static UClassID getStaticClassID();
    241 
    242 private:
    243     UnicodeString *strings; // NOTE(review): presumably the array holding the list's strings - confirm
    244     int32_t listMax;        // NOTE(review): presumably the current capacity of the list - confirm
    245     int32_t listSize;       // NOTE(review): presumably the number of strings currently held - confirm
    246 
    247 #ifdef INSTRUMENT_STRING_LIST
    248     static int32_t _lists;          // statistics counters, only declared when INSTRUMENT_STRING_LIST is defined
    249     static int32_t _strings;
    250     static int32_t _histogram[101];
    251 #endif
    252 };
    253 
    254 /*
    255  * Forward declarations of internal classes; this header refers to them only through pointers.
    256  */
    257 class StringToCEsMap;
    258 class CEToStringsMap;
    259 class CollDataCache;
    260 
    261 /**
    262  * CollData
    263  *
    264  * This class holds the Collator-specific data needed to
    265  * compute the length of the shortest string that can
    266  * generate a particular list of CEs.
    267  *
    268  * <code>CollData</code> objects are quite expensive to compute. Because
    269  * of this, they are cached. When you call <code>CollData::open</code> it
    270  * returns a reference counted cached object. When you call <code>CollData::close</code>
    271  * the reference count on the object is decremented but the object is not deleted.
    272  *
    273  * If you do not need to reuse any unreferenced objects in the cache, you can call
    274  * <code>CollData::flushCollDataCache</code>. If you no longer need any <code>CollData</code>
    275  * objects, you can call <code>CollData::freeCollDataCache</code>
    276  *
    277  * @internal ICU 4.0.1 technology preview
    278  */
    279 class U_I18N_API CollData : public UObject
    280 {
    281 public:
    282     /**
    283      * Get a <code>CollData</code> object for a collator.
    284      *
    285      * @param collator - the collator
    286      * @param status - will be set if any errors occur.
    287      *
    288      * @return the <code>CollData</code> object. You must call
    289      *         <code>close</code> when you are done using the object.
    290      *
    291      * Note: if on return, status is set to an error code,
    292      * the only safe thing to do with this object is to call
    293      * <code>CollData::close</code>.
    294      *
    295      * @internal ICU 4.0.1 technology preview
    296      */
    297     static CollData *open(UCollator *collator, UErrorCode &status);
    298 
    299     /**
    300      * Release a <code>CollData</code> object.
    301      *
    302      * @param collData - the object
    303      *
    304      * @internal ICU 4.0.1 technology preview
    305      */
    306     static void close(CollData *collData);
    307 
    308     /**
    309      * Get the <code>UCollator</code> object used to create this object.
    310      * The object returned may not be the exact object that was used to
    311      * create this object, but it will have the same behavior.
    312      * @internal ICU 4.0.1 technology preview
    313      */
    314     UCollator *getCollator() const;
    315 
    316     /**
    317      * Get a list of all the strings which generate a list
    318      * of CEs starting with a given CE.
    319      *
    320      * @param ce - the CE (note: declared as <code>int32_t</code> here, while <code>CEList</code> handles CEs as <code>uint32_t</code>)
    321      *
    322      * @return a <code>StringList</code> object containing all
    323      *        the strings, or <code>NULL</code> if there are
    324      *        no such strings.
    325      *
    326      * @internal ICU 4.0.1 technology preview.
    327      */
    328     const StringList *getStringList(int32_t ce) const;
    329 
    330     /**
    331      * Get a list of the CEs generated by a particular string.
    332      *
    333      * @param string - the string
    334      *
    335      * @return a <code>CEList</code> object containing the CEs. You
    336      *         must call <code>freeCEList</code> when you are finished
    337      *         using the <code>CEList</code>.
    338      *
    339      * @internal ICU 4.0.1 technology preview.
    340      */
    341     const CEList *getCEList(const UnicodeString *string) const;
    342 
    343     /**
    344      * Release a <code>CEList</code> returned by <code>getCEList</code>.
    345      *
    346      * @param list - the <code>CEList</code> to free.
    347      *
    348      * @internal ICU 4.0.1 technology preview
    349      */
    350     void freeCEList(const CEList *list);
    351 
    352     /**
    353      * Return the length of the shortest string that will generate
    354      * the given list of CEs.
    355      *
    356      * @param ces - the CEs
    357      * @param offset - the offset of the first CE in the list to use.
    358      *
    359      * @return the length of the shortest string.
    360      *
    361      * @internal ICU 4.0.1 technology preview
    362      */
    363     int32_t minLengthInChars(const CEList *ces, int32_t offset) const;
    364 
    365 
    366     /**
    367      * Return the length of the shortest string that will generate
    368      * the given list of CEs.
    369      *
    370      * Note: the algorithm used to do this computation is recursive. To
    371      * limit the amount of recursion, a "history" list is used to record
    372      * the best answer starting at a particular offset in the list of CEs.
    373      * If the same offset is visited again during the recursion, the answer
    374      * in the history list is used.
    375      *
    376      * @param ces - the CEs
    377      * @param offset - the offset of the first CE in the list to use.
    378      * @param history - the history list. Must be at least as long as
    379      *                 the number of CEs in the <code>CEList</code>
    380      *
    381      * @return the length of the shortest string.
    382      *
    383      * @internal ICU 4.0.1 technology preview
    384      */
    385    int32_t minLengthInChars(const CEList *ces, int32_t offset, int32_t *history) const;
    386 
    387    /**
    388     * UObject glue...
    389     * @internal ICU 4.0.1 technology preview
    390     */
    391     virtual UClassID getDynamicClassID() const;
    392    /**
    393     * UObject glue...
    394     * @internal ICU 4.0.1 technology preview
    395     */
    396     static UClassID getStaticClassID();
    397 
    398     /**
    399      * <code>CollData</code> objects are expensive to compute, and so
    400      * may be cached. This routine will free the cached objects and delete
    401      * the cache.
    402      *
    403      * WARNING: Don't call this until you have called <code>close</code>
    404      * for each <code>CollData</code> object that you have used. Also,
    405      * DO NOT call this if another thread may be calling <code>flushCollDataCache</code>
    406      * at the same time.
    407      *
    408      * @internal ICU 4.0.1 technology preview
    409      */
    410     static void freeCollDataCache();
    411 
    412     /**
    413      * <code>CollData</code> objects are expensive to compute, and so
    414      * may be cached. This routine will remove any unused <code>CollData</code>
    415      * objects from the cache.
    416      *
    417      * @internal ICU 4.0.1 technology preview
    418      */
    419     static void flushCollDataCache();
    420 
    421 private:
    422     friend class CollDataCache;
    423     friend class CollDataCacheEntry;
    424 
    425     CollData(UCollator *collator, char *cacheKey, int32_t cachekeyLength, UErrorCode &status); // private: objects are obtained through open()
    426     ~CollData(); // private: objects are released through close()
    427 
    428     CollData(); // private default constructor; NOTE(review): presumably declared only to forbid default construction - confirm
    429 
    430     static char *getCollatorKey(UCollator *collator, char *buffer, int32_t bufferLength); // NOTE(review): presumably builds the cache key for a collator - confirm
    431 
    432     static CollDataCache *getCollDataCache();
    433 
    434     UCollator      *coll;
    435     StringToCEsMap *charsToCEList;
    436     CEToStringsMap *ceToCharsStartingWith;
    437 
    438     char keyBuffer[KEY_BUFFER_SIZE]; // fixed-size internal key buffer
    439     char *key; // NOTE(review): presumably points at keyBuffer or at larger allocated storage - confirm
    440 
    441     static CollDataCache *collDataCache;
    442 
    443     uint32_t minHan;
    444     uint32_t maxHan;
    445 
    446     uint32_t jamoLimits[4];
    447 };
    448 
    449 U_NAMESPACE_END
    450 
    451 #endif // #if !UCONFIG_NO_COLLATION
    452 #endif // #ifndef COLL_DATA_H
    453