Home | History | Annotate | Download | only in unicode
      1 /*
      2  ******************************************************************************
      3  *   Copyright (C) 1996-2012, International Business Machines                 *
      4  *   Corporation and others.  All Rights Reserved.                            *
      5  ******************************************************************************
      6  */
      7 
      8 /**
      9  * \file
     10  * \brief C++ API: Collation data used to compute minLengthInChars.
     11  * \internal
     12  */
     13 
     14 #ifndef COLL_DATA_H
     15 #define COLL_DATA_H
     16 
     17 #include "unicode/utypes.h"
     18 
     19 #if !UCONFIG_NO_COLLATION
     20 
     21 #include "unicode/uobject.h"
     22 #include "unicode/ucol.h"
     23 
     24 U_NAMESPACE_BEGIN
     25 
     26 #ifndef U_HIDE_INTERNAL_API
     27 /**
     28  * The size, in <code>char</code>s, of the internal buffer (<code>CollData::keyBuffer</code>) for the Collator's short description string.
     29  * @internal ICU 4.0.1 technology preview
     30  */
     31 #define KEY_BUFFER_SIZE 64
     32 
     33  /**
     34   * The number of <code>uint32_t</code> elements in the internal CE buffer (<code>ceBuffer</code>) of a <code>CEList</code> object.
     35   * @internal ICU 4.0.1 technology preview
     36   */
     37 #define CELIST_BUFFER_SIZE 4
     38 
     39 /**
     40  * \def INSTRUMENT_CELIST
     41  * Define this to enable the <code>CEList</code> objects to collect
     42  * statistics (adds the static members <code>_active</code> and <code>_histogram</code> to <code>CEList</code>).
     43  * @internal ICU 4.0.1 technology preview
     44  */
     45 //#define INSTRUMENT_CELIST
     46 
     47  /**
     48   * The size of the initial list in a <code>StringList</code> object.
     49   * @internal ICU 4.0.1 technology preview
     50   */
     51 #define STRING_LIST_BUFFER_SIZE 16
     52 
     53 /**
     54  * \def INSTRUMENT_STRING_LIST
     55  * Define this to enable the <code>StringList</code> objects to
     56  * collect statistics (adds the static members <code>_lists</code>, <code>_strings</code> and <code>_histogram</code> to <code>StringList</code>).
     57  * @internal ICU 4.0.1 technology preview
     58  */
     59 //#define INSTRUMENT_STRING_LIST
     60 
     61  /**
     62   * This object holds a list of CEs generated from a particular
     63   * <code>UnicodeString</code>
     64   *
     65   * @internal ICU 4.0.1 technology preview
     66   */
     67 class U_I18N_API CEList : public UObject
     68 {
     69 public:
     70     /**
     71      * Construct a <code>CEList</code> object.
     72      *
     73      * @param coll - the Collator used to collect the CEs.
     74      * @param string - the string for which to collect the CEs.
     75      * @param status - will be set if any errors occur.
     76      *
     77      * Note: if on return, status is set to an error code,
     78      * the only safe thing to do with this object is to call
     79      * the destructor.
     80      *
     81      * @internal ICU 4.0.1 technology preview
     82      */
     83     CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status);
     84 
     85     /**
     86      * The destructor.
     87      * @internal ICU 4.0.1 technology preview
     88      */
     89     ~CEList();
     90 
     91     /**
     92      * Return the number of CEs in the list.
     93      *
     94      * @return the number of CEs in the list.
     95      *
     96      * @internal ICU 4.0.1 technology preview
     97      */
     98     int32_t size() const;
     99 
    100     /**
    101      * Get a particular CE from the list.
    102      *
    103      * @param index - the index of the CE to return
    104      *
    105      * @return the CE, or <code>0</code> if <code>index</code> is out of range
    106      *
    107      * @internal ICU 4.0.1 technology preview
    108      */
    109     uint32_t get(int32_t index) const;
    110 
    111     /**
    112      * Check if the CEs in another <code>CEList</code> match the
    113      * suffix of this list starting at a given offset.
    114      *
    115      * @param offset - the offset of the suffix
    116      * @param other - the other <code>CEList</code>
    117      *
    118      * @return <code>TRUE</code> if the CEs match, <code>FALSE</code> otherwise.
    119      *
    120      * @internal ICU 4.0.1 technology preview
    121      */
    122     UBool matchesAt(int32_t offset, const CEList *other) const;
    123 
    124     /**
    125      * The index operator.
    126      *
    127      * @param index - the index
    128      *
    129      * @return a reference to the given CE in the list. Note that the reference is non-const even though the operator itself is <code>const</code>.
    130      *
    131      * @internal ICU 4.0.1 technology preview
    132      */
    133     uint32_t &operator[](int32_t index) const;
    134 
    135     /**
    136      * UObject glue...
    137      * @internal ICU 4.0.1 technology preview
    138      */
    139     virtual UClassID getDynamicClassID() const;
    140     /**
    141      * UObject glue...
    142      * @internal ICU 4.0.1 technology preview
    143      */
    144     static UClassID getStaticClassID();
    145 
    146 private:
    147     void add(uint32_t ce, UErrorCode &status);
    148 
    149     uint32_t ceBuffer[CELIST_BUFFER_SIZE];  // fixed-size, in-object CE buffer
    150     uint32_t *ces;      // NOTE(review): presumably points at ceBuffer or at a larger allocation - confirm in the implementation
    151     int32_t listMax;    // NOTE(review): presumably the capacity of ces - confirm
    152     int32_t listSize;   // NOTE(review): presumably the number of CEs stored, as reported by size() - confirm
    153 
    154 #ifdef INSTRUMENT_CELIST
    155     static int32_t _active;
    156     static int32_t _histogram[10];
    157 #endif
    158 };
    159 
    160 /**
    161  * StringList
    162  *
    163  * This object holds a list of <code>UnicodeString</code> objects.
    164  *
    165  * @internal ICU 4.0.1 technology preview
    166  */
    167 class U_I18N_API StringList : public UObject
    168 {
    169 public:
    170     /**
    171      * Construct an empty <code>StringList</code>
    172      *
    173      * @param status - will be set if any errors occur.
    174      *
    175      * Note: if on return, status is set to an error code,
    176      * the only safe thing to do with this object is to call
    177      * the destructor.
    178      *
    179      * @internal ICU 4.0.1 technology preview
    180      */
    181     StringList(UErrorCode &status);
    182 
    183     /**
    184      * The destructor.
    185      *
    186      * @internal ICU 4.0.1 technology preview
    187      */
    188     ~StringList();
    189 
    190     /**
    191      * Add a string to the list.
    192      *
    193      * @param string - the string to add
    194      * @param status - will be set if any errors occur.
    195      *
    196      * @internal ICU 4.0.1 technology preview
    197      */
    198     void add(const UnicodeString *string, UErrorCode &status);
    199 
    200     /**
    201      * Add an array of <code>UChar</code>s to the list.
    202      *
    203      * @param chars - the address of the array of <code>UChar</code>s
    204      * @param count - the number of <code>UChar</code>s in the array
    205      * @param status - will be set if any errors occur.
    206      *
    207      * @internal ICU 4.0.1 technology preview
    208      */
    209     void add(const UChar *chars, int32_t count, UErrorCode &status);
    210 
    211     /**
    212      * Get a particular string from the list.
    213      *
    214      * @param index - the index of the string
    215      *
    216      * @return a pointer to the <code>UnicodeString</code> or <code>NULL</code>
    217      *         if <code>index</code> is out of bounds.
    218      *
    219      * @internal ICU 4.0.1 technology preview
    220      */
    221     const UnicodeString *get(int32_t index) const;
    222 
    223     /**
    224      * Get the number of strings in the list.
    225      *
    226      * @return the number of strings in the list.
    227      *
    228      * @internal ICU 4.0.1 technology preview
    229      */
    230     int32_t size() const;
    231 
    232     /**
    233      * the UObject glue...
    234      * @internal ICU 4.0.1 technology preview
    235      */
    236     virtual UClassID getDynamicClassID() const;
    237     /**
    238      * the UObject glue...
    239      * @internal ICU 4.0.1 technology preview
    240      */
    241     static UClassID getStaticClassID();
    242 
    243 private:
    244     UnicodeString *strings;  // NOTE(review): presumably the backing array of stored strings - confirm ownership in the implementation
    245     int32_t listMax;         // NOTE(review): presumably the capacity of strings - confirm
    246     int32_t listSize;        // NOTE(review): presumably the number of strings stored, as reported by size() - confirm
    247 
    248 #ifdef INSTRUMENT_STRING_LIST
    249     static int32_t _lists;
    250     static int32_t _strings;
    251     static int32_t _histogram[101];
    252 #endif
    253 };
    254 #endif  /* U_HIDE_INTERNAL_API */
    255 
    256 /*
    257  * Forward references to internal classes.
    258  */
    259 class StringToCEsMap;
    260 class CEToStringsMap;
    261 class CollDataCache;
    262 
    263 #ifndef U_HIDE_INTERNAL_API
    264 /**
    265  * CollData
    266  *
    267  * This class holds the Collator-specific data needed to
    268  * compute the length of the shortest string that can
    269  * generate a particular list of CEs.
    270  *
    271  * <code>CollData</code> objects are quite expensive to compute. Because
    272  * of this, they are cached. When you call <code>CollData::open</code> it
    273  * returns a reference counted cached object. When you call <code>CollData::close</code>
    274  * the reference count on the object is decremented but the object is not deleted.
    275  *
    276  * If you do not need to reuse any unreferenced objects in the cache, you can call
    277  * <code>CollData::flushCollDataCache</code>. If you no longer need any <code>CollData</code>
    278  * objects, you can call <code>CollData::freeCollDataCache</code>
    279  *
    280  * @internal ICU 4.0.1 technology preview
    281  */
    282 class U_I18N_API CollData : public UObject
    283 {
    284 public:
    285     /**
    286      * Construct a <code>CollData</code> object.
    287      *
    288      * @param collator - the collator
    289      * @param status - will be set if any errors occur.
    290      *
    291      * @return the <code>CollData</code> object. You must call
    292      *         <code>close</code> when you are done using the object.
    293      *
    294      * Note: if on return, status is set to an error code,
    295      * the only safe thing to do with this object is to call
    296      * <code>CollData::close</code>.
    297      *
    298      * @internal ICU 4.0.1 technology preview
    299      */
    300     static CollData *open(UCollator *collator, UErrorCode &status);
    301 
    302     /**
    303      * Release a <code>CollData</code> object.
    304      *
    305      * @param collData - the object
    306      *
    307      * @internal ICU 4.0.1 technology preview
    308      */
    309     static void close(CollData *collData);
    310 
    311     /**
    312      * Get the <code>UCollator</code> object used to create this object.
    313      * The object returned may not be the exact object that was used to
    314      * create this object, but it will have the same behavior.
    315      * @internal ICU 4.0.1 technology preview
    316      */
    317     UCollator *getCollator() const;
    318 
    319     /**
    320      * Get a list of all the strings which generate a list
    321      * of CEs starting with a given CE.
    322      *
    323      * @param ce - the CE (note: passed as <code>int32_t</code>, whereas <code>CEList</code> exposes CEs as <code>uint32_t</code>)
    324      *
    325      * @return a <code>StringList</code> object containing all
    326      *        the strings, or <code>NULL</code> if there are
    327      *        no such strings.
    328      *
    329      * @internal ICU 4.0.1 technology preview.
    330      */
    331     const StringList *getStringList(int32_t ce) const;
    332 
    333     /**
    334      * Get a list of the CEs generated by a particular string.
    335      *
    336      * @param string - the string
    337      *
    338      * @return a <code>CEList</code> object containing the CEs. You
    339      *         must call <code>freeCEList</code> when you are finished
    340      *         using the <code>CEList</code>.
    341      *
    342      * @internal ICU 4.0.1 technology preview.
    343      */
    344     const CEList *getCEList(const UnicodeString *string) const;
    345 
    346     /**
    347      * Release a <code>CEList</code> returned by <code>getCEList</code>.
    348      *
    349      * @param list - the <code>CEList</code> to free.
    350      *
    351      * @internal ICU 4.0.1 technology preview
    352      */
    353     void freeCEList(const CEList *list);
    354 
    355     /**
    356      * Return the length of the shortest string that will generate
    357      * the given list of CEs.
    358      *
    359      * @param ces - the CEs
    360      * @param offset - the offset of the first CE in the list to use.
    361      *
    362      * @return the length of the shortest string.
    363      *
    364      * @internal ICU 4.0.1 technology preview
    365      */
    366     int32_t minLengthInChars(const CEList *ces, int32_t offset) const;
    367 
    368 
    369     /**
    370      * Return the length of the shortest string that will generate
    371      * the given list of CEs.
    372      *
    373      * Note: the algorithm used to do this computation is recursive. To
    374      * limit the amount of recursion, a "history" list is used to record
    375      * the best answer starting at a particular offset in the list of CEs.
    376      * If the same offset is visited again during the recursion, the answer
    377      * in the history list is used.
    378      *
    379      * @param ces - the CEs
    380      * @param offset - the offset of the first CE in the list to use.
    381      * @param history - the history list. Must be at least as long as
    382      *                 the number of CEs in the <code>CEList</code>
    383      *
    384      * @return the length of the shortest string.
    385      *
    386      * @internal ICU 4.0.1 technology preview
    387      */
    388    int32_t minLengthInChars(const CEList *ces, int32_t offset, int32_t *history) const;
    389 
    390    /**
    391     * UObject glue...
    392     * @internal ICU 4.0.1 technology preview
    393     */
    394     virtual UClassID getDynamicClassID() const;
    395    /**
    396     * UObject glue...
    397     * @internal ICU 4.0.1 technology preview
    398     */
    399     static UClassID getStaticClassID();
    400 
    401     /**
    402      * <code>CollData</code> objects are expensive to compute, and so
    403      * may be cached. This routine will free the cached objects and delete
    404      * the cache.
    405      *
    406      * WARNING: Don't call this until you have called <code>close</code>
    407      * for each <code>CollData</code> object that you have used. Also,
    408      * DO NOT call this if another thread may be calling <code>flushCollDataCache</code>
    409      * at the same time.
    410      *
    411      * @internal ICU 4.0.1 technology preview
    412      */
    413     static void freeCollDataCache();
    414 
    415     /**
    416      * <code>CollData</code> objects are expensive to compute, and so
    417      * may be cached. This routine will remove any unused <code>CollData</code>
    418      * objects from the cache.
    419      *
    420      * @internal ICU 4.0.1 technology preview
    421      */
    422     static void flushCollDataCache();
    423 
    424 private:
    425     friend class CollDataCache;
    426     friend class CollDataCacheEntry;
    427 
    428     CollData(UCollator *collator, char *cacheKey, int32_t cachekeyLength, UErrorCode &status);
    429     ~CollData();  // constructors and destructor are private; see the static open() and close()
    430 
    431     CollData();
    432 
    433     static char *getCollatorKey(UCollator *collator, char *buffer, int32_t bufferLength);
    434 
    435     static CollDataCache *getCollDataCache();
    436 
    437     UCollator      *coll;
    438     StringToCEsMap *charsToCEList;
    439     CEToStringsMap *ceToCharsStartingWith;
    440 
    441     char keyBuffer[KEY_BUFFER_SIZE];
    442     char *key;  // NOTE(review): presumably points at keyBuffer or at a larger allocation - confirm in the implementation
    443 
    444     static CollDataCache *collDataCache;
    445 
    446     uint32_t minHan;
    447     uint32_t maxHan;
    448 
    449     uint32_t jamoLimits[4];
    450 };
    451 #endif  /* U_HIDE_INTERNAL_API */
    452 
    453 U_NAMESPACE_END
    454 
    455 #endif // #if !UCONFIG_NO_COLLATION
    456 #endif // #ifndef COLL_DATA_H
    457