Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 2012-2014, International Business Machines
      4 * Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 * collationbasedatabuilder.h
      7 *
      8 * created on: 2012aug11
      9 * created by: Markus W. Scherer
     10 */
     11 
     12 #ifndef __COLLATIONBASEDATABUILDER_H__
     13 #define __COLLATIONBASEDATABUILDER_H__
     14 
     15 #include "unicode/utypes.h"
     16 
     17 #if !UCONFIG_NO_COLLATION
     18 
     19 #include "unicode/uniset.h"
     20 #include "unicode/unistr.h"
     21 #include "collation.h"
     22 #include "collationdata.h"
     23 #include "collationdatabuilder.h"
     24 #include "normalizer2impl.h"
     25 #include "utrie2.h"
     26 #include "uvectr32.h"
     27 #include "uvectr64.h"
     28 #include "uvector.h"
     29 
     30 U_NAMESPACE_BEGIN
     31 
     32 /**
     33  * Low-level base CollationData builder.
         *
         * Extends CollationDataBuilder with the state declared at the bottom of this
         * class: per-lead-byte compressibility flags, a numeric primary, Han primary
         * bounds and step, a 64-bit vector of root elements, and a scripts string.
         *
         * NOTE(review): only declarations are visible in this header. Remarks below
         * that are marked "presumably" are inferred from names and signatures and
         * should be confirmed against the implementation file.
     34  */
     35 class U_I18N_API CollationBaseDataBuilder : public CollationDataBuilder {
     36 public:
            /**
             * Constructs the builder.
             *
             * @param errorCode in/out error code
             */
     37     CollationBaseDataBuilder(UErrorCode &errorCode);
     38 
            /** Virtual destructor, so that deletion through a base pointer is well-defined. */
     39     virtual ~CollationBaseDataBuilder();
     40 
            /**
             * Initializes the builder.
             * NOTE(review): the work done here is not visible in this header;
             * presumably it must be called before mappings are added -- confirm.
             *
             * @param errorCode in/out error code
             */
     41     void init(UErrorCode &errorCode);
     42 
     43     /**
     44      * Sets the Han ranges as ranges of offset CE32s.
     45      * Note: Unihan extension A sorts after the other BMP ranges.
     46      * See http://www.unicode.org/reports/tr10/#Implicit_Weights
     47      *
     48      * @param ranges array of ranges of [:Unified_Ideograph:] in collation order,
     49      *               as (start, end) code point pairs
     50      * @param length number of code points (not pairs)
     51      * @param errorCode in/out error code
     52      */
     53     void initHanRanges(const UChar32 ranges[], int32_t length, UErrorCode &errorCode);
     54 
            /**
             * Stores the given value in the numericPrimary member.
             *
             * @param np the new numeric primary
             */
     55     void setNumericPrimary(uint32_t np) { numericPrimary = np; }
     56 
            /**
             * Reports whether b is a compressible primary-weight lead byte.
             * Presumably overrides a CollationDataBuilder virtual and consults
             * compressibleBytes -- the base class is not visible here; confirm.
             *
             * @param b the lead byte value to test
             * @return whether the lead byte is compressible
             */
     57     virtual UBool isCompressibleLeadByte(uint32_t b) const;
     58 
            /**
             * Marks a primary-weight lead byte as compressible.
             * Presumably sets the matching entry of compressibleBytes -- confirm.
             *
             * @param b the lead byte value to mark
             */
     59     void setCompressibleLeadByte(uint32_t b);
     60 
            /**
             * Static helpers that compute a signed difference between two primary weights.
             * NOTE(review): judging by the names, the result counts two-byte
             * (respectively three-byte) primaries between p1 and p2; the exact
             * definition and sign convention are not visible here -- confirm.
             *
             * @param p1 first primary weight
             * @param p2 second primary weight
             * @param isCompressible presumably whether the primaries' lead byte is compressible
             * @return the difference as an int32_t
             */
     61     static int32_t diffTwoBytePrimaries(uint32_t p1, uint32_t p2, UBool isCompressible);
     62     static int32_t diffThreeBytePrimaries(uint32_t p1, uint32_t p2, UBool isCompressible);
     63 
            /**
             * Encodes a sequence of 64-bit CEs into a single 32-bit value.
             * Presumably overrides a CollationDataBuilder virtual and returns a
             * CE32 -- confirm against the base class.
             *
             * @param ces array of collation elements
             * @param cesLength number of elements in ces
             * @param errorCode in/out error code
             * @return the 32-bit encoding of the CEs
             */
     64     virtual uint32_t encodeCEs(const int64_t ces[], int32_t cesLength, UErrorCode &errorCode);
     65 
            /**
             * Add one CE, or an array of CEs, as root element(s).
             * Presumably these are recorded in the rootElements member -- confirm.
             *
             * @param ces array of collation elements
             * @param cesLength number of elements in ces
             * @param ce a single collation element
             * @param errorCode in/out error code
             */
     66     void addRootElements(const int64_t ces[], int32_t cesLength, UErrorCode &errorCode);
     67     void addRootElement(int64_t ce, UErrorCode &errorCode);
     68 
            /**
             * Registers a reordering group described by a lead-byte range and its scripts.
             * NOTE(review): whether lastByte is inclusive, and how groupScripts is
             * encoded, are not visible here -- confirm. Presumably the data is
             * accumulated in the scripts member.
             *
             * @param firstByte first primary lead byte of the group
             * @param lastByte last primary lead byte of the group
             * @param groupScripts the scripts belonging to the group
             * @param errorCode in/out error code
             */
     69     void addReorderingGroup(uint32_t firstByte, uint32_t lastByte,
     70                             const UnicodeString &groupScripts,
     71                             UErrorCode &errorCode);
     72 
            /**
             * Builds the collation data into the caller-provided object.
             * Presumably overrides a CollationDataBuilder virtual -- confirm.
             *
             * @param data receives the built CollationData
             * @param errorCode in/out error code
             */
     73     virtual void build(CollationData &data, UErrorCode &errorCode);
     74 
            /**
             * Builds the root elements table into the caller-provided vector.
             * Presumably derived from the rootElements member -- confirm.
             *
             * @param table receives the table entries
             * @param errorCode in/out error code
             */
     75     void buildRootElementsTable(UVector32 &table, UErrorCode &errorCode);
     76 
     77 private:
            // Private helper; presumably called by buildRootElementsTable() to emit
            // a range of primaries into the table.
            // NOTE(review): the meaning of i and of the returned int32_t is not
            // visible in this header -- confirm against the implementation.
     78     int32_t writeRootElementsRange(
     79             uint32_t prevPrimary, uint32_t p, int32_t i,
     80             UVector32 &table, UErrorCode &errorCode);
     81 
     82     // Flags for which primary-weight lead bytes are compressible.
            // One entry per possible byte value (256 entries).
     83     UBool compressibleBytes[256];
            // Assigned by setNumericPrimary().
     84     uint32_t numericPrimary;
            // Han primary bounds and step; presumably set up by initHanRanges() -- confirm.
     85     uint32_t firstHanPrimary;
     86     uint32_t lastHanPrimary;
     87     int32_t hanStep;
            // 64-bit root element values; presumably filled by addRootElement(s)().
     88     UVector64 rootElements;
            // Script data; presumably accumulated by addReorderingGroup() -- confirm.
     89     UnicodeString scripts;
     90 };
     91 
     92 U_NAMESPACE_END
     93 
     94 #endif  // !UCONFIG_NO_COLLATION
     95 #endif  // __COLLATIONBASEDATABUILDER_H__
     96