1 /* 2 ******************************************************************************* 3 * Copyright (C) 2012-2014, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * collationbasedatabuilder.h 7 * 8 * created on: 2012aug11 9 * created by: Markus W. Scherer 10 */ 11 12 #ifndef __COLLATIONBASEDATABUILDER_H__ 13 #define __COLLATIONBASEDATABUILDER_H__ 14 15 #include "unicode/utypes.h" 16 17 #if !UCONFIG_NO_COLLATION 18 19 #include "unicode/uniset.h" 20 #include "unicode/unistr.h" 21 #include "collation.h" 22 #include "collationdata.h" 23 #include "collationdatabuilder.h" 24 #include "normalizer2impl.h" 25 #include "utrie2.h" 26 #include "uvectr32.h" 27 #include "uvectr64.h" 28 #include "uvector.h" 29 30 U_NAMESPACE_BEGIN 31 32 /** 33 * Low-level base CollationData builder. 34 */ 35 class U_I18N_API CollationBaseDataBuilder : public CollationDataBuilder { 36 public: 37 CollationBaseDataBuilder(UErrorCode &errorCode); 38 39 virtual ~CollationBaseDataBuilder(); 40 41 void init(UErrorCode &errorCode); 42 43 /** 44 * Sets the Han ranges as ranges of offset CE32s. 45 * Note: Unihan extension A sorts after the other BMP ranges. 46 * See http://www.unicode.org/reports/tr10/#Implicit_Weights 47 * 48 * @param ranges array of ranges of [:Unified_Ideograph:] in collation order, 49 * as (start, end) code point pairs 50 * @param length number of code points (not pairs) 51 * @param errorCode in/out error code 52 */ 53 void initHanRanges(const UChar32 ranges[], int32_t length, UErrorCode &errorCode); 54 55 void setNumericPrimary(uint32_t np) { numericPrimary = np; } 56 57 virtual UBool isCompressibleLeadByte(uint32_t b) const; 58 59 void setCompressibleLeadByte(uint32_t b); 60 61 static int32_t diffTwoBytePrimaries(uint32_t p1, uint32_t p2, UBool isCompressible); 62 static int32_t diffThreeBytePrimaries(uint32_t p1, uint32_t p2, UBool isCompressible); 63 64 virtual uint32_t encodeCEs(const int64_t ces[], int32_t cesLength, UErrorCode &errorCode); 65 66 void addRootElements(const int64_t ces[], int32_t cesLength, UErrorCode &errorCode); 67 void addRootElement(int64_t ce, UErrorCode &errorCode); 68 69 void addReorderingGroup(uint32_t firstByte, uint32_t lastByte, 70 const UnicodeString &groupScripts, 71 UErrorCode &errorCode); 72 73 virtual void build(CollationData &data, UErrorCode &errorCode); 74 75 void buildRootElementsTable(UVector32 &table, UErrorCode &errorCode); 76 77 private: 78 int32_t writeRootElementsRange( 79 uint32_t prevPrimary, uint32_t p, int32_t i, 80 UVector32 &table, UErrorCode &errorCode); 81 82 // Flags for which primary-weight lead bytes are compressible. 83 UBool compressibleBytes[256]; 84 uint32_t numericPrimary; 85 uint32_t firstHanPrimary; 86 uint32_t lastHanPrimary; 87 int32_t hanStep; 88 UVector64 rootElements; 89 UnicodeString scripts; 90 }; 91 92 U_NAMESPACE_END 93 94 #endif // !UCONFIG_NO_COLLATION 95 #endif // __COLLATIONBASEDATABUILDER_H__ 96