Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2000-2010, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  ucol_elm.h
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created 02/22/2001
     14 *   created by: Vladimir Weinstein
     15 *
     16 *   This program reads the Fractional UCA table and generates
     17 *   internal format for UCA table as well as inverse UCA table.
     18 *   It then writes binary files containing the data: ucadata.dat
     19 *   & invuca.dat
     20 */
     21 #ifndef UCOL_UCAELEMS_H
     22 #define UCOL_UCAELEMS_H
     23 
     24 #include "unicode/utypes.h"
     25 #include "unicode/uniset.h"
     26 #include "ucol_tok.h"
     27 
     28 #if !UCONFIG_NO_COLLATION
     29 
     30 #include "ucol_imp.h"
     31 
     32 #ifdef UCOL_DEBUG
     33 #include "cmemory.h"
     34 #include <stdio.h>
     35 #endif
     36 
     37 U_CDECL_BEGIN
     38 
     39 /* This is the maximum trie capacity for the mapping trie.
     40 Due to current limitations in genuca and the design of UTrie,
     41 this number can't be more than 256K.
     42 As of Unicode 5, it currently could safely go to 128K without
     43 a problem. Normally, less than 32K are tailored.
     44 */
     45 #define UCOL_ELM_TRIE_CAPACITY 0x40000 /* 0x40000 == 256K, i.e. exactly the upper limit stated above */
     46 
     47 /* This is the maximum capacity for the temporary combining class
     48  * table.  The table will be compacted after scanning all the
     49  * Unicode codepoints.
     50 */
     51 #define UCOL_MAX_CM_TAB  0x10000
     52 
     53 
     54 typedef struct { /* Buffer of 32-bit collation elements (CEs) plus a position/size pair. */
     55     uint32_t *CEs;    /* pointer to the CE storage; ownership is not visible in this header */
     56     int32_t position; /* NOTE(review): presumably the next free index in CEs -- confirm in the implementation */
     57     int32_t size;     /* NOTE(review): presumably the allocated capacity of CEs, in elements -- confirm */
     58 } ExpansionTable;
     59 
     60 typedef struct { /* One element passed to uprv_uca_addAnElement(): code units, optional prefix, and its CEs. */
     61     UChar prefixChars[128];  /* inline storage for up to 128 prefix code units */
     62     UChar *prefix;           /* NOTE(review): presumably points at prefixChars or an external buffer -- confirm */
     63     uint32_t prefixSize;     /* NOTE(review): presumably the number of UChars in prefix -- confirm */
     64     UChar uchars[128];       /* inline storage for up to 128 code units of the element itself */
     65     UChar *cPoints;          /* NOTE(review): presumably points at uchars or an external buffer -- confirm */
     66     uint32_t cSize;          /* Number of characters in sequence - for contraction */
     67     uint32_t noOfCEs;        /* Number of collation elements                       */
     68     uint32_t CEs[128];      /* These are collation elements - there could be more than one - in case of expansion */
     69     uint32_t mapCE;         /* This is the value element maps in original table   */
     70     uint32_t sizePrim[128];  /* NOTE(review): presumably per-CE primary weight sizes -- confirm */
     71     uint32_t sizeSec[128];   /* NOTE(review): presumably per-CE secondary weight sizes -- confirm */
     72     uint32_t sizeTer[128];   /* NOTE(review): presumably per-CE tertiary weight sizes -- confirm */
     73     UBool caseBit;           /* boolean flag; exact meaning is not shown in this header */
     74     UBool isThai;            /* boolean flag; exact meaning is not shown in this header */
     75 } UCAElements;
     76 
     77 typedef struct { /* Parallel arrays (endExpansionCE / isV) with a position/size pair and three maxima. */
     78   uint32_t *endExpansionCE; /* array of 32-bit CEs; NOTE(review): presumably the last CE of each expansion -- confirm */
     79   UBool    *isV;            /* array of flags; NOTE(review): presumably parallel to endExpansionCE, marking vowel (V) Jamo -- confirm */
     80   int32_t   position;       /* NOTE(review): presumably the number of entries in use -- confirm */
     81   int32_t   size;           /* NOTE(review): presumably the allocated capacity, in entries -- confirm */
     82   uint8_t   maxLSize;       /* NOTE(review): presumably max expansion size for leading (L) Jamo -- confirm */
     83   uint8_t   maxVSize;       /* NOTE(review): presumably max expansion size for vowel (V) Jamo -- confirm */
     84   uint8_t   maxTSize;       /* NOTE(review): presumably max expansion size for trailing (T) Jamo -- confirm */
     85 } MaxJamoExpansionTable;
     86 
     87 typedef struct { /* Parallel arrays (endExpansionCE / expansionCESize) with a position/size pair. */
     88   uint32_t *endExpansionCE;  /* array of 32-bit CEs; NOTE(review): presumably the last CE of each expansion -- confirm */
     89   uint8_t  *expansionCESize; /* array of byte-sized counts; NOTE(review): presumably parallel to endExpansionCE -- confirm */
     90   int32_t   position;        /* NOTE(review): presumably the number of entries in use -- confirm */
     91   int32_t   size;            /* NOTE(review): presumably the allocated capacity, in entries -- confirm */
     92 } MaxExpansionTable;
     93 
     94 typedef struct { /* Combining marks stored in one array, with a per-combining-class index into it. */
     95     uint16_t   index[256];  /* index of cPoints by combining class 0-255. */
     96     UChar      *cPoints;    /* code point array of all combining marks */
     97     uint32_t   size;        /* total number of combining marks */
     98 } CombinClassTable;
     99 
    100 typedef struct { /* Working state handed to the uprv_uca_* functions declared in this header. */
    101   /*CompactEIntArray      *mapping; */ /* earlier declaration of mapping, kept commented out */
    102   UNewTrie                 *mapping;           /* the mapping trie (see UCOL_ELM_TRIE_CAPACITY) */
    103   ExpansionTable        *expansions;        /* table of expansion CEs */
    104   struct CntTable       *contractions;      /* contraction table; struct CntTable is declared elsewhere */
    105   UCATableHeader        *image;             /* NOTE(review): presumably the image passed to uprv_uca_initTempTable -- confirm */
    106   UColOptionSet         *options;           /* NOTE(review): presumably the opts passed to uprv_uca_initTempTable -- confirm */
    107   MaxExpansionTable     *maxExpansions;     /* see MaxExpansionTable */
    108   MaxJamoExpansionTable *maxJamoExpansions; /* see MaxJamoExpansionTable */
    109   uint8_t               *unsafeCP;          /* byte array; NOTE(review): presumably a table of "unsafe" code points -- confirm */
    110   uint8_t               *contrEndCP;        /* byte array; NOTE(review): presumably code points that end contractions -- confirm */
    111   const UCollator       *UCA;               /* read-only collator; NOTE(review): presumably the UCA root collator -- confirm */
    112   UHashtable      *prefixLookup;            /* hash table; NOTE(review): presumably used to look up prefixes -- confirm */
    113   CombinClassTable      *cmLookup;  /* combining class lookup for tailoring. */
    114 } tempUCATable;
    115 
    116 typedef struct { /* Pairs one UTF-16 code unit with a combining class value. */
    117     UChar cp;          /* the code unit */
    118     uint16_t cClass;   /* combining class */
    119 }CompData;
    120 
    121 typedef struct { /* Three buffer/length pairs plus combining-class state; NOTE(review): presumably scratch state for tailoring -- confirm */
    122     CompData *precomp;    /* array of CompData entries */
    123     int32_t precompLen;   /* NOTE(review): presumably the number of entries in precomp -- confirm */
    124     UChar *decomp;        /* NOTE(review): presumably a decomposed string -- confirm */
    125     int32_t decompLen;    /* NOTE(review): presumably the length of decomp in UChars -- confirm */
    126     UChar *comp;          /* NOTE(review): presumably a composed string -- confirm */
    127     int32_t compLen;      /* NOTE(review): presumably the length of comp in UChars -- confirm */
    128     uint16_t curClass;    /* NOTE(review): presumably the combining class currently being processed -- confirm */
    129     uint16_t tailoringCM; /* NOTE(review): presumably the tailored combining mark -- confirm */
    130     int32_t  cmPos;       /* NOTE(review): presumably a position into a combining mark array -- confirm */
    131 }tempTailorContext;
    132 
    133 U_CAPI tempUCATable * U_EXPORT2 uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollator *UCA, UColCETags initTag, UColCETags supplementaryInitTag, UErrorCode *status);
        /* uprv_uca_initTempTable: returns a tempUCATable pointer built from the given image, options and
         * collator. NOTE(review): presumably allocates the table and its sub-tables, with initTag and
         * supplementaryInitTag seeding the mapping trie -- confirm in the implementation.
         * status is an ICU UErrorCode in/out parameter. */
    134 U_CAPI void U_EXPORT2 uprv_uca_closeTempTable(tempUCATable *t);
        /* uprv_uca_closeTempTable: NOTE(review): presumably releases t and whatever it owns -- confirm. */
    135 U_CAPI uint32_t U_EXPORT2 uprv_uca_addAnElement(tempUCATable *t, UCAElements *element, UErrorCode *status);
        /* uprv_uca_addAnElement: takes one UCAElements record and returns a 32-bit value.
         * NOTE(review): presumably adds the element to t and returns the CE it maps to -- confirm. */
    136 U_CAPI UCATableHeader * U_EXPORT2 uprv_uca_assembleTable(tempUCATable *t, UErrorCode *status);
        /* uprv_uca_assembleTable: returns a UCATableHeader pointer derived from t.
         * NOTE(review): presumably builds the final binary image; who frees the result is not
         * visible in this header -- confirm. */
    137 
    138 U_CAPI int32_t U_EXPORT2
    139 uprv_uca_canonicalClosure(tempUCATable *t, UColTokenParser *src,
    140                           U_NAMESPACE_QUALIFIER UnicodeSet *closed, UErrorCode *status);
        /* uprv_uca_canonicalClosure: operates on t using the token parser src and the UnicodeSet closed.
         * The function is declared with C linkage but takes a C++ UnicodeSet pointer, so this
         * header must be compiled as C++.
         * NOTE(review): presumably adds canonically equivalent sequences to t, records them in
         * closed, and returns a count -- confirm in the implementation. */
    141 
    142 U_CDECL_END
    143 
    144 #endif /* #if !UCONFIG_NO_COLLATION */
    145 
    146 #endif
    147