Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2001-2011, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  ucol_tok.h
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created 02/22/2001
     14 *   created by: Vladimir Weinstein
     15 *
     16 * This module reads a tailoring rule string and produces a list of
     17 * tokens that will be turned into collation elements
     18 *
     19 */
     20 
     21 #ifndef UCOL_TOKENS_H
     22 #define UCOL_TOKENS_H
     23 
     24 #include "unicode/utypes.h"
     25 #include "unicode/uset.h"
     26 
     27 #if !UCONFIG_NO_COLLATION
     28 
     29 #include "ucol_imp.h"
     30 #include "uhash.h"
     31 #include "unicode/parseerr.h"
     32 
     33 #define UCOL_TOK_UNSET 0xFFFFFFFF
     34 #define UCOL_TOK_RESET 0xDEADBEEF
     35 
     36 #define UCOL_TOK_POLARITY_NEGATIVE 0
     37 #define UCOL_TOK_POLARITY_POSITIVE 1
     38 
     39 #define UCOL_TOK_TOP 0x04
     40 #define UCOL_TOK_VARIABLE_TOP 0x08
     41 #define UCOL_TOK_BEFORE 0x03
     42 #define UCOL_TOK_SUCCESS 0x10
     43 
     44 /* this is space for the extra strings that need to be unquoted */
     45 /* during the parsing of the rules */
     46 #define UCOL_TOK_EXTRA_RULE_SPACE_SIZE 4096
     47 typedef struct UColToken UColToken;
     48 
     49 typedef struct  {
     50   UColToken* first;
     51   UColToken* last;
     52   UColToken* reset;
     53   UBool indirect;
     54   uint32_t baseCE;
     55   uint32_t baseContCE;
     56   uint32_t nextCE;
     57   uint32_t nextContCE;
     58   uint32_t previousCE;
     59   uint32_t previousContCE;
     60   int32_t pos[UCOL_STRENGTH_LIMIT];
     61   uint32_t gapsLo[3*UCOL_CE_STRENGTH_LIMIT];
     62   uint32_t gapsHi[3*UCOL_CE_STRENGTH_LIMIT];
     63   uint32_t numStr[UCOL_CE_STRENGTH_LIMIT];
     64   UColToken* fStrToken[UCOL_CE_STRENGTH_LIMIT];
     65   UColToken* lStrToken[UCOL_CE_STRENGTH_LIMIT];
     66 } UColTokListHeader;
     67 
     68 struct UColToken {
     69   UChar debugSource;
     70   UChar debugExpansion;
     71   UChar debugPrefix;
     72   uint32_t CEs[128];
     73   uint32_t noOfCEs;
     74   uint32_t expCEs[128];
     75   uint32_t noOfExpCEs;
     76   uint32_t source;
     77   uint32_t expansion;
     78   uint32_t prefix;
     79   uint32_t strength;
     80   uint32_t toInsert;
     81   uint32_t polarity; /* 1 for <, <<, <<<, , ; and -1 for >, >>, >>> */
     82   UColTokListHeader *listHeader;
     83   UColToken* previous;
     84   UColToken* next;
     85   UChar **rulesToParseHdl;
     86   uint16_t flags;
     87 };
     88 
     89 /*
     90  * This is a token that has been parsed
     91  * but not yet processed. Used to reduce
     92  * the number of arguments in the parser
     93  */
     94 typedef struct {
     95   uint32_t strength;
     96   uint32_t charsOffset;
     97   uint32_t charsLen;
     98   uint32_t extensionOffset;
     99   uint32_t extensionLen;
    100   uint32_t prefixOffset;
    101   uint32_t prefixLen;
    102   uint16_t flags;
    103   uint16_t indirectIndex;
    104 } UColParsedToken;
    105 
    106 
    107 typedef struct {
    108   UColParsedToken parsedToken;
    109   UChar *source;
    110   UChar *end;
    111   const UChar *current;
    112   UChar *sourceCurrent;
    113   UChar *extraCurrent;
    114   UChar *extraEnd;
    115   const InverseUCATableHeader *invUCA;
    116   const UCollator *UCA;
    117   UHashtable *tailored;
    118   UColOptionSet *opts;
    119   uint32_t resultLen;
    120   uint32_t listCapacity;
    121   UColTokListHeader *lh;
    122   UColToken *varTop;
    123   USet *copySet;
    124   USet *removeSet;
    125   UBool buildCCTabFlag;  /* Tailoring rule requirs building combining class table. */
    126 
    127   UChar32 previousCp;               /* Previous code point. */
    128   /* For processing starred lists. */
    129   UBool isStarred;                   /* Are we processing a starred token? */
    130   UBool savedIsStarred;
    131   uint32_t currentStarredCharIndex;  /* Index of the current charrecter in the starred expression. */
    132   uint32_t lastStarredCharIndex;    /* Index to the last character in the starred expression. */
    133 
    134   /* For processing ranges. */
    135   UBool inRange;                     /* Are we in a range? */
    136   UChar32 currentRangeCp;           /* Current code point in the range. */
    137   UChar32 lastRangeCp;              /* The last code point in the range. */
    138 
    139   /* reorder codes for collation reordering */
    140   int32_t* reorderCodes;
    141   int32_t reorderCodesLength;
    142 
    143 } UColTokenParser;
    144 
    145 typedef struct {
    146   const UChar *subName;
    147   int32_t subLen;
    148   UColAttributeValue attrVal;
    149 } ucolTokSuboption;
    150 
    151 typedef struct {
    152    const UChar *optionName;
    153    int32_t optionLen;
    154    const ucolTokSuboption *subopts;
    155    int32_t subSize;
    156    UColAttribute attr;
    157 } ucolTokOption;
    158 
    159 #define ucol_tok_isSpecialChar(ch)              \
    160     (((((ch) <= 0x002F) && ((ch) >= 0x0020)) || \
    161       (((ch) <= 0x003F) && ((ch) >= 0x003A)) || \
    162       (((ch) <= 0x0060) && ((ch) >= 0x005B)) || \
    163       (((ch) <= 0x007E) && ((ch) >= 0x007D)) || \
    164       (ch) == 0x007B))
    165 
    166 
    167 U_CFUNC
    168 uint32_t ucol_tok_assembleTokenList(UColTokenParser *src,
    169                                     UParseError *parseError,
    170                                     UErrorCode *status);
    171 
    172 U_CFUNC
    173 void ucol_tok_initTokenList(UColTokenParser *src,
    174                             const UChar *rules,
    175                             const uint32_t rulesLength,
    176                             const UCollator *UCA,
    177                             GetCollationRulesFunction importFunc,
    178                             void* context,
    179                             UErrorCode *status);
    180 
    181 U_CFUNC void ucol_tok_closeTokenList(UColTokenParser *src);
    182 
    183 U_CAPI const UChar* U_EXPORT2 ucol_tok_parseNextToken(UColTokenParser *src,
    184                         UBool startOfRules,
    185                         UParseError *parseError,
    186                         UErrorCode *status);
    187 
    188 
    189 U_CAPI const UChar * U_EXPORT2
    190 ucol_tok_getNextArgument(const UChar *start, const UChar *end,
    191                                UColAttribute *attrib, UColAttributeValue *value,
    192                                UErrorCode *status);
    193 U_CAPI int32_t U_EXPORT2 ucol_inv_getNextCE(const UColTokenParser *src,
    194                                             uint32_t CE, uint32_t contCE,
    195                                             uint32_t *nextCE, uint32_t *nextContCE,
    196                                             uint32_t strength);
    197 U_CFUNC int32_t U_EXPORT2 ucol_inv_getPrevCE(const UColTokenParser *src,
    198                                             uint32_t CE, uint32_t contCE,
    199                                             uint32_t *prevCE, uint32_t *prevContCE,
    200                                             uint32_t strength);
    201 
    202 const UChar* U_CALLCONV ucol_tok_getRulesFromBundle(
    203     void* context,
    204     const char* locale,
    205     const char* type,
    206     int32_t* pLength,
    207     UErrorCode* status);
    208 
    209 #endif /* #if !UCONFIG_NO_COLLATION */
    210 
    211 #endif
    212