Home | History | Annotate | Download | only in gencase
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2004-2009, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  gencase.h
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2004aug28
     14 *   created by: Markus W. Scherer
     15 */
     16 
     17 #ifndef __GENCASE_H__
     18 #define __GENCASE_H__
     19 
     20 #include "unicode/utypes.h"
     21 #include "utrie.h"
     22 #include "propsvec.h"
     23 #include "ucase.h"
     24 
     25 U_CDECL_BEGIN
     26 
     27 /* Unicode versions --------------------------------------------------------- */
     28 
        /*
         * Sequential constants for the Unicode versions that this tool
         * distinguishes. No enumerator has an initializer, so they take the
         * implicit values 0, 1, 2, ... in the order listed; a later version
         * therefore always compares greater than an earlier one.
         * UNI_VER_COUNT is the number of versions listed (currently 12).
         * NOTE(review): there is no UNI_5_0 between UNI_4_1 and UNI_5_1 --
         * confirm that this gap is intentional.
         */
     29 enum {
     30     UNI_1_0,
     31     UNI_1_1,
     32     UNI_2_0,
     33     UNI_3_0,
     34     UNI_3_1,
     35     UNI_3_2,
     36     UNI_4_0,
     37     UNI_4_0_1,
     38     UNI_4_1,
     39     UNI_5_1,
     40     UNI_5_2,
     41     UNI_6_0,
     42     UNI_VER_COUNT   /* count of the versions above; keep this entry last */
     43 };
     44 
     45 extern int32_t ucdVersion; /* declaration only; presumably holds one of the UNI_* constants -- confirm at the definition */
     46 
     47 /* gencase ------------------------------------------------------------------ */
     48 
     49 #define UGENCASE_EXC_SHIFT     16 /* position of the lowest bit set in UGENCASE_EXC_MASK */
     50 #define UGENCASE_EXC_MASK      0xffff0000 /* bits 31..16 of a 32-bit word; NOTE(review): "EXC" presumably means exceptions -- confirm */
     51 
     52 /*
     53  * Values for the ucase.icu unfold[] data array, see store.c.
     54  * The values are stored in ucase.icu so that the runtime code will work with
     55  * changing values, but they are hardcoded for gencase for simplicity.
     56  * They are optimized, that is, provide for minimal table column widths,
     57  * for the actual Unicode data, so that the table size is minimized.
     58  * Future versions of Unicode may require increases of some of these values.
         *
         * NOTE(review): the unit of the widths is not stated here; presumably
         * they are counted in 16-bit UChar units -- confirm in store.c.
     59  */
     60 enum {
     61     UGENCASE_UNFOLD_STRING_WIDTH=3,     /* width of the string column of a row */
     62     UGENCASE_UNFOLD_CP_WIDTH=2,         /* width of the code point column of a row */
     63     UGENCASE_UNFOLD_WIDTH=UGENCASE_UNFOLD_STRING_WIDTH+UGENCASE_UNFOLD_CP_WIDTH,  /* total row width: 3+2=5 */
     64     UGENCASE_UNFOLD_MAX_ROWS=250        /* hardcoded upper limit for the number of rows */
     65 };
     66 
     67 /* Values for additional data stored in pv column 1 */
        /*
         * Bit numbers (shift distances), not masks. The single enumerator has no
         * initializer and therefore has the value 0, which matches the "bit 0"
         * remark on it; enumerators appended later would get 1, 2, ...
         */
     68 enum {
     69     UGENCASE_IS_MID_LETTER_SHIFT        /* bit 0  WB=MidLetter or WB=MidNumLet */
     70 };
     71 
     72 /* special casing data */
        /*
         * One record of special case mapping data for a single code point.
         * NOTE(review): presumably filled from the UCD file SpecialCasing.txt --
         * confirm in the parser.
         */
     73 typedef struct {
     74     UChar32 code;       /* code point that this record belongs to */
     75     UBool isComplex;    /* NOTE(review): exact meaning is not visible in this header -- see where it is set */
        /*
         * Three fixed-size buffers of 32 UChars each for the lowercase, uppercase
         * and titlecase mapping strings. How the used length is recorded
         * (terminator or length prefix) is not visible in this header.
         */
     76     UChar lowerCase[32], upperCase[32], titleCase[32];
     77 } SpecialCasing;
     78 
     79 /* case folding data */
        /*
         * One record of case folding data for a single code point.
         * NOTE(review): presumably filled from the UCD file CaseFolding.txt --
         * confirm in the parser.
         */
     80 typedef struct {
     81     UChar32 code, simple;   /* the code point and its single-code-point (simple) folding */
     82     char status;            /* NOTE(review): presumably the status letter from CaseFolding.txt (C/F/S/T) -- confirm */
     83     UChar full[32];         /* fixed buffer of 32 UChars for the full folding string; length encoding not visible here */
     84 } CaseFolding;
     85 
     86 /* case mapping properties */
        /*
         * Everything collected about the case behavior of one code point; this is
         * the record type passed to setProps().
         */
     87 typedef struct {
     88     UChar32 code, lowerCase, upperCase, titleCase;  /* the code point and its three single-code-point mappings */
     89     UChar32 closure[8];             /* room for up to 8 code points; NOTE(review): how unused slots are marked is not visible here */
     90     SpecialCasing *specialCasing;   /* pointer to extra data; NOTE(review): presumably NULL if there is none -- confirm */
     91     CaseFolding *caseFolding;       /* pointer to extra data; NOTE(review): presumably NULL if there is none -- confirm */
     92     uint8_t gc, cc;                 /* NOTE(review): presumably general category and canonical combining class -- confirm */
     93     UBool isCaseIgnorable;          /* boolean flag; the rule that sets it is not visible in this header */
     94 } Props;
     95 
     96 /* global flags (declarations only; the variables are defined outside this header) */
        /* NOTE(review): presumably set from command-line options -- confirm at the definitions */
     97 extern UBool beVerbose, haveCopyright;
     98 
     99 /* properties vectors in gencase.c */
        /* Declaration only: a shared pointer to a UPropsVectors object (type from propsvec.h). */
    100 extern UPropsVectors *pv;
    101 
    102 /* prototypes */
        /*
         * All functions below are only declared here; their definitions are in
         * the tool's .c files. The list sits between U_CDECL_BEGIN and
         * U_CDECL_END. Notes marked NOTE(review) are inferred from names and
         * signatures only and must be checked against the implementations.
         */

        /*
         * NOTE(review): basename is the only non-const pointer, so it is
         * presumably the output buffer that receives a name built from filename
         * and suffix; the required buffer size is not visible here.
         */
    103 U_CFUNC void
    104 writeUCDFilename(char *basename, const char *filename, const char *suffix);
    105 
        /* NOTE(review): presumably tests whether s matches token; exact matching rules are not visible here. */
    106 U_CFUNC UBool
    107 isToken(const char *token, const char *s);
    108 
        /* NOTE(review): presumably parses the version string v and updates ucdVersion -- confirm. */
    109 extern void
    110 setUnicodeVersion(const char *v);
    111 
        /* Takes one Props record; p is not const, so the callee is allowed to modify it. */
    112 extern void
    113 setProps(Props *p);
    114 
        /*
         * The only function here declared with U_EXPORT2.
         * NOTE(review): presumably a callback handed to the trie builder from
         * utrie.h -- confirm at the call site.
         */
    115 U_CFUNC uint32_t U_EXPORT2
    116 getFoldedPropsValue(UNewTrie *trie, UChar32 start, int32_t offset);
    117 
        /* Takes a code point range; NOTE(review): whether last is inclusive is not visible here. */
    118 extern void
    119 addCaseSensitive(UChar32 first, UChar32 last);
    120 
        /* No parameters and no return value; works on state that is not visible in this header. */
    121 extern void
    122 makeCaseClosure(void);
    123 
        /* No parameters and no return value; works on state that is not visible in this header. */
    124 extern void
    125 makeExceptions(void);
    126 
        /*
         * NOTE(review): presumably writes the generated data below dataDir, with
         * csource selecting C source output instead of a binary file -- confirm.
         */
    127 extern void
    128 generateData(const char *dataDir, UBool csource);
    129 
    130 U_CDECL_END
    131 
    132 #endif
    133