Home | History | Annotate | Download | only in gencase
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2004-2008, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  gencase.h
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2004aug28
     14 *   created by: Markus W. Scherer
     15 */
     16 
     17 #ifndef __GENCASE_H__
     18 #define __GENCASE_H__
     19 
     20 #include "unicode/utypes.h"
     21 #include "utrie.h"
     22 #include "propsvec.h"
     23 #include "ucase.h"
     24 
     25 U_CDECL_BEGIN
     26 
     27 /* Unicode versions --------------------------------------------------------- */
     28 
     29 enum { /* index constants for Unicode versions, listed in ascending order; implicit values start at 0 */
     30     UNI_1_0,
     31     UNI_1_1,
     32     UNI_2_0,
     33     UNI_3_0,
     34     UNI_3_1,
     35     UNI_3_2,
     36     UNI_4_0,
     37     UNI_4_0_1,
     38     UNI_4_1,
     39     UNI_VER_COUNT /* not a version: equals the number of enumerators listed before it (9) */
     40 };
     41 
     42 extern int32_t ucdVersion; /* declaration only, defined in another file; NOTE(review): presumably holds one of the UNI_* constants - confirm */
     43 
     44 /* gencase ------------------------------------------------------------------ */
     45 
     46 #define UGENCASE_EXC_SHIFT     16 /* lowest bit position covered by UGENCASE_EXC_MASK */
     47 #define UGENCASE_EXC_MASK      0xffff0000 /* selects bits 16..31 of a 32-bit word; NOTE(review): "EXC" presumably means exceptions - confirm */
     48 
     49 /*
     50  * Values for the ucase.icu unfold[] data array, see store.c.
     51  * The values are stored in ucase.icu so that the runtime code will work with
     52  * changing values, but they are hardcoded for gencase for simplicity.
     53  * They are optimized, that is, provide for minimal table column widths,
     54  * for the actual Unicode data, so that the table size is minimized.
     55  * Future versions of Unicode may require increases of some of these values.
     56  */
     57 enum {
     58     UGENCASE_UNFOLD_STRING_WIDTH=3, /* NOTE(review): presumably the column width for the string part of an unfold[] row - confirm in store.c */
     59     UGENCASE_UNFOLD_CP_WIDTH=2, /* NOTE(review): presumably the column width for the code point part of a row - confirm in store.c */
     60     UGENCASE_UNFOLD_WIDTH=UGENCASE_UNFOLD_STRING_WIDTH+UGENCASE_UNFOLD_CP_WIDTH, /* sum of the two widths above: 3+2=5 */
     61     UGENCASE_UNFOLD_MAX_ROWS=250 /* hardcoded limit; like the widths, it may need to grow for future Unicode versions */
     62 };
     63 
     64 /* Values for additional data stored in pv column 1 */
     65 enum { /* bit positions (shift amounts); the only enumerator has the implicit value 0 */
     66     UGENCASE_IS_MID_LETTER_SHIFT        /* bit 0  WB=MidLetter or WB=MidNumLet */
     67 };
     68 
     69 /* special casing data */
     70 typedef struct {
     71     UChar32 code; /* a code point; NOTE(review): presumably the character these mappings apply to - confirm */
     72     UBool isComplex; /* boolean flag; its exact meaning is not visible in this header */
     73     UChar lowerCase[32], upperCase[32], titleCase[32]; /* three fixed-size buffers of 32 UChar units each; how the string length is encoded is not visible here */
     74 } SpecialCasing;
     75 
     76 /* case folding data */
     77 typedef struct {
     78     UChar32 code, simple; /* two code points; NOTE(review): presumably the source character and its simple folding - confirm */
     79     char status; /* single character; NOTE(review): presumably the status field of a CaseFolding.txt line - confirm */
     80     UChar full[32]; /* fixed-size buffer of 32 UChar units; how the string length is encoded is not visible here */
     81 } CaseFolding;
     82 
     83 /* case mapping properties */
     84 typedef struct {
     85     UChar32 code, lowerCase, upperCase, titleCase; /* four code points; NOTE(review): presumably the character and its simple case mappings - confirm */
     86     UChar32 closure[8]; /* fixed capacity of 8 code points; how unused slots are marked is not visible here */
     87     SpecialCasing *specialCasing; /* pointer to a SpecialCasing record; NOTE(review): whether NULL is allowed and who owns it is not visible here */
     88     CaseFolding *caseFolding; /* pointer to a CaseFolding record; NOTE(review): whether NULL is allowed and who owns it is not visible here */
     89     uint8_t gc, cc; /* two 8-bit values; NOTE(review): presumably general category and combining class - confirm */
     90 } Props;
     91 
     92 /* global flags */
     93 extern UBool beVerbose, haveCopyright; /* declarations only; both flags are defined in another file */
     94 
     95 /* properties vectors in gencase.c */
     96 extern UPropsVectors *pv; /* declaration only; the storage itself is not defined in this header */
     97 
     98 /* prototypes */
     99 U_CFUNC void
    100 writeUCDFilename(char *basename, const char *filename, const char *suffix); /* basename is writable, filename and suffix are read-only; NOTE(review): the required size of the basename buffer is not visible here */
    101 
    102 U_CFUNC UBool
    103 isToken(const char *token, const char *s); /* returns a UBool and modifies neither string (both const); NOTE(review): the exact matching rule is not visible here */
    104 
    105 extern void
    106 setUnicodeVersion(const char *v); /* takes a read-only string v; NOTE(review): presumably parses a version number and updates ucdVersion - confirm */
    107 
    108 extern void
    109 setProps(Props *p); /* takes a non-const Props pointer; NOTE(review): whether *p is modified or retained after the call is not visible here */
    110 
    111 U_CFUNC uint32_t U_EXPORT2
    112 getFoldedPropsValue(UNewTrie *trie, UChar32 start, int32_t offset); /* NOTE(review): signature looks like a UNewTrie folding callback from utrie.h - confirm */
    113 
    114 extern void
    115 addCaseSensitive(UChar32 first, UChar32 last); /* takes a code point range first..last; NOTE(review): presumably inclusive at both ends - confirm */
    116 
    117 extern void
    118 makeCaseClosure(void); /* no arguments and no return value, so any effect is on state outside this header */
    119 
    120 extern void
    121 makeExceptions(void); /* no arguments and no return value, so any effect is on state outside this header */
    122 
    123 extern void
    124 generateData(const char *dataDir, UBool csource); /* dataDir is a read-only string, csource a boolean switch; NOTE(review): presumably csource selects C source output instead of a binary data file - confirm */
    125 
    126 U_CDECL_END
    127 
    128 #endif
    129