1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2004-2008, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: gencase.h 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2004aug28 14 * created by: Markus W. Scherer 15 */ 16 17 #ifndef __GENCASE_H__ 18 #define __GENCASE_H__ 19 20 #include "unicode/utypes.h" 21 #include "utrie.h" 22 #include "propsvec.h" 23 #include "ucase.h" 24 25 U_CDECL_BEGIN 26 27 /* Unicode versions --------------------------------------------------------- */ 28 29 enum { 30 UNI_1_0, 31 UNI_1_1, 32 UNI_2_0, 33 UNI_3_0, 34 UNI_3_1, 35 UNI_3_2, 36 UNI_4_0, 37 UNI_4_0_1, 38 UNI_4_1, 39 UNI_VER_COUNT 40 }; 41 42 extern int32_t ucdVersion; 43 44 /* gencase ------------------------------------------------------------------ */ 45 46 #define UGENCASE_EXC_SHIFT 16 47 #define UGENCASE_EXC_MASK 0xffff0000 48 49 /* 50 * Values for the ucase.icu unfold[] data array, see store.c. 51 * The values are stored in ucase.icu so that the runtime code will work with 52 * changing values, but they are hardcoded for gencase for simplicity. 53 * They are optimized, that is, provide for minimal table column widths, 54 * for the actual Unicode data, so that the table size is minimized. 55 * Future versions of Unicode may require increases of some of these values. 56 */ 57 enum { 58 UGENCASE_UNFOLD_STRING_WIDTH=3, 59 UGENCASE_UNFOLD_CP_WIDTH=2, 60 UGENCASE_UNFOLD_WIDTH=UGENCASE_UNFOLD_STRING_WIDTH+UGENCASE_UNFOLD_CP_WIDTH, 61 UGENCASE_UNFOLD_MAX_ROWS=250 62 }; 63 64 /* Values for additional data stored in pv column 1 */ 65 enum { 66 UGENCASE_IS_MID_LETTER_SHIFT /* bit 0 WB=MidLetter or WB=MidNumLet */ 67 }; 68 69 /* special casing data */ 70 typedef struct { 71 UChar32 code; 72 UBool isComplex; 73 UChar lowerCase[32], upperCase[32], titleCase[32]; 74 } SpecialCasing; 75 76 /* case folding data */ 77 typedef struct { 78 UChar32 code, simple; 79 char status; 80 UChar full[32]; 81 } CaseFolding; 82 83 /* case mapping properties */ 84 typedef struct { 85 UChar32 code, lowerCase, upperCase, titleCase; 86 UChar32 closure[8]; 87 SpecialCasing *specialCasing; 88 CaseFolding *caseFolding; 89 uint8_t gc, cc; 90 } Props; 91 92 /* global flags */ 93 extern UBool beVerbose, haveCopyright; 94 95 /* properties vectors in gencase.c */ 96 extern UPropsVectors *pv; 97 98 /* prototypes */ 99 U_CFUNC void 100 writeUCDFilename(char *basename, const char *filename, const char *suffix); 101 102 U_CFUNC UBool 103 isToken(const char *token, const char *s); 104 105 extern void 106 setUnicodeVersion(const char *v); 107 108 extern void 109 setProps(Props *p); 110 111 U_CFUNC uint32_t U_EXPORT2 112 getFoldedPropsValue(UNewTrie *trie, UChar32 start, int32_t offset); 113 114 extern void 115 addCaseSensitive(UChar32 first, UChar32 last); 116 117 extern void 118 makeCaseClosure(void); 119 120 extern void 121 makeExceptions(void); 122 123 extern void 124 generateData(const char *dataDir, UBool csource); 125 126 U_CDECL_END 127 128 #endif 129