/*************************************************
*          Unicode Property Table handler        *
*************************************************/

/* NOTE(review): the guard name _UCP_H starts with an underscore followed by an
upper case letter, which is a spelling C reserves for the implementation. It is
left unchanged here so that anything testing this macro keeps working. */

#ifndef _UCP_H
#define _UCP_H

/* This file contains definitions of the property values that are returned by
the UCD access macros. Values added for new releases of Unicode must always be
appended at the end of the relevant enum, for backwards compatibility.

IMPORTANT: The specific numeric values of the enumerators have to be the same
as the values generated by the maint/MultiStage2.py script, where the
equivalent property descriptive names are listed in vectors. Every enum below
therefore starts explicitly at zero and relies on declaration order.

ALSO: The specific values of the first two enums are assumed by the table
called catposstab in pcre_compile.c. */

/* General character categories. */

enum {
  ucp_C = 0,   /* Other */
  ucp_L,       /* Letter */
  ucp_M,       /* Mark */
  ucp_N,       /* Number */
  ucp_P,       /* Punctuation */
  ucp_S,       /* Symbol */
  ucp_Z        /* Separator */
};

/* Particular character categories. */

enum {
  ucp_Cc = 0,  /* Control */
  ucp_Cf,      /* Format */
  ucp_Cn,      /* Unassigned */
  ucp_Co,      /* Private use */
  ucp_Cs,      /* Surrogate */
  ucp_Ll,      /* Lower case letter */
  ucp_Lm,      /* Modifier letter */
  ucp_Lo,      /* Other letter */
  ucp_Lt,      /* Title case letter */
  ucp_Lu,      /* Upper case letter */
  ucp_Mc,      /* Spacing mark */
  ucp_Me,      /* Enclosing mark */
  ucp_Mn,      /* Non-spacing mark */
  ucp_Nd,      /* Decimal number */
  ucp_Nl,      /* Letter number */
  ucp_No,      /* Other number */
  ucp_Pc,      /* Connector punctuation */
  ucp_Pd,      /* Dash punctuation */
  ucp_Pe,      /* Close punctuation */
  ucp_Pf,      /* Final punctuation */
  ucp_Pi,      /* Initial punctuation */
  ucp_Po,      /* Other punctuation */
  ucp_Ps,      /* Open punctuation */
  ucp_Sc,      /* Currency symbol */
  ucp_Sk,      /* Modifier symbol */
  ucp_Sm,      /* Mathematical symbol */
  ucp_So,      /* Other symbol */
  ucp_Zl,      /* Line separator */
  ucp_Zp,      /* Paragraph separator */
  ucp_Zs       /* Space separator */
};

/* Grapheme break properties. The code that processes these assumes that every
value is less than 16. If enough values are added to take the count to 16 or
more, that code will have to be rewritten. */

enum {
  ucp_gbCR = 0,             /*  0 */
  ucp_gbLF,                 /*  1 */
  ucp_gbControl,            /*  2 */
  ucp_gbExtend,             /*  3 */
  ucp_gbPrepend,            /*  4 */
  ucp_gbSpacingMark,        /*  5 */
  ucp_gbL,                  /*  6 Hangul syllable type L */
  ucp_gbV,                  /*  7 Hangul syllable type V */
  ucp_gbT,                  /*  8 Hangul syllable type T */
  ucp_gbLV,                 /*  9 Hangul syllable type LV */
  ucp_gbLVT,                /* 10 Hangul syllable type LVT */
  ucp_gbRegionalIndicator,  /* 11 */
  ucp_gbOther               /* 12 */
};

/* Script identifications. The numbers in the comments give the value of the
first enumerator of each group (and of the final enumerator). */

enum {
  ucp_Arabic = 0,           /* 0 */
  ucp_Armenian,
  ucp_Bengali,
  ucp_Bopomofo,
  ucp_Braille,
  ucp_Buginese,
  ucp_Buhid,
  ucp_Canadian_Aboriginal,
  ucp_Cherokee,
  ucp_Common,
  ucp_Coptic,
  ucp_Cypriot,
  ucp_Cyrillic,
  ucp_Deseret,
  ucp_Devanagari,
  ucp_Ethiopic,
  ucp_Georgian,
  ucp_Glagolitic,
  ucp_Gothic,
  ucp_Greek,
  ucp_Gujarati,
  ucp_Gurmukhi,
  ucp_Han,
  ucp_Hangul,
  ucp_Hanunoo,
  ucp_Hebrew,
  ucp_Hiragana,
  ucp_Inherited,
  ucp_Kannada,
  ucp_Katakana,
  ucp_Kharoshthi,
  ucp_Khmer,
  ucp_Lao,
  ucp_Latin,
  ucp_Limbu,
  ucp_Linear_B,
  ucp_Malayalam,
  ucp_Mongolian,
  ucp_Myanmar,
  ucp_New_Tai_Lue,
  ucp_Ogham,
  ucp_Old_Italic,
  ucp_Old_Persian,
  ucp_Oriya,
  ucp_Osmanya,
  ucp_Runic,
  ucp_Shavian,
  ucp_Sinhala,
  ucp_Syloti_Nagri,
  ucp_Syriac,
  ucp_Tagalog,
  ucp_Tagbanwa,
  ucp_Tai_Le,
  ucp_Tamil,
  ucp_Telugu,
  ucp_Thaana,
  ucp_Thai,
  ucp_Tibetan,
  ucp_Tifinagh,
  ucp_Ugaritic,
  ucp_Yi,
  /* New for Unicode 5.0: */
  ucp_Balinese,             /* 61 */
  ucp_Cuneiform,
  ucp_Nko,
  ucp_Phags_Pa,
  ucp_Phoenician,
  /* New for Unicode 5.1: */
  ucp_Carian,               /* 66 */
  ucp_Cham,
  ucp_Kayah_Li,
  ucp_Lepcha,
  ucp_Lycian,
  ucp_Lydian,
  ucp_Ol_Chiki,
  ucp_Rejang,
  ucp_Saurashtra,
  ucp_Sundanese,
  ucp_Vai,
  /* New for Unicode 5.2: */
  ucp_Avestan,              /* 77 */
  ucp_Bamum,
  ucp_Egyptian_Hieroglyphs,
  ucp_Imperial_Aramaic,
  ucp_Inscriptional_Pahlavi,
  ucp_Inscriptional_Parthian,
  ucp_Javanese,
  ucp_Kaithi,
  ucp_Lisu,
  ucp_Meetei_Mayek,
  ucp_Old_South_Arabian,
  ucp_Old_Turkic,
  ucp_Samaritan,
  ucp_Tai_Tham,
  ucp_Tai_Viet,
  /* New for Unicode 6.0.0: */
  ucp_Batak,                /* 92 */
  ucp_Brahmi,
  ucp_Mandaic,
  /* New for Unicode 6.1.0: */
  ucp_Chakma,               /* 95 */
  ucp_Meroitic_Cursive,
  ucp_Meroitic_Hieroglyphs,
  ucp_Miao,
  ucp_Sharada,
  ucp_Sora_Sompeng,
  ucp_Takri,
  /* New for Unicode 7.0.0: */
  ucp_Bassa_Vah,            /* 102 */
  ucp_Caucasian_Albanian,
  ucp_Duployan,
  ucp_Elbasan,
  ucp_Grantha,
  ucp_Khojki,
  ucp_Khudawadi,
  ucp_Linear_A,
  ucp_Mahajani,
  ucp_Manichaean,
  ucp_Mende_Kikakui,
  ucp_Modi,
  ucp_Mro,
  ucp_Nabataean,
  ucp_Old_North_Arabian,
  ucp_Old_Permic,
  ucp_Pahawh_Hmong,
  ucp_Palmyrene,
  ucp_Psalter_Pahlavi,
  ucp_Pau_Cin_Hau,
  ucp_Siddham,
  ucp_Tirhuta,
  ucp_Warang_Citi           /* 124 */
};

#endif

/* End of ucp.h */