1 /* 2 ******************************************************************************* 3 * Copyright (C) 2013, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * file name: uscript_props.cpp 7 * encoding: US-ASCII 8 * tab size: 8 (not used) 9 * indentation:4 10 * 11 * created on: 2013feb16 12 * created by: Markus W. Scherer 13 */ 14 15 #include "unicode/utypes.h" 16 #include "unicode/unistr.h" 17 #include "unicode/uscript.h" 18 #include "unicode/utf16.h" 19 #include "ustr_imp.h" 20 21 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 22 23 namespace { 24 25 // Script metadata (script properties). 26 // See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt 27 28 // 0 = NOT_ENCODED, no sample character, default false script properties. 29 // Bits 20.. 0: sample character 30 31 // Bits 23..21: usage 32 const int32_t UNKNOWN = 1 << 21; 33 const int32_t EXCLUSION = 2 << 21; 34 const int32_t LIMITED_USE = 3 << 21; 35 const int32_t ASPIRATIONAL = 4 << 21; 36 const int32_t RECOMMENDED = 5 << 21; 37 38 // Bits 31..24: Single-bit flags 39 const int32_t RTL = 1 << 24; 40 const int32_t LB_LETTERS = 1 << 25; 41 const int32_t CASED = 1 << 26; 42 43 const int32_t SCRIPT_PROPS[] = { 44 // Begin copy-paste output from 45 // tools/trunk/unicode/py/parsescriptmetadata.py 46 0x0040 | UNKNOWN, // Zyyy 47 0x0308 | UNKNOWN, // Zinh 48 0x0628 | RECOMMENDED | RTL, // Arab 49 0x0531 | RECOMMENDED | CASED, // Armn 50 0x0995 | RECOMMENDED, // Beng 51 0x3105 | RECOMMENDED | LB_LETTERS, // Bopo 52 0x13C4 | LIMITED_USE, // Cher 53 0x03E2 | EXCLUSION | CASED, // Copt 54 0x042F | RECOMMENDED | CASED, // Cyrl 55 0x10414 | EXCLUSION | CASED, // Dsrt 56 0x0905 | RECOMMENDED, // Deva 57 0x12A0 | RECOMMENDED, // Ethi 58 0x10D3 | RECOMMENDED, // Geor 59 0x10330 | EXCLUSION, // Goth 60 0x03A9 | RECOMMENDED | CASED, // Grek 61 0x0A95 | RECOMMENDED, // Gujr 62 0x0A15 | RECOMMENDED, // Guru 63 0x5B57 | RECOMMENDED | LB_LETTERS, // Hani 64 0xAC00 | RECOMMENDED, // Hang 65 0x05D0 | RECOMMENDED | RTL, // Hebr 66 0x304B | RECOMMENDED | LB_LETTERS, // Hira 67 0x0C95 | RECOMMENDED, // Knda 68 0x30AB | RECOMMENDED | LB_LETTERS, // Kana 69 0x1780 | RECOMMENDED | LB_LETTERS, // Khmr 70 0x0EA5 | RECOMMENDED | LB_LETTERS, // Laoo 71 0x004C | RECOMMENDED | CASED, // Latn 72 0x0D15 | RECOMMENDED, // Mlym 73 0x1826 | ASPIRATIONAL, // Mong 74 0x1000 | RECOMMENDED | LB_LETTERS, // Mymr 75 0x168F | EXCLUSION, // Ogam 76 0x10300 | EXCLUSION, // Ital 77 0x0B15 | RECOMMENDED, // Orya 78 0x16A0 | EXCLUSION, // Runr 79 0x0D85 | RECOMMENDED, // Sinh 80 0x0710 | LIMITED_USE | RTL, // Syrc 81 0x0B95 | RECOMMENDED, // Taml 82 0x0C15 | RECOMMENDED, // Telu 83 0x078C | RECOMMENDED | RTL, // Thaa 84 0x0E17 | RECOMMENDED | LB_LETTERS, // Thai 85 0x0F40 | RECOMMENDED, // Tibt 86 0x14C0 | ASPIRATIONAL, // Cans 87 0xA288 | ASPIRATIONAL | LB_LETTERS, // Yiii 88 0x1703 | EXCLUSION, // Tglg 89 0x1723 | EXCLUSION, // Hano 90 0x1743 | EXCLUSION, // Buhd 91 0x1763 | EXCLUSION, // Tagb 92 0x2800 | UNKNOWN, // Brai 93 0x10800 | EXCLUSION | RTL, // Cprt 94 0x1900 | LIMITED_USE, // Limb 95 0x10000 | EXCLUSION, // Linb 96 0x10480 | EXCLUSION, // Osma 97 0x10450 | EXCLUSION, // Shaw 98 0x1950 | LIMITED_USE | LB_LETTERS, // Tale 99 0x10380 | EXCLUSION, // Ugar 100 0, 101 0x1A00 | EXCLUSION, // Bugi 102 0x2C00 | EXCLUSION | CASED, // Glag 103 0x10A00 | EXCLUSION | RTL, // Khar 104 0xA800 | LIMITED_USE, // Sylo 105 0x1980 | LIMITED_USE | LB_LETTERS, // Talu 106 0x2D30 | ASPIRATIONAL, // Tfng 107 0x103A0 | EXCLUSION, // Xpeo 108 0x1B05 | LIMITED_USE | LB_LETTERS, // Bali 109 0x1BC0 | LIMITED_USE, // Batk 110 0, 111 0x11005 | EXCLUSION, // Brah 112 0xAA00 | LIMITED_USE, // Cham 113 0, 114 0, 115 0, 116 0, 117 0x13153 | EXCLUSION, // Egyp 118 0, 119 0x5B57 | RECOMMENDED | LB_LETTERS, // Hans 120 0x5B57 | RECOMMENDED | LB_LETTERS, // Hant 121 0, 122 0, 123 0, 124 0xA984 | LIMITED_USE | LB_LETTERS, // Java 125 0xA90A | LIMITED_USE, // Kali 126 0, 127 0, 128 0x1C00 | LIMITED_USE, // Lepc 129 0, 130 0x0840 | LIMITED_USE | RTL, // Mand 131 0, 132 0x10980 | EXCLUSION | RTL, // Mero 133 0x07CA | LIMITED_USE | RTL, // Nkoo 134 0x10C00 | EXCLUSION | RTL, // Orkh 135 0, 136 0xA840 | EXCLUSION, // Phag 137 0x10900 | EXCLUSION | RTL, // Phnx 138 0x16F00 | ASPIRATIONAL, // Plrd 139 0, 140 0, 141 0, 142 0, 143 0, 144 0, 145 0xA549 | LIMITED_USE, // Vaii 146 0, 147 0x12000 | EXCLUSION, // Xsux 148 0, 149 0xFDD0 | UNKNOWN, // Zzzz 150 0x102A0 | EXCLUSION, // Cari 151 0x304B | RECOMMENDED | LB_LETTERS, // Jpan 152 0x1A20 | LIMITED_USE | LB_LETTERS, // Lana 153 0x10280 | EXCLUSION, // Lyci 154 0x10920 | EXCLUSION | RTL, // Lydi 155 0x1C5A | LIMITED_USE, // Olck 156 0xA930 | EXCLUSION, // Rjng 157 0xA882 | LIMITED_USE, // Saur 158 0, 159 0x1B83 | LIMITED_USE, // Sund 160 0, 161 0xABC0 | LIMITED_USE, // Mtei 162 0x10840 | EXCLUSION | RTL, // Armi 163 0x10B00 | EXCLUSION | RTL, // Avst 164 0x11103 | LIMITED_USE, // Cakm 165 0xAC00 | RECOMMENDED, // Kore 166 0x11083 | EXCLUSION, // Kthi 167 0, 168 0x10B60 | EXCLUSION | RTL, // Phli 169 0, 170 0, 171 0x10B40 | EXCLUSION | RTL, // Prti 172 0x0800 | EXCLUSION | RTL, // Samr 173 0xAA80 | LIMITED_USE | LB_LETTERS, // Tavt 174 0, 175 0, 176 0xA6A0 | LIMITED_USE, // Bamu 177 0xA4D0 | LIMITED_USE, // Lisu 178 0, 179 0x10A60 | EXCLUSION | RTL, // Sarb 180 0, 181 0, 182 0, 183 0, 184 0, 185 0, 186 0, 187 0x109A0 | EXCLUSION | RTL, // Merc 188 0, 189 0, 190 0, 191 0, 192 0, 193 0, 194 0, 195 0, 196 0, 197 0x11183 | EXCLUSION, // Shrd 198 0x110D0 | EXCLUSION, // Sora 199 0x11680 | EXCLUSION, // Takr 200 0, 201 0, 202 0, 203 0, 204 0, 205 // End copy-paste from parsescriptmetadata.py 206 }; 207 208 int32_t getScriptProps(UScriptCode script) { 209 if (0 <= script && script < LENGTHOF(SCRIPT_PROPS)) { 210 return SCRIPT_PROPS[script]; 211 } else { 212 return 0; 213 } 214 } 215 216 } // namespace 217 218 U_CAPI int32_t U_EXPORT2 219 uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) { 220 if(U_FAILURE(*pErrorCode)) { return 0; } 221 if(capacity < 0 || (capacity > 0 && dest == NULL)) { 222 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 223 return 0; 224 } 225 int32_t sampleChar = getScriptProps(script) & 0x1fffff; 226 int32_t length; 227 if(sampleChar == 0) { 228 length = 0; 229 } else { 230 length = U16_LENGTH(sampleChar); 231 if(length <= capacity) { 232 int32_t i = 0; 233 U16_APPEND_UNSAFE(dest, i, sampleChar); 234 } 235 } 236 return u_terminateUChars(dest, capacity, length, pErrorCode); 237 } 238 239 U_COMMON_API icu::UnicodeString U_EXPORT2 240 uscript_getSampleUnicodeString(UScriptCode script) { 241 icu::UnicodeString sample; 242 int32_t sampleChar = getScriptProps(script) & 0x1fffff; 243 if(sampleChar != 0) { 244 sample.append(sampleChar); 245 } 246 return sample; 247 } 248 249 U_CAPI UScriptUsage U_EXPORT2 250 uscript_getUsage(UScriptCode script) { 251 return (UScriptUsage)((getScriptProps(script) >> 21) & 7); 252 } 253 254 U_CAPI UBool U_EXPORT2 255 uscript_isRightToLeft(UScriptCode script) { 256 return (getScriptProps(script) & RTL) != 0; 257 } 258 259 U_CAPI UBool U_EXPORT2 260 uscript_breaksBetweenLetters(UScriptCode script) { 261 return (getScriptProps(script) & LB_LETTERS) != 0; 262 } 263 264 U_CAPI UBool U_EXPORT2 265 uscript_isCased(UScriptCode script) { 266 return (getScriptProps(script) & CASED) != 0; 267 } 268