1 /* 2 ******************************************************************************* 3 * Copyright (C) 2013-2015, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * file name: uscript_props.cpp 7 * encoding: US-ASCII 8 * tab size: 8 (not used) 9 * indentation:4 10 * 11 * created on: 2013feb16 12 * created by: Markus W. Scherer 13 */ 14 15 #include "unicode/utypes.h" 16 #include "unicode/unistr.h" 17 #include "unicode/uscript.h" 18 #include "unicode/utf16.h" 19 #include "ustr_imp.h" 20 #include "cmemory.h" 21 22 namespace { 23 24 // Script metadata (script properties). 25 // See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt 26 27 // 0 = NOT_ENCODED, no sample character, default false script properties. 28 // Bits 20.. 0: sample character 29 30 // Bits 23..21: usage 31 const int32_t UNKNOWN = 1 << 21; 32 const int32_t EXCLUSION = 2 << 21; 33 const int32_t LIMITED_USE = 3 << 21; 34 const int32_t ASPIRATIONAL = 4 << 21; 35 const int32_t RECOMMENDED = 5 << 21; 36 37 // Bits 31..24: Single-bit flags 38 const int32_t RTL = 1 << 24; 39 const int32_t LB_LETTERS = 1 << 25; 40 const int32_t CASED = 1 << 26; 41 42 const int32_t SCRIPT_PROPS[] = { 43 // Begin copy-paste output from 44 // tools/trunk/unicode/py/parsescriptmetadata.py 45 0x0040 | RECOMMENDED, // Zyyy 46 0x0308 | RECOMMENDED, // Zinh 47 0x0628 | RECOMMENDED | RTL, // Arab 48 0x0531 | RECOMMENDED | CASED, // Armn 49 0x0995 | RECOMMENDED, // Beng 50 0x3105 | RECOMMENDED | LB_LETTERS, // Bopo 51 0x13C4 | LIMITED_USE | CASED, // Cher 52 0x03E2 | EXCLUSION | CASED, // Copt 53 0x042F | RECOMMENDED | CASED, // Cyrl 54 0x10414 | EXCLUSION | CASED, // Dsrt 55 0x0905 | RECOMMENDED, // Deva 56 0x12A0 | RECOMMENDED, // Ethi 57 0x10D3 | RECOMMENDED, // Geor 58 0x10330 | EXCLUSION, // Goth 59 0x03A9 | RECOMMENDED | CASED, // Grek 60 0x0A95 | RECOMMENDED, // Gujr 61 0x0A15 | RECOMMENDED, // Guru 62 0x5B57 | RECOMMENDED | LB_LETTERS, // Hani 63 0xAC00 | RECOMMENDED, // Hang 64 0x05D0 | RECOMMENDED | RTL, // Hebr 65 0x304B | RECOMMENDED | LB_LETTERS, // Hira 66 0x0C95 | RECOMMENDED, // Knda 67 0x30AB | RECOMMENDED | LB_LETTERS, // Kana 68 0x1780 | RECOMMENDED | LB_LETTERS, // Khmr 69 0x0EA5 | RECOMMENDED | LB_LETTERS, // Laoo 70 0x004C | RECOMMENDED | CASED, // Latn 71 0x0D15 | RECOMMENDED, // Mlym 72 0x1826 | ASPIRATIONAL, // Mong 73 0x1000 | RECOMMENDED | LB_LETTERS, // Mymr 74 0x168F | EXCLUSION, // Ogam 75 0x10308 | EXCLUSION, // Ital 76 0x0B15 | RECOMMENDED, // Orya 77 0x16A0 | EXCLUSION, // Runr 78 0x0D85 | RECOMMENDED, // Sinh 79 0x0710 | LIMITED_USE | RTL, // Syrc 80 0x0B95 | RECOMMENDED, // Taml 81 0x0C15 | RECOMMENDED, // Telu 82 0x078C | RECOMMENDED | RTL, // Thaa 83 0x0E17 | RECOMMENDED | LB_LETTERS, // Thai 84 0x0F40 | RECOMMENDED, // Tibt 85 0x14C0 | ASPIRATIONAL, // Cans 86 0xA288 | ASPIRATIONAL | LB_LETTERS, // Yiii 87 0x1703 | EXCLUSION, // Tglg 88 0x1723 | EXCLUSION, // Hano 89 0x1743 | EXCLUSION, // Buhd 90 0x1763 | EXCLUSION, // Tagb 91 0x280E | UNKNOWN, // Brai 92 0x10800 | EXCLUSION | RTL, // Cprt 93 0x1900 | LIMITED_USE, // Limb 94 0x10000 | EXCLUSION, // Linb 95 0x10480 | EXCLUSION, // Osma 96 0x10450 | EXCLUSION, // Shaw 97 0x1950 | LIMITED_USE | LB_LETTERS, // Tale 98 0x10380 | EXCLUSION, // Ugar 99 0, 100 0x1A00 | EXCLUSION, // Bugi 101 0x2C00 | EXCLUSION | CASED, // Glag 102 0x10A00 | EXCLUSION | RTL, // Khar 103 0xA800 | LIMITED_USE, // Sylo 104 0x1980 | LIMITED_USE | LB_LETTERS, // Talu 105 0x2D5E | ASPIRATIONAL, // Tfng 106 0x103A0 | EXCLUSION, // Xpeo 107 0x1B05 | LIMITED_USE, // Bali 108 0x1BC0 | LIMITED_USE, // Batk 109 0, 110 0x11005 | EXCLUSION, // Brah 111 0xAA00 | LIMITED_USE, // Cham 112 0, 113 0, 114 0, 115 0, 116 0x13153 | EXCLUSION, // Egyp 117 0, 118 0x5B57 | RECOMMENDED | LB_LETTERS, // Hans 119 0x5B57 | RECOMMENDED | LB_LETTERS, // Hant 120 0x16B1C | EXCLUSION, // Hmng 121 0x10CA1 | EXCLUSION | RTL | CASED, // Hung 122 0, 123 0xA984 | LIMITED_USE, // Java 124 0xA90A | LIMITED_USE, // Kali 125 0, 126 0, 127 0x1C00 | LIMITED_USE, // Lepc 128 0x10647 | EXCLUSION, // Lina 129 0x0840 | LIMITED_USE | RTL, // Mand 130 0, 131 0x10980 | EXCLUSION | RTL, // Mero 132 0x07D8 | LIMITED_USE | RTL, // Nkoo 133 0x10C00 | EXCLUSION | RTL, // Orkh 134 0x1036B | EXCLUSION, // Perm 135 0xA840 | EXCLUSION, // Phag 136 0x10900 | EXCLUSION | RTL, // Phnx 137 0x16F00 | ASPIRATIONAL, // Plrd 138 0, 139 0, 140 0, 141 0, 142 0, 143 0, 144 0xA549 | LIMITED_USE, // Vaii 145 0, 146 0x12000 | EXCLUSION, // Xsux 147 0, 148 0xFDD0 | UNKNOWN, // Zzzz 149 0x102B7 | EXCLUSION, // Cari 150 0x304B | RECOMMENDED | LB_LETTERS, // Jpan 151 0x1A20 | LIMITED_USE | LB_LETTERS, // Lana 152 0x10280 | EXCLUSION, // Lyci 153 0x10920 | EXCLUSION | RTL, // Lydi 154 0x1C5A | LIMITED_USE, // Olck 155 0xA930 | EXCLUSION, // Rjng 156 0xA882 | LIMITED_USE, // Saur 157 0x1D850 | EXCLUSION, // Sgnw 158 0x1B83 | LIMITED_USE, // Sund 159 0, 160 0xABC0 | LIMITED_USE, // Mtei 161 0x10840 | EXCLUSION | RTL, // Armi 162 0x10B00 | EXCLUSION | RTL, // Avst 163 0x11103 | LIMITED_USE, // Cakm 164 0xAC00 | RECOMMENDED, // Kore 165 0x11083 | EXCLUSION, // Kthi 166 0x10AC1 | EXCLUSION | RTL, // Mani 167 0x10B60 | EXCLUSION | RTL, // Phli 168 0x10B8F | EXCLUSION | RTL, // Phlp 169 0, 170 0x10B40 | EXCLUSION | RTL, // Prti 171 0x0800 | EXCLUSION | RTL, // Samr 172 0xAA80 | LIMITED_USE | LB_LETTERS, // Tavt 173 0, 174 0, 175 0xA6A0 | LIMITED_USE, // Bamu 176 0xA4E8 | LIMITED_USE, // Lisu 177 0, 178 0x10A60 | EXCLUSION | RTL, // Sarb 179 0x16AE6 | EXCLUSION, // Bass 180 0x1BC20 | EXCLUSION, // Dupl 181 0x10500 | EXCLUSION, // Elba 182 0x11315 | EXCLUSION, // Gran 183 0, 184 0, 185 0x1E802 | EXCLUSION | RTL, // Mend 186 0x109A0 | EXCLUSION | RTL, // Merc 187 0x10A95 | EXCLUSION | RTL, // Narb 188 0x10896 | EXCLUSION | RTL, // Nbat 189 0x10873 | EXCLUSION | RTL, // Palm 190 0x112BE | EXCLUSION, // Sind 191 0x118B4 | EXCLUSION | CASED, // Wara 192 0, 193 0, 194 0x16A4F | EXCLUSION, // Mroo 195 0, 196 0x11183 | EXCLUSION, // Shrd 197 0x110D0 | EXCLUSION, // Sora 198 0x11680 | EXCLUSION, // Takr 199 0, 200 0, 201 0x14400 | EXCLUSION, // Hluw 202 0x11208 | EXCLUSION, // Khoj 203 0x11484 | EXCLUSION, // Tirh 204 0x10537 | EXCLUSION, // Aghb 205 0x11152 | EXCLUSION, // Mahj 206 0x11717 | EXCLUSION | LB_LETTERS, // Ahom 207 0x108F4 | EXCLUSION | RTL, // Hatr 208 0x1160E | EXCLUSION, // Modi 209 0x1128F | EXCLUSION, // Mult 210 0x11AC0 | EXCLUSION, // Pauc 211 0x1158E | EXCLUSION, // Sidd 212 // End copy-paste from parsescriptmetadata.py 213 }; 214 215 int32_t getScriptProps(UScriptCode script) { 216 if (0 <= script && script < UPRV_LENGTHOF(SCRIPT_PROPS)) { 217 return SCRIPT_PROPS[script]; 218 } else { 219 return 0; 220 } 221 } 222 223 } // namespace 224 225 U_CAPI int32_t U_EXPORT2 226 uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) { 227 if(U_FAILURE(*pErrorCode)) { return 0; } 228 if(capacity < 0 || (capacity > 0 && dest == NULL)) { 229 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 230 return 0; 231 } 232 int32_t sampleChar = getScriptProps(script) & 0x1fffff; 233 int32_t length; 234 if(sampleChar == 0) { 235 length = 0; 236 } else { 237 length = U16_LENGTH(sampleChar); 238 if(length <= capacity) { 239 int32_t i = 0; 240 U16_APPEND_UNSAFE(dest, i, sampleChar); 241 } 242 } 243 return u_terminateUChars(dest, capacity, length, pErrorCode); 244 } 245 246 U_COMMON_API icu::UnicodeString U_EXPORT2 247 uscript_getSampleUnicodeString(UScriptCode script) { 248 icu::UnicodeString sample; 249 int32_t sampleChar = getScriptProps(script) & 0x1fffff; 250 if(sampleChar != 0) { 251 sample.append(sampleChar); 252 } 253 return sample; 254 } 255 256 U_CAPI UScriptUsage U_EXPORT2 257 uscript_getUsage(UScriptCode script) { 258 return (UScriptUsage)((getScriptProps(script) >> 21) & 7); 259 } 260 261 U_CAPI UBool U_EXPORT2 262 uscript_isRightToLeft(UScriptCode script) { 263 return (getScriptProps(script) & RTL) != 0; 264 } 265 266 U_CAPI UBool U_EXPORT2 267 uscript_breaksBetweenLetters(UScriptCode script) { 268 return (getScriptProps(script) & LB_LETTERS) != 0; 269 } 270 271 U_CAPI UBool U_EXPORT2 272 uscript_isCased(UScriptCode script) { 273 return (getScriptProps(script) & CASED) != 0; 274 } 275