/*
*******************************************************************************
*   Copyright (C) 2013-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  uscript_props.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2013feb16
*   created by: Markus W. Scherer
*/
     14 
     15 #include "unicode/utypes.h"
     16 #include "unicode/unistr.h"
     17 #include "unicode/uscript.h"
     18 #include "unicode/utf16.h"
     19 #include "ustr_imp.h"
     20 #include "cmemory.h"
     21 
     22 namespace {
     23 
     24 // Script metadata (script properties).
     25 // See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt
     26 
     27 // 0 = NOT_ENCODED, no sample character, default false script properties.
     28 // Bits 20.. 0: sample character
     29 
     30 // Bits 23..21: usage
     31 const int32_t UNKNOWN = 1 << 21;
     32 const int32_t EXCLUSION = 2 << 21;
     33 const int32_t LIMITED_USE = 3 << 21;
     34 const int32_t ASPIRATIONAL = 4 << 21;
     35 const int32_t RECOMMENDED = 5 << 21;
     36 
     37 // Bits 31..24: Single-bit flags
     38 const int32_t RTL = 1 << 24;
     39 const int32_t LB_LETTERS = 1 << 25;
     40 const int32_t CASED = 1 << 26;
     41 
     42 const int32_t SCRIPT_PROPS[] = {
     43     // Begin copy-paste output from
     44     // tools/trunk/unicode/py/parsescriptmetadata.py
     45     0x0040 | RECOMMENDED,  // Zyyy
     46     0x0308 | RECOMMENDED,  // Zinh
     47     0x0628 | RECOMMENDED | RTL,  // Arab
     48     0x0531 | RECOMMENDED | CASED,  // Armn
     49     0x0995 | RECOMMENDED,  // Beng
     50     0x3105 | RECOMMENDED | LB_LETTERS,  // Bopo
     51     0x13C4 | LIMITED_USE | CASED,  // Cher
     52     0x03E2 | EXCLUSION | CASED,  // Copt
     53     0x042F | RECOMMENDED | CASED,  // Cyrl
     54     0x10414 | EXCLUSION | CASED,  // Dsrt
     55     0x0905 | RECOMMENDED,  // Deva
     56     0x12A0 | RECOMMENDED,  // Ethi
     57     0x10D3 | RECOMMENDED,  // Geor
     58     0x10330 | EXCLUSION,  // Goth
     59     0x03A9 | RECOMMENDED | CASED,  // Grek
     60     0x0A95 | RECOMMENDED,  // Gujr
     61     0x0A15 | RECOMMENDED,  // Guru
     62     0x5B57 | RECOMMENDED | LB_LETTERS,  // Hani
     63     0xAC00 | RECOMMENDED,  // Hang
     64     0x05D0 | RECOMMENDED | RTL,  // Hebr
     65     0x304B | RECOMMENDED | LB_LETTERS,  // Hira
     66     0x0C95 | RECOMMENDED,  // Knda
     67     0x30AB | RECOMMENDED | LB_LETTERS,  // Kana
     68     0x1780 | RECOMMENDED | LB_LETTERS,  // Khmr
     69     0x0EA5 | RECOMMENDED | LB_LETTERS,  // Laoo
     70     0x004C | RECOMMENDED | CASED,  // Latn
     71     0x0D15 | RECOMMENDED,  // Mlym
     72     0x1826 | ASPIRATIONAL,  // Mong
     73     0x1000 | RECOMMENDED | LB_LETTERS,  // Mymr
     74     0x168F | EXCLUSION,  // Ogam
     75     0x10308 | EXCLUSION,  // Ital
     76     0x0B15 | RECOMMENDED,  // Orya
     77     0x16A0 | EXCLUSION,  // Runr
     78     0x0D85 | RECOMMENDED,  // Sinh
     79     0x0710 | LIMITED_USE | RTL,  // Syrc
     80     0x0B95 | RECOMMENDED,  // Taml
     81     0x0C15 | RECOMMENDED,  // Telu
     82     0x078C | RECOMMENDED | RTL,  // Thaa
     83     0x0E17 | RECOMMENDED | LB_LETTERS,  // Thai
     84     0x0F40 | RECOMMENDED,  // Tibt
     85     0x14C0 | ASPIRATIONAL,  // Cans
     86     0xA288 | ASPIRATIONAL | LB_LETTERS,  // Yiii
     87     0x1703 | EXCLUSION,  // Tglg
     88     0x1723 | EXCLUSION,  // Hano
     89     0x1743 | EXCLUSION,  // Buhd
     90     0x1763 | EXCLUSION,  // Tagb
     91     0x280E | UNKNOWN,  // Brai
     92     0x10800 | EXCLUSION | RTL,  // Cprt
     93     0x1900 | LIMITED_USE,  // Limb
     94     0x10000 | EXCLUSION,  // Linb
     95     0x10480 | EXCLUSION,  // Osma
     96     0x10450 | EXCLUSION,  // Shaw
     97     0x1950 | LIMITED_USE | LB_LETTERS,  // Tale
     98     0x10380 | EXCLUSION,  // Ugar
     99     0,
    100     0x1A00 | EXCLUSION,  // Bugi
    101     0x2C00 | EXCLUSION | CASED,  // Glag
    102     0x10A00 | EXCLUSION | RTL,  // Khar
    103     0xA800 | LIMITED_USE,  // Sylo
    104     0x1980 | LIMITED_USE | LB_LETTERS,  // Talu
    105     0x2D5E | ASPIRATIONAL,  // Tfng
    106     0x103A0 | EXCLUSION,  // Xpeo
    107     0x1B05 | LIMITED_USE,  // Bali
    108     0x1BC0 | LIMITED_USE,  // Batk
    109     0,
    110     0x11005 | EXCLUSION,  // Brah
    111     0xAA00 | LIMITED_USE,  // Cham
    112     0,
    113     0,
    114     0,
    115     0,
    116     0x13153 | EXCLUSION,  // Egyp
    117     0,
    118     0x5B57 | RECOMMENDED | LB_LETTERS,  // Hans
    119     0x5B57 | RECOMMENDED | LB_LETTERS,  // Hant
    120     0x16B1C | EXCLUSION,  // Hmng
    121     0x10CA1 | EXCLUSION | RTL | CASED,  // Hung
    122     0,
    123     0xA984 | LIMITED_USE,  // Java
    124     0xA90A | LIMITED_USE,  // Kali
    125     0,
    126     0,
    127     0x1C00 | LIMITED_USE,  // Lepc
    128     0x10647 | EXCLUSION,  // Lina
    129     0x0840 | LIMITED_USE | RTL,  // Mand
    130     0,
    131     0x10980 | EXCLUSION | RTL,  // Mero
    132     0x07D8 | LIMITED_USE | RTL,  // Nkoo
    133     0x10C00 | EXCLUSION | RTL,  // Orkh
    134     0x1036B | EXCLUSION,  // Perm
    135     0xA840 | EXCLUSION,  // Phag
    136     0x10900 | EXCLUSION | RTL,  // Phnx
    137     0x16F00 | ASPIRATIONAL,  // Plrd
    138     0,
    139     0,
    140     0,
    141     0,
    142     0,
    143     0,
    144     0xA549 | LIMITED_USE,  // Vaii
    145     0,
    146     0x12000 | EXCLUSION,  // Xsux
    147     0,
    148     0xFDD0 | UNKNOWN,  // Zzzz
    149     0x102B7 | EXCLUSION,  // Cari
    150     0x304B | RECOMMENDED | LB_LETTERS,  // Jpan
    151     0x1A20 | LIMITED_USE | LB_LETTERS,  // Lana
    152     0x10280 | EXCLUSION,  // Lyci
    153     0x10920 | EXCLUSION | RTL,  // Lydi
    154     0x1C5A | LIMITED_USE,  // Olck
    155     0xA930 | EXCLUSION,  // Rjng
    156     0xA882 | LIMITED_USE,  // Saur
    157     0x1D850 | EXCLUSION,  // Sgnw
    158     0x1B83 | LIMITED_USE,  // Sund
    159     0,
    160     0xABC0 | LIMITED_USE,  // Mtei
    161     0x10840 | EXCLUSION | RTL,  // Armi
    162     0x10B00 | EXCLUSION | RTL,  // Avst
    163     0x11103 | LIMITED_USE,  // Cakm
    164     0xAC00 | RECOMMENDED,  // Kore
    165     0x11083 | EXCLUSION,  // Kthi
    166     0x10AC1 | EXCLUSION | RTL,  // Mani
    167     0x10B60 | EXCLUSION | RTL,  // Phli
    168     0x10B8F | EXCLUSION | RTL,  // Phlp
    169     0,
    170     0x10B40 | EXCLUSION | RTL,  // Prti
    171     0x0800 | EXCLUSION | RTL,  // Samr
    172     0xAA80 | LIMITED_USE | LB_LETTERS,  // Tavt
    173     0,
    174     0,
    175     0xA6A0 | LIMITED_USE,  // Bamu
    176     0xA4E8 | LIMITED_USE,  // Lisu
    177     0,
    178     0x10A60 | EXCLUSION | RTL,  // Sarb
    179     0x16AE6 | EXCLUSION,  // Bass
    180     0x1BC20 | EXCLUSION,  // Dupl
    181     0x10500 | EXCLUSION,  // Elba
    182     0x11315 | EXCLUSION,  // Gran
    183     0,
    184     0,
    185     0x1E802 | EXCLUSION | RTL,  // Mend
    186     0x109A0 | EXCLUSION | RTL,  // Merc
    187     0x10A95 | EXCLUSION | RTL,  // Narb
    188     0x10896 | EXCLUSION | RTL,  // Nbat
    189     0x10873 | EXCLUSION | RTL,  // Palm
    190     0x112BE | EXCLUSION,  // Sind
    191     0x118B4 | EXCLUSION | CASED,  // Wara
    192     0,
    193     0,
    194     0x16A4F | EXCLUSION,  // Mroo
    195     0,
    196     0x11183 | EXCLUSION,  // Shrd
    197     0x110D0 | EXCLUSION,  // Sora
    198     0x11680 | EXCLUSION,  // Takr
    199     0,
    200     0,
    201     0x14400 | EXCLUSION,  // Hluw
    202     0x11208 | EXCLUSION,  // Khoj
    203     0x11484 | EXCLUSION,  // Tirh
    204     0x10537 | EXCLUSION,  // Aghb
    205     0x11152 | EXCLUSION,  // Mahj
    206     0x11717 | EXCLUSION | LB_LETTERS,  // Ahom
    207     0x108F4 | EXCLUSION | RTL,  // Hatr
    208     0x1160E | EXCLUSION,  // Modi
    209     0x1128F | EXCLUSION,  // Mult
    210     0x11AC0 | EXCLUSION,  // Pauc
    211     0x1158E | EXCLUSION,  // Sidd
    212     // End copy-paste from parsescriptmetadata.py
    213 };
    214 
    215 int32_t getScriptProps(UScriptCode script) {
    216     if (0 <= script && script < UPRV_LENGTHOF(SCRIPT_PROPS)) {
    217         return SCRIPT_PROPS[script];
    218     } else {
    219         return 0;
    220     }
    221 }
    222 
    223 }  // namespace
    224 
    225 U_CAPI int32_t U_EXPORT2
    226 uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) {
    227     if(U_FAILURE(*pErrorCode)) { return 0; }
    228     if(capacity < 0 || (capacity > 0 && dest == NULL)) {
    229         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    230         return 0;
    231     }
    232     int32_t sampleChar = getScriptProps(script) & 0x1fffff;
    233     int32_t length;
    234     if(sampleChar == 0) {
    235         length = 0;
    236     } else {
    237         length = U16_LENGTH(sampleChar);
    238         if(length <= capacity) {
    239             int32_t i = 0;
    240             U16_APPEND_UNSAFE(dest, i, sampleChar);
    241         }
    242     }
    243     return u_terminateUChars(dest, capacity, length, pErrorCode);
    244 }
    245 
    246 U_COMMON_API icu::UnicodeString U_EXPORT2
    247 uscript_getSampleUnicodeString(UScriptCode script) {
    248     icu::UnicodeString sample;
    249     int32_t sampleChar = getScriptProps(script) & 0x1fffff;
    250     if(sampleChar != 0) {
    251         sample.append(sampleChar);
    252     }
    253     return sample;
    254 }
    255 
    256 U_CAPI UScriptUsage U_EXPORT2
    257 uscript_getUsage(UScriptCode script) {
    258     return (UScriptUsage)((getScriptProps(script) >> 21) & 7);
    259 }
    260 
    261 U_CAPI UBool U_EXPORT2
    262 uscript_isRightToLeft(UScriptCode script) {
    263     return (getScriptProps(script) & RTL) != 0;
    264 }
    265 
    266 U_CAPI UBool U_EXPORT2
    267 uscript_breaksBetweenLetters(UScriptCode script) {
    268     return (getScriptProps(script) & LB_LETTERS) != 0;
    269 }
    270 
    271 U_CAPI UBool U_EXPORT2
    272 uscript_isCased(UScriptCode script) {
    273     return (getScriptProps(script) & CASED) != 0;
    274 }
    275