Home | History | Annotate | Download | only in common
      1 /*
      2 *******************************************************************************
      3 *   Copyright (C) 2013, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 *   file name:  uscript_props.cpp
      7 *   encoding:   US-ASCII
      8 *   tab size:   8 (not used)
      9 *   indentation:4
     10 *
     11 *   created on: 2013feb16
     12 *   created by: Markus W. Scherer
     13 */
     14 
     15 #include "unicode/utypes.h"
     16 #include "unicode/unistr.h"
     17 #include "unicode/uscript.h"
     18 #include "unicode/utf16.h"
     19 #include "ustr_imp.h"
     20 
     21 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     22 
     23 namespace {
     24 
     25 // Script metadata (script properties).
     26 // See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt
     27 
     28 // 0 = NOT_ENCODED, no sample character, default false script properties.
     29 // Bits 20.. 0: sample character
     30 
     31 // Bits 23..21: usage
     32 const int32_t UNKNOWN = 1 << 21;
     33 const int32_t EXCLUSION = 2 << 21;
     34 const int32_t LIMITED_USE = 3 << 21;
     35 const int32_t ASPIRATIONAL = 4 << 21;
     36 const int32_t RECOMMENDED = 5 << 21;
     37 
     38 // Bits 31..24: Single-bit flags
     39 const int32_t RTL = 1 << 24;
     40 const int32_t LB_LETTERS = 1 << 25;
     41 const int32_t CASED = 1 << 26;
     42 
// Packed per-script property words, indexed by the numeric UScriptCode value
// (getScriptProps() reads SCRIPT_PROPS[script]).
// Each non-zero entry ORs together:
//   - the script's sample character (code point, bits 20..0),
//   - one usage constant (UNKNOWN .. RECOMMENDED, bits 23..21),
//   - zero or more single-bit flags (RTL, LB_LETTERS, CASED).
// A plain 0 entry means NOT_ENCODED: no sample character and all flags false.
// The trailing comment on each non-zero entry is the four-letter code of the
// script the entry belongs to (presumably its ISO 15924 code -- confirm against
// the UScriptCode enum). The order of entries is significant because the array
// index is the script code; the entries between the Begin/End markers are
// pasted from the generator script named below.
const int32_t SCRIPT_PROPS[] = {
    // Begin copy-paste output from
    // tools/trunk/unicode/py/parsescriptmetadata.py
    0x0040 | UNKNOWN,  // Zyyy
    0x0308 | UNKNOWN,  // Zinh
    0x0628 | RECOMMENDED | RTL,  // Arab
    0x0531 | RECOMMENDED | CASED,  // Armn
    0x0995 | RECOMMENDED,  // Beng
    0x3105 | RECOMMENDED | LB_LETTERS,  // Bopo
    0x13C4 | LIMITED_USE,  // Cher
    0x03E2 | EXCLUSION | CASED,  // Copt
    0x042F | RECOMMENDED | CASED,  // Cyrl
    0x10414 | EXCLUSION | CASED,  // Dsrt
    0x0905 | RECOMMENDED,  // Deva
    0x12A0 | RECOMMENDED,  // Ethi
    0x10D3 | RECOMMENDED,  // Geor
    0x10330 | EXCLUSION,  // Goth
    0x03A9 | RECOMMENDED | CASED,  // Grek
    0x0A95 | RECOMMENDED,  // Gujr
    0x0A15 | RECOMMENDED,  // Guru
    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hani
    0xAC00 | RECOMMENDED,  // Hang
    0x05D0 | RECOMMENDED | RTL,  // Hebr
    0x304B | RECOMMENDED | LB_LETTERS,  // Hira
    0x0C95 | RECOMMENDED,  // Knda
    0x30AB | RECOMMENDED | LB_LETTERS,  // Kana
    0x1780 | RECOMMENDED | LB_LETTERS,  // Khmr
    0x0EA5 | RECOMMENDED | LB_LETTERS,  // Laoo
    0x004C | RECOMMENDED | CASED,  // Latn
    0x0D15 | RECOMMENDED,  // Mlym
    0x1826 | ASPIRATIONAL,  // Mong
    0x1000 | RECOMMENDED | LB_LETTERS,  // Mymr
    0x168F | EXCLUSION,  // Ogam
    0x10300 | EXCLUSION,  // Ital
    0x0B15 | RECOMMENDED,  // Orya
    0x16A0 | EXCLUSION,  // Runr
    0x0D85 | RECOMMENDED,  // Sinh
    0x0710 | LIMITED_USE | RTL,  // Syrc
    0x0B95 | RECOMMENDED,  // Taml
    0x0C15 | RECOMMENDED,  // Telu
    0x078C | RECOMMENDED | RTL,  // Thaa
    0x0E17 | RECOMMENDED | LB_LETTERS,  // Thai
    0x0F40 | RECOMMENDED,  // Tibt
    0x14C0 | ASPIRATIONAL,  // Cans
    0xA288 | ASPIRATIONAL | LB_LETTERS,  // Yiii
    0x1703 | EXCLUSION,  // Tglg
    0x1723 | EXCLUSION,  // Hano
    0x1743 | EXCLUSION,  // Buhd
    0x1763 | EXCLUSION,  // Tagb
    0x2800 | UNKNOWN,  // Brai
    0x10800 | EXCLUSION | RTL,  // Cprt
    0x1900 | LIMITED_USE,  // Limb
    0x10000 | EXCLUSION,  // Linb
    0x10480 | EXCLUSION,  // Osma
    0x10450 | EXCLUSION,  // Shaw
    0x1950 | LIMITED_USE | LB_LETTERS,  // Tale
    0x10380 | EXCLUSION,  // Ugar
    0,
    0x1A00 | EXCLUSION,  // Bugi
    0x2C00 | EXCLUSION | CASED,  // Glag
    0x10A00 | EXCLUSION | RTL,  // Khar
    0xA800 | LIMITED_USE,  // Sylo
    0x1980 | LIMITED_USE | LB_LETTERS,  // Talu
    0x2D30 | ASPIRATIONAL,  // Tfng
    0x103A0 | EXCLUSION,  // Xpeo
    0x1B05 | LIMITED_USE | LB_LETTERS,  // Bali
    0x1BC0 | LIMITED_USE,  // Batk
    0,
    0x11005 | EXCLUSION,  // Brah
    0xAA00 | LIMITED_USE,  // Cham
    0,
    0,
    0,
    0,
    0x13153 | EXCLUSION,  // Egyp
    0,
    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hans
    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hant
    0,
    0,
    0,
    0xA984 | LIMITED_USE | LB_LETTERS,  // Java
    0xA90A | LIMITED_USE,  // Kali
    0,
    0,
    0x1C00 | LIMITED_USE,  // Lepc
    0,
    0x0840 | LIMITED_USE | RTL,  // Mand
    0,
    0x10980 | EXCLUSION | RTL,  // Mero
    0x07CA | LIMITED_USE | RTL,  // Nkoo
    0x10C00 | EXCLUSION | RTL,  // Orkh
    0,
    0xA840 | EXCLUSION,  // Phag
    0x10900 | EXCLUSION | RTL,  // Phnx
    0x16F00 | ASPIRATIONAL,  // Plrd
    0,
    0,
    0,
    0,
    0,
    0,
    0xA549 | LIMITED_USE,  // Vaii
    0,
    0x12000 | EXCLUSION,  // Xsux
    0,
    0xFDD0 | UNKNOWN,  // Zzzz
    0x102A0 | EXCLUSION,  // Cari
    0x304B | RECOMMENDED | LB_LETTERS,  // Jpan
    0x1A20 | LIMITED_USE | LB_LETTERS,  // Lana
    0x10280 | EXCLUSION,  // Lyci
    0x10920 | EXCLUSION | RTL,  // Lydi
    0x1C5A | LIMITED_USE,  // Olck
    0xA930 | EXCLUSION,  // Rjng
    0xA882 | LIMITED_USE,  // Saur
    0,
    0x1B83 | LIMITED_USE,  // Sund
    0,
    0xABC0 | LIMITED_USE,  // Mtei
    0x10840 | EXCLUSION | RTL,  // Armi
    0x10B00 | EXCLUSION | RTL,  // Avst
    0x11103 | LIMITED_USE,  // Cakm
    0xAC00 | RECOMMENDED,  // Kore
    0x11083 | EXCLUSION,  // Kthi
    0,
    0x10B60 | EXCLUSION | RTL,  // Phli
    0,
    0,
    0x10B40 | EXCLUSION | RTL,  // Prti
    0x0800 | EXCLUSION | RTL,  // Samr
    0xAA80 | LIMITED_USE | LB_LETTERS,  // Tavt
    0,
    0,
    0xA6A0 | LIMITED_USE,  // Bamu
    0xA4D0 | LIMITED_USE,  // Lisu
    0,
    0x10A60 | EXCLUSION | RTL,  // Sarb
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0x109A0 | EXCLUSION | RTL,  // Merc
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0x11183 | EXCLUSION,  // Shrd
    0x110D0 | EXCLUSION,  // Sora
    0x11680 | EXCLUSION,  // Takr
    0,
    0,
    0,
    0,
    0,
    // End copy-paste from parsescriptmetadata.py
};
    207 
    208 int32_t getScriptProps(UScriptCode script) {
    209     if (0 <= script && script < LENGTHOF(SCRIPT_PROPS)) {
    210         return SCRIPT_PROPS[script];
    211     } else {
    212         return 0;
    213     }
    214 }
    215 
    216 }  // namespace
    217 
    218 U_CAPI int32_t U_EXPORT2
    219 uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) {
    220     if(U_FAILURE(*pErrorCode)) { return 0; }
    221     if(capacity < 0 || (capacity > 0 && dest == NULL)) {
    222         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    223         return 0;
    224     }
    225     int32_t sampleChar = getScriptProps(script) & 0x1fffff;
    226     int32_t length;
    227     if(sampleChar == 0) {
    228         length = 0;
    229     } else {
    230         length = U16_LENGTH(sampleChar);
    231         if(length <= capacity) {
    232             int32_t i = 0;
    233             U16_APPEND_UNSAFE(dest, i, sampleChar);
    234         }
    235     }
    236     return u_terminateUChars(dest, capacity, length, pErrorCode);
    237 }
    238 
    239 U_COMMON_API icu::UnicodeString U_EXPORT2
    240 uscript_getSampleUnicodeString(UScriptCode script) {
    241     icu::UnicodeString sample;
    242     int32_t sampleChar = getScriptProps(script) & 0x1fffff;
    243     if(sampleChar != 0) {
    244         sample.append(sampleChar);
    245     }
    246     return sample;
    247 }
    248 
    249 U_CAPI UScriptUsage U_EXPORT2
    250 uscript_getUsage(UScriptCode script) {
    251     return (UScriptUsage)((getScriptProps(script) >> 21) & 7);
    252 }
    253 
    254 U_CAPI UBool U_EXPORT2
    255 uscript_isRightToLeft(UScriptCode script) {
    256     return (getScriptProps(script) & RTL) != 0;
    257 }
    258 
    259 U_CAPI UBool U_EXPORT2
    260 uscript_breaksBetweenLetters(UScriptCode script) {
    261     return (getScriptProps(script) & LB_LETTERS) != 0;
    262 }
    263 
    264 U_CAPI UBool U_EXPORT2
    265 uscript_isCased(UScriptCode script) {
    266     return (getScriptProps(script) & CASED) != 0;
    267 }
    268