Home | History | Annotate | Download | only in libmedia
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "autodetect.h"
     18 
     19 typedef struct CharRange {
     20     uint16_t first;
     21     uint16_t last;
     22 };
     23 
     24 #define ARRAY_SIZE(x)   (sizeof(x) / sizeof(*x))
     25 
     26 // generated from http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT
     27 static const CharRange kShiftJISRanges[] = {
     28     { 0x8140, 0x817E },
     29     { 0x8180, 0x81AC },
     30     { 0x81B8, 0x81BF },
     31     { 0x81C8, 0x81CE },
     32     { 0x81DA, 0x81E8 },
     33     { 0x81F0, 0x81F7 },
     34     { 0x81FC, 0x81FC },
     35     { 0x824F, 0x8258 },
     36     { 0x8260, 0x8279 },
     37     { 0x8281, 0x829A },
     38     { 0x829F, 0x82F1 },
     39     { 0x8340, 0x837E },
     40     { 0x8380, 0x8396 },
     41     { 0x839F, 0x83B6 },
     42     { 0x83BF, 0x83D6 },
     43     { 0x8440, 0x8460 },
     44     { 0x8470, 0x847E },
     45     { 0x8480, 0x8491 },
     46     { 0x849F, 0x84BE },
     47     { 0x8740, 0x875D },
     48     { 0x875F, 0x8775 },
     49     { 0x877E, 0x877E },
     50     { 0x8780, 0x879C },
     51     { 0x889F, 0x88FC },
     52     { 0x8940, 0x897E },
     53     { 0x8980, 0x89FC },
     54     { 0x8A40, 0x8A7E },
     55     { 0x8A80, 0x8AFC },
     56     { 0x8B40, 0x8B7E },
     57     { 0x8B80, 0x8BFC },
     58     { 0x8C40, 0x8C7E },
     59     { 0x8C80, 0x8CFC },
     60     { 0x8D40, 0x8D7E },
     61     { 0x8D80, 0x8DFC },
     62     { 0x8E40, 0x8E7E },
     63     { 0x8E80, 0x8EFC },
     64     { 0x8F40, 0x8F7E },
     65     { 0x8F80, 0x8FFC },
     66     { 0x9040, 0x907E },
     67     { 0x9080, 0x90FC },
     68     { 0x9140, 0x917E },
     69     { 0x9180, 0x91FC },
     70     { 0x9240, 0x927E },
     71     { 0x9280, 0x92FC },
     72     { 0x9340, 0x937E },
     73     { 0x9380, 0x93FC },
     74     { 0x9440, 0x947E },
     75     { 0x9480, 0x94FC },
     76     { 0x9540, 0x957E },
     77     { 0x9580, 0x95FC },
     78     { 0x9640, 0x967E },
     79     { 0x9680, 0x96FC },
     80     { 0x9740, 0x977E },
     81     { 0x9780, 0x97FC },
     82     { 0x9840, 0x9872 },
     83     { 0x989F, 0x98FC },
     84     { 0x9940, 0x997E },
     85     { 0x9980, 0x99FC },
     86     { 0x9A40, 0x9A7E },
     87     { 0x9A80, 0x9AFC },
     88     { 0x9B40, 0x9B7E },
     89     { 0x9B80, 0x9BFC },
     90     { 0x9C40, 0x9C7E },
     91     { 0x9C80, 0x9CFC },
     92     { 0x9D40, 0x9D7E },
     93     { 0x9D80, 0x9DFC },
     94     { 0x9E40, 0x9E7E },
     95     { 0x9E80, 0x9EFC },
     96     { 0x9F40, 0x9F7E },
     97     { 0x9F80, 0x9FFC },
     98     { 0xE040, 0xE07E },
     99     { 0xE080, 0xE0FC },
    100     { 0xE140, 0xE17E },
    101     { 0xE180, 0xE1FC },
    102     { 0xE240, 0xE27E },
    103     { 0xE280, 0xE2FC },
    104     { 0xE340, 0xE37E },
    105     { 0xE380, 0xE3FC },
    106     { 0xE440, 0xE47E },
    107     { 0xE480, 0xE4FC },
    108     { 0xE540, 0xE57E },
    109     { 0xE580, 0xE5FC },
    110     { 0xE640, 0xE67E },
    111     { 0xE680, 0xE6FC },
    112     { 0xE740, 0xE77E },
    113     { 0xE780, 0xE7FC },
    114     { 0xE840, 0xE87E },
    115     { 0xE880, 0xE8FC },
    116     { 0xE940, 0xE97E },
    117     { 0xE980, 0xE9FC },
    118     { 0xEA40, 0xEA7E },
    119     { 0xEA80, 0xEAA4 },
    120     { 0xED40, 0xED7E },
    121     { 0xED80, 0xEDFC },
    122     { 0xEE40, 0xEE7E },
    123     { 0xEE80, 0xEEEC },
    124     { 0xEEEF, 0xEEFC },
    125     { 0xFA40, 0xFA7E },
    126     { 0xFA80, 0xFAFC },
    127     { 0xFB40, 0xFB7E },
    128     { 0xFB80, 0xFBFC },
    129     { 0xFC40, 0xFC4B },
    130 };
    131 
    132 // generated from http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT
    133 static const CharRange kGBKRanges[] = {
    134     { 0x8140, 0x817E },
    135     { 0x8180, 0x81FE },
    136     { 0x8240, 0x827E },
    137     { 0x8280, 0x82FE },
    138     { 0x8340, 0x837E },
    139     { 0x8380, 0x83FE },
    140     { 0x8440, 0x847E },
    141     { 0x8480, 0x84FE },
    142     { 0x8540, 0x857E },
    143     { 0x8580, 0x85FE },
    144     { 0x8640, 0x867E },
    145     { 0x8680, 0x86FE },
    146     { 0x8740, 0x877E },
    147     { 0x8780, 0x87FE },
    148     { 0x8840, 0x887E },
    149     { 0x8880, 0x88FE },
    150     { 0x8940, 0x897E },
    151     { 0x8980, 0x89FE },
    152     { 0x8A40, 0x8A7E },
    153     { 0x8A80, 0x8AFE },
    154     { 0x8B40, 0x8B7E },
    155     { 0x8B80, 0x8BFE },
    156     { 0x8C40, 0x8C7E },
    157     { 0x8C80, 0x8CFE },
    158     { 0x8D40, 0x8D7E },
    159     { 0x8D80, 0x8DFE },
    160     { 0x8E40, 0x8E7E },
    161     { 0x8E80, 0x8EFE },
    162     { 0x8F40, 0x8F7E },
    163     { 0x8F80, 0x8FFE },
    164     { 0x9040, 0x907E },
    165     { 0x9080, 0x90FE },
    166     { 0x9140, 0x917E },
    167     { 0x9180, 0x91FE },
    168     { 0x9240, 0x927E },
    169     { 0x9280, 0x92FE },
    170     { 0x9340, 0x937E },
    171     { 0x9380, 0x93FE },
    172     { 0x9440, 0x947E },
    173     { 0x9480, 0x94FE },
    174     { 0x9540, 0x957E },
    175     { 0x9580, 0x95FE },
    176     { 0x9640, 0x967E },
    177     { 0x9680, 0x96FE },
    178     { 0x9740, 0x977E },
    179     { 0x9780, 0x97FE },
    180     { 0x9840, 0x987E },
    181     { 0x9880, 0x98FE },
    182     { 0x9940, 0x997E },
    183     { 0x9980, 0x99FE },
    184     { 0x9A40, 0x9A7E },
    185     { 0x9A80, 0x9AFE },
    186     { 0x9B40, 0x9B7E },
    187     { 0x9B80, 0x9BFE },
    188     { 0x9C40, 0x9C7E },
    189     { 0x9C80, 0x9CFE },
    190     { 0x9D40, 0x9D7E },
    191     { 0x9D80, 0x9DFE },
    192     { 0x9E40, 0x9E7E },
    193     { 0x9E80, 0x9EFE },
    194     { 0x9F40, 0x9F7E },
    195     { 0x9F80, 0x9FFE },
    196     { 0xA040, 0xA07E },
    197     { 0xA080, 0xA0FE },
    198     { 0xA1A1, 0xA1FE },
    199     { 0xA2A1, 0xA2AA },
    200     { 0xA2B1, 0xA2E2 },
    201     { 0xA2E5, 0xA2EE },
    202     { 0xA2F1, 0xA2FC },
    203     { 0xA3A1, 0xA3FE },
    204     { 0xA4A1, 0xA4F3 },
    205     { 0xA5A1, 0xA5F6 },
    206     { 0xA6A1, 0xA6B8 },
    207     { 0xA6C1, 0xA6D8 },
    208     { 0xA6E0, 0xA6EB },
    209     { 0xA6EE, 0xA6F2 },
    210     { 0xA6F4, 0xA6F5 },
    211     { 0xA7A1, 0xA7C1 },
    212     { 0xA7D1, 0xA7F1 },
    213     { 0xA840, 0xA87E },
    214     { 0xA880, 0xA895 },
    215     { 0xA8A1, 0xA8BB },
    216     { 0xA8BD, 0xA8BE },
    217     { 0xA8C0, 0xA8C0 },
    218     { 0xA8C5, 0xA8E9 },
    219     { 0xA940, 0xA957 },
    220     { 0xA959, 0xA95A },
    221     { 0xA95C, 0xA95C },
    222     { 0xA960, 0xA97E },
    223     { 0xA980, 0xA988 },
    224     { 0xA996, 0xA996 },
    225     { 0xA9A4, 0xA9EF },
    226     { 0xAA40, 0xAA7E },
    227     { 0xAA80, 0xAAA0 },
    228     { 0xAB40, 0xAB7E },
    229     { 0xAB80, 0xABA0 },
    230     { 0xAC40, 0xAC7E },
    231     { 0xAC80, 0xACA0 },
    232     { 0xAD40, 0xAD7E },
    233     { 0xAD80, 0xADA0 },
    234     { 0xAE40, 0xAE7E },
    235     { 0xAE80, 0xAEA0 },
    236     { 0xAF40, 0xAF7E },
    237     { 0xAF80, 0xAFA0 },
    238     { 0xB040, 0xB07E },
    239     { 0xB080, 0xB0FE },
    240     { 0xB140, 0xB17E },
    241     { 0xB180, 0xB1FE },
    242     { 0xB240, 0xB27E },
    243     { 0xB280, 0xB2FE },
    244     { 0xB340, 0xB37E },
    245     { 0xB380, 0xB3FE },
    246     { 0xB440, 0xB47E },
    247     { 0xB480, 0xB4FE },
    248     { 0xB540, 0xB57E },
    249     { 0xB580, 0xB5FE },
    250     { 0xB640, 0xB67E },
    251     { 0xB680, 0xB6FE },
    252     { 0xB740, 0xB77E },
    253     { 0xB780, 0xB7FE },
    254     { 0xB840, 0xB87E },
    255     { 0xB880, 0xB8FE },
    256     { 0xB940, 0xB97E },
    257     { 0xB980, 0xB9FE },
    258     { 0xBA40, 0xBA7E },
    259     { 0xBA80, 0xBAFE },
    260     { 0xBB40, 0xBB7E },
    261     { 0xBB80, 0xBBFE },
    262     { 0xBC40, 0xBC7E },
    263     { 0xBC80, 0xBCFE },
    264     { 0xBD40, 0xBD7E },
    265     { 0xBD80, 0xBDFE },
    266     { 0xBE40, 0xBE7E },
    267     { 0xBE80, 0xBEFE },
    268     { 0xBF40, 0xBF7E },
    269     { 0xBF80, 0xBFFE },
    270     { 0xC040, 0xC07E },
    271     { 0xC080, 0xC0FE },
    272     { 0xC140, 0xC17E },
    273     { 0xC180, 0xC1FE },
    274     { 0xC240, 0xC27E },
    275     { 0xC280, 0xC2FE },
    276     { 0xC340, 0xC37E },
    277     { 0xC380, 0xC3FE },
    278     { 0xC440, 0xC47E },
    279     { 0xC480, 0xC4FE },
    280     { 0xC540, 0xC57E },
    281     { 0xC580, 0xC5FE },
    282     { 0xC640, 0xC67E },
    283     { 0xC680, 0xC6FE },
    284     { 0xC740, 0xC77E },
    285     { 0xC780, 0xC7FE },
    286     { 0xC840, 0xC87E },
    287     { 0xC880, 0xC8FE },
    288     { 0xC940, 0xC97E },
    289     { 0xC980, 0xC9FE },
    290     { 0xCA40, 0xCA7E },
    291     { 0xCA80, 0xCAFE },
    292     { 0xCB40, 0xCB7E },
    293     { 0xCB80, 0xCBFE },
    294     { 0xCC40, 0xCC7E },
    295     { 0xCC80, 0xCCFE },
    296     { 0xCD40, 0xCD7E },
    297     { 0xCD80, 0xCDFE },
    298     { 0xCE40, 0xCE7E },
    299     { 0xCE80, 0xCEFE },
    300     { 0xCF40, 0xCF7E },
    301     { 0xCF80, 0xCFFE },
    302     { 0xD040, 0xD07E },
    303     { 0xD080, 0xD0FE },
    304     { 0xD140, 0xD17E },
    305     { 0xD180, 0xD1FE },
    306     { 0xD240, 0xD27E },
    307     { 0xD280, 0xD2FE },
    308     { 0xD340, 0xD37E },
    309     { 0xD380, 0xD3FE },
    310     { 0xD440, 0xD47E },
    311     { 0xD480, 0xD4FE },
    312     { 0xD540, 0xD57E },
    313     { 0xD580, 0xD5FE },
    314     { 0xD640, 0xD67E },
    315     { 0xD680, 0xD6FE },
    316     { 0xD740, 0xD77E },
    317     { 0xD780, 0xD7F9 },
    318     { 0xD840, 0xD87E },
    319     { 0xD880, 0xD8FE },
    320     { 0xD940, 0xD97E },
    321     { 0xD980, 0xD9FE },
    322     { 0xDA40, 0xDA7E },
    323     { 0xDA80, 0xDAFE },
    324     { 0xDB40, 0xDB7E },
    325     { 0xDB80, 0xDBFE },
    326     { 0xDC40, 0xDC7E },
    327     { 0xDC80, 0xDCFE },
    328     { 0xDD40, 0xDD7E },
    329     { 0xDD80, 0xDDFE },
    330     { 0xDE40, 0xDE7E },
    331     { 0xDE80, 0xDEFE },
    332     { 0xDF40, 0xDF7E },
    333     { 0xDF80, 0xDFFE },
    334     { 0xE040, 0xE07E },
    335     { 0xE080, 0xE0FE },
    336     { 0xE140, 0xE17E },
    337     { 0xE180, 0xE1FE },
    338     { 0xE240, 0xE27E },
    339     { 0xE280, 0xE2FE },
    340     { 0xE340, 0xE37E },
    341     { 0xE380, 0xE3FE },
    342     { 0xE440, 0xE47E },
    343     { 0xE480, 0xE4FE },
    344     { 0xE540, 0xE57E },
    345     { 0xE580, 0xE5FE },
    346     { 0xE640, 0xE67E },
    347     { 0xE680, 0xE6FE },
    348     { 0xE740, 0xE77E },
    349     { 0xE780, 0xE7FE },
    350     { 0xE840, 0xE87E },
    351     { 0xE880, 0xE8FE },
    352     { 0xE940, 0xE97E },
    353     { 0xE980, 0xE9FE },
    354     { 0xEA40, 0xEA7E },
    355     { 0xEA80, 0xEAFE },
    356     { 0xEB40, 0xEB7E },
    357     { 0xEB80, 0xEBFE },
    358     { 0xEC40, 0xEC7E },
    359     { 0xEC80, 0xECFE },
    360     { 0xED40, 0xED7E },
    361     { 0xED80, 0xEDFE },
    362     { 0xEE40, 0xEE7E },
    363     { 0xEE80, 0xEEFE },
    364     { 0xEF40, 0xEF7E },
    365     { 0xEF80, 0xEFFE },
    366     { 0xF040, 0xF07E },
    367     { 0xF080, 0xF0FE },
    368     { 0xF140, 0xF17E },
    369     { 0xF180, 0xF1FE },
    370     { 0xF240, 0xF27E },
    371     { 0xF280, 0xF2FE },
    372     { 0xF340, 0xF37E },
    373     { 0xF380, 0xF3FE },
    374     { 0xF440, 0xF47E },
    375     { 0xF480, 0xF4FE },
    376     { 0xF540, 0xF57E },
    377     { 0xF580, 0xF5FE },
    378     { 0xF640, 0xF67E },
    379     { 0xF680, 0xF6FE },
    380     { 0xF740, 0xF77E },
    381     { 0xF780, 0xF7FE },
    382     { 0xF840, 0xF87E },
    383     { 0xF880, 0xF8A0 },
    384     { 0xF940, 0xF97E },
    385     { 0xF980, 0xF9A0 },
    386     { 0xFA40, 0xFA7E },
    387     { 0xFA80, 0xFAA0 },
    388     { 0xFB40, 0xFB7E },
    389     { 0xFB80, 0xFBA0 },
    390     { 0xFC40, 0xFC7E },
    391     { 0xFC80, 0xFCA0 },
    392     { 0xFD40, 0xFD7E },
    393     { 0xFD80, 0xFDA0 },
    394     { 0xFE40, 0xFE4F },
    395 };
    396 
    397 // generated from http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT
    398 static const CharRange kEUCKRRanges[] = {
    399     { 0x8141, 0x815A },
    400     { 0x8161, 0x817A },
    401     { 0x8181, 0x81FE },
    402     { 0x8241, 0x825A },
    403     { 0x8261, 0x827A },
    404     { 0x8281, 0x82FE },
    405     { 0x8341, 0x835A },
    406     { 0x8361, 0x837A },
    407     { 0x8381, 0x83FE },
    408     { 0x8441, 0x845A },
    409     { 0x8461, 0x847A },
    410     { 0x8481, 0x84FE },
    411     { 0x8541, 0x855A },
    412     { 0x8561, 0x857A },
    413     { 0x8581, 0x85FE },
    414     { 0x8641, 0x865A },
    415     { 0x8661, 0x867A },
    416     { 0x8681, 0x86FE },
    417     { 0x8741, 0x875A },
    418     { 0x8761, 0x877A },
    419     { 0x8781, 0x87FE },
    420     { 0x8841, 0x885A },
    421     { 0x8861, 0x887A },
    422     { 0x8881, 0x88FE },
    423     { 0x8941, 0x895A },
    424     { 0x8961, 0x897A },
    425     { 0x8981, 0x89FE },
    426     { 0x8A41, 0x8A5A },
    427     { 0x8A61, 0x8A7A },
    428     { 0x8A81, 0x8AFE },
    429     { 0x8B41, 0x8B5A },
    430     { 0x8B61, 0x8B7A },
    431     { 0x8B81, 0x8BFE },
    432     { 0x8C41, 0x8C5A },
    433     { 0x8C61, 0x8C7A },
    434     { 0x8C81, 0x8CFE },
    435     { 0x8D41, 0x8D5A },
    436     { 0x8D61, 0x8D7A },
    437     { 0x8D81, 0x8DFE },
    438     { 0x8E41, 0x8E5A },
    439     { 0x8E61, 0x8E7A },
    440     { 0x8E81, 0x8EFE },
    441     { 0x8F41, 0x8F5A },
    442     { 0x8F61, 0x8F7A },
    443     { 0x8F81, 0x8FFE },
    444     { 0x9041, 0x905A },
    445     { 0x9061, 0x907A },
    446     { 0x9081, 0x90FE },
    447     { 0x9141, 0x915A },
    448     { 0x9161, 0x917A },
    449     { 0x9181, 0x91FE },
    450     { 0x9241, 0x925A },
    451     { 0x9261, 0x927A },
    452     { 0x9281, 0x92FE },
    453     { 0x9341, 0x935A },
    454     { 0x9361, 0x937A },
    455     { 0x9381, 0x93FE },
    456     { 0x9441, 0x945A },
    457     { 0x9461, 0x947A },
    458     { 0x9481, 0x94FE },
    459     { 0x9541, 0x955A },
    460     { 0x9561, 0x957A },
    461     { 0x9581, 0x95FE },
    462     { 0x9641, 0x965A },
    463     { 0x9661, 0x967A },
    464     { 0x9681, 0x96FE },
    465     { 0x9741, 0x975A },
    466     { 0x9761, 0x977A },
    467     { 0x9781, 0x97FE },
    468     { 0x9841, 0x985A },
    469     { 0x9861, 0x987A },
    470     { 0x9881, 0x98FE },
    471     { 0x9941, 0x995A },
    472     { 0x9961, 0x997A },
    473     { 0x9981, 0x99FE },
    474     { 0x9A41, 0x9A5A },
    475     { 0x9A61, 0x9A7A },
    476     { 0x9A81, 0x9AFE },
    477     { 0x9B41, 0x9B5A },
    478     { 0x9B61, 0x9B7A },
    479     { 0x9B81, 0x9BFE },
    480     { 0x9C41, 0x9C5A },
    481     { 0x9C61, 0x9C7A },
    482     { 0x9C81, 0x9CFE },
    483     { 0x9D41, 0x9D5A },
    484     { 0x9D61, 0x9D7A },
    485     { 0x9D81, 0x9DFE },
    486     { 0x9E41, 0x9E5A },
    487     { 0x9E61, 0x9E7A },
    488     { 0x9E81, 0x9EFE },
    489     { 0x9F41, 0x9F5A },
    490     { 0x9F61, 0x9F7A },
    491     { 0x9F81, 0x9FFE },
    492     { 0xA041, 0xA05A },
    493     { 0xA061, 0xA07A },
    494     { 0xA081, 0xA0FE },
    495     { 0xA141, 0xA15A },
    496     { 0xA161, 0xA17A },
    497     { 0xA181, 0xA1FE },
    498     { 0xA241, 0xA25A },
    499     { 0xA261, 0xA27A },
    500     { 0xA281, 0xA2E7 },
    501     { 0xA341, 0xA35A },
    502     { 0xA361, 0xA37A },
    503     { 0xA381, 0xA3FE },
    504     { 0xA441, 0xA45A },
    505     { 0xA461, 0xA47A },
    506     { 0xA481, 0xA4FE },
    507     { 0xA541, 0xA55A },
    508     { 0xA561, 0xA57A },
    509     { 0xA581, 0xA5AA },
    510     { 0xA5B0, 0xA5B9 },
    511     { 0xA5C1, 0xA5D8 },
    512     { 0xA5E1, 0xA5F8 },
    513     { 0xA641, 0xA65A },
    514     { 0xA661, 0xA67A },
    515     { 0xA681, 0xA6E4 },
    516     { 0xA741, 0xA75A },
    517     { 0xA761, 0xA77A },
    518     { 0xA781, 0xA7EF },
    519     { 0xA841, 0xA85A },
    520     { 0xA861, 0xA87A },
    521     { 0xA881, 0xA8A4 },
    522     { 0xA8A6, 0xA8A6 },
    523     { 0xA8A8, 0xA8AF },
    524     { 0xA8B1, 0xA8FE },
    525     { 0xA941, 0xA95A },
    526     { 0xA961, 0xA97A },
    527     { 0xA981, 0xA9FE },
    528     { 0xAA41, 0xAA5A },
    529     { 0xAA61, 0xAA7A },
    530     { 0xAA81, 0xAAF3 },
    531     { 0xAB41, 0xAB5A },
    532     { 0xAB61, 0xAB7A },
    533     { 0xAB81, 0xABF6 },
    534     { 0xAC41, 0xAC5A },
    535     { 0xAC61, 0xAC7A },
    536     { 0xAC81, 0xACC1 },
    537     { 0xACD1, 0xACF1 },
    538     { 0xAD41, 0xAD5A },
    539     { 0xAD61, 0xAD7A },
    540     { 0xAD81, 0xADA0 },
    541     { 0xAE41, 0xAE5A },
    542     { 0xAE61, 0xAE7A },
    543     { 0xAE81, 0xAEA0 },
    544     { 0xAF41, 0xAF5A },
    545     { 0xAF61, 0xAF7A },
    546     { 0xAF81, 0xAFA0 },
    547     { 0xB041, 0xB05A },
    548     { 0xB061, 0xB07A },
    549     { 0xB081, 0xB0FE },
    550     { 0xB141, 0xB15A },
    551     { 0xB161, 0xB17A },
    552     { 0xB181, 0xB1FE },
    553     { 0xB241, 0xB25A },
    554     { 0xB261, 0xB27A },
    555     { 0xB281, 0xB2FE },
    556     { 0xB341, 0xB35A },
    557     { 0xB361, 0xB37A },
    558     { 0xB381, 0xB3FE },
    559     { 0xB441, 0xB45A },
    560     { 0xB461, 0xB47A },
    561     { 0xB481, 0xB4FE },
    562     { 0xB541, 0xB55A },
    563     { 0xB561, 0xB57A },
    564     { 0xB581, 0xB5FE },
    565     { 0xB641, 0xB65A },
    566     { 0xB661, 0xB67A },
    567     { 0xB681, 0xB6FE },
    568     { 0xB741, 0xB75A },
    569     { 0xB761, 0xB77A },
    570     { 0xB781, 0xB7FE },
    571     { 0xB841, 0xB85A },
    572     { 0xB861, 0xB87A },
    573     { 0xB881, 0xB8FE },
    574     { 0xB941, 0xB95A },
    575     { 0xB961, 0xB97A },
    576     { 0xB981, 0xB9FE },
    577     { 0xBA41, 0xBA5A },
    578     { 0xBA61, 0xBA7A },
    579     { 0xBA81, 0xBAFE },
    580     { 0xBB41, 0xBB5A },
    581     { 0xBB61, 0xBB7A },
    582     { 0xBB81, 0xBBFE },
    583     { 0xBC41, 0xBC5A },
    584     { 0xBC61, 0xBC7A },
    585     { 0xBC81, 0xBCFE },
    586     { 0xBD41, 0xBD5A },
    587     { 0xBD61, 0xBD7A },
    588     { 0xBD81, 0xBDFE },
    589     { 0xBE41, 0xBE5A },
    590     { 0xBE61, 0xBE7A },
    591     { 0xBE81, 0xBEFE },
    592     { 0xBF41, 0xBF5A },
    593     { 0xBF61, 0xBF7A },
    594     { 0xBF81, 0xBFFE },
    595     { 0xC041, 0xC05A },
    596     { 0xC061, 0xC07A },
    597     { 0xC081, 0xC0FE },
    598     { 0xC141, 0xC15A },
    599     { 0xC161, 0xC17A },
    600     { 0xC181, 0xC1FE },
    601     { 0xC241, 0xC25A },
    602     { 0xC261, 0xC27A },
    603     { 0xC281, 0xC2FE },
    604     { 0xC341, 0xC35A },
    605     { 0xC361, 0xC37A },
    606     { 0xC381, 0xC3FE },
    607     { 0xC441, 0xC45A },
    608     { 0xC461, 0xC47A },
    609     { 0xC481, 0xC4FE },
    610     { 0xC541, 0xC55A },
    611     { 0xC561, 0xC57A },
    612     { 0xC581, 0xC5FE },
    613     { 0xC641, 0xC652 },
    614     { 0xC6A1, 0xC6FE },
    615     { 0xC7A1, 0xC7FE },
    616     { 0xC8A1, 0xC8FE },
    617     { 0xCAA1, 0xCAFE },
    618     { 0xCBA1, 0xCBFE },
    619     { 0xCCA1, 0xCCFE },
    620     { 0xCDA1, 0xCDFE },
    621     { 0xCEA1, 0xCEFE },
    622     { 0xCFA1, 0xCFFE },
    623     { 0xD0A1, 0xD0FE },
    624     { 0xD1A1, 0xD1FE },
    625     { 0xD2A1, 0xD2FE },
    626     { 0xD3A1, 0xD3FE },
    627     { 0xD4A1, 0xD4FE },
    628     { 0xD5A1, 0xD5FE },
    629     { 0xD6A1, 0xD6FE },
    630     { 0xD7A1, 0xD7FE },
    631     { 0xD8A1, 0xD8FE },
    632     { 0xD9A1, 0xD9FE },
    633     { 0xDAA1, 0xDAFE },
    634     { 0xDBA1, 0xDBFE },
    635     { 0xDCA1, 0xDCFE },
    636     { 0xDDA1, 0xDDFE },
    637     { 0xDEA1, 0xDEFE },
    638     { 0xDFA1, 0xDFFE },
    639     { 0xE0A1, 0xE0FE },
    640     { 0xE1A1, 0xE1FE },
    641     { 0xE2A1, 0xE2FE },
    642     { 0xE3A1, 0xE3FE },
    643     { 0xE4A1, 0xE4FE },
    644     { 0xE5A1, 0xE5FE },
    645     { 0xE6A1, 0xE6FE },
    646     { 0xE7A1, 0xE7FE },
    647     { 0xE8A1, 0xE8FE },
    648     { 0xE9A1, 0xE9FE },
    649     { 0xEAA1, 0xEAFE },
    650     { 0xEBA1, 0xEBFE },
    651     { 0xECA1, 0xECFE },
    652     { 0xEDA1, 0xEDFE },
    653     { 0xEEA1, 0xEEFE },
    654     { 0xEFA1, 0xEFFE },
    655     { 0xF0A1, 0xF0FE },
    656     { 0xF1A1, 0xF1FE },
    657     { 0xF2A1, 0xF2FE },
    658     { 0xF3A1, 0xF3FE },
    659     { 0xF4A1, 0xF4FE },
    660     { 0xF5A1, 0xF5FE },
    661     { 0xF6A1, 0xF6FE },
    662     { 0xF7A1, 0xF7FE },
    663     { 0xF8A1, 0xF8FE },
    664     { 0xF9A1, 0xF9FE },
    665     { 0xFAA1, 0xFAFE },
    666     { 0xFBA1, 0xFBFE },
    667     { 0xFCA1, 0xFCFE },
    668     { 0xFDA1, 0xFDFE },
    669 };
    670 
    671 // generated from http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT
    672 static const CharRange kBig5Ranges[] = {
    673     { 0xA140, 0xA17E },
    674     { 0xA1A1, 0xA1FE },
    675     { 0xA240, 0xA27E },
    676     { 0xA2A1, 0xA2FE },
    677     { 0xA340, 0xA37E },
    678     { 0xA3A1, 0xA3BF },
    679     { 0xA3E1, 0xA3E1 },
    680     { 0xA440, 0xA47E },
    681     { 0xA4A1, 0xA4FE },
    682     { 0xA540, 0xA57E },
    683     { 0xA5A1, 0xA5FE },
    684     { 0xA640, 0xA67E },
    685     { 0xA6A1, 0xA6FE },
    686     { 0xA740, 0xA77E },
    687     { 0xA7A1, 0xA7FE },
    688     { 0xA840, 0xA87E },
    689     { 0xA8A1, 0xA8FE },
    690     { 0xA940, 0xA97E },
    691     { 0xA9A1, 0xA9FE },
    692     { 0xAA40, 0xAA7E },
    693     { 0xAAA1, 0xAAFE },
    694     { 0xAB40, 0xAB7E },
    695     { 0xABA1, 0xABFE },
    696     { 0xAC40, 0xAC7E },
    697     { 0xACA1, 0xACFE },
    698     { 0xAD40, 0xAD7E },
    699     { 0xADA1, 0xADFE },
    700     { 0xAE40, 0xAE7E },
    701     { 0xAEA1, 0xAEFE },
    702     { 0xAF40, 0xAF7E },
    703     { 0xAFA1, 0xAFFE },
    704     { 0xB040, 0xB07E },
    705     { 0xB0A1, 0xB0FE },
    706     { 0xB140, 0xB17E },
    707     { 0xB1A1, 0xB1FE },
    708     { 0xB240, 0xB27E },
    709     { 0xB2A1, 0xB2FE },
    710     { 0xB340, 0xB37E },
    711     { 0xB3A1, 0xB3FE },
    712     { 0xB440, 0xB47E },
    713     { 0xB4A1, 0xB4FE },
    714     { 0xB540, 0xB57E },
    715     { 0xB5A1, 0xB5FE },
    716     { 0xB640, 0xB67E },
    717     { 0xB6A1, 0xB6FE },
    718     { 0xB740, 0xB77E },
    719     { 0xB7A1, 0xB7FE },
    720     { 0xB840, 0xB87E },
    721     { 0xB8A1, 0xB8FE },
    722     { 0xB940, 0xB97E },
    723     { 0xB9A1, 0xB9FE },
    724     { 0xBA40, 0xBA7E },
    725     { 0xBAA1, 0xBAFE },
    726     { 0xBB40, 0xBB7E },
    727     { 0xBBA1, 0xBBFE },
    728     { 0xBC40, 0xBC7E },
    729     { 0xBCA1, 0xBCFE },
    730     { 0xBD40, 0xBD7E },
    731     { 0xBDA1, 0xBDFE },
    732     { 0xBE40, 0xBE7E },
    733     { 0xBEA1, 0xBEFE },
    734     { 0xBF40, 0xBF7E },
    735     { 0xBFA1, 0xBFFE },
    736     { 0xC040, 0xC07E },
    737     { 0xC0A1, 0xC0FE },
    738     { 0xC140, 0xC17E },
    739     { 0xC1A1, 0xC1FE },
    740     { 0xC240, 0xC27E },
    741     { 0xC2A1, 0xC2FE },
    742     { 0xC340, 0xC37E },
    743     { 0xC3A1, 0xC3FE },
    744     { 0xC440, 0xC47E },
    745     { 0xC4A1, 0xC4FE },
    746     { 0xC540, 0xC57E },
    747     { 0xC5A1, 0xC5FE },
    748     { 0xC640, 0xC67E },
    749     { 0xC940, 0xC97E },
    750     { 0xC9A1, 0xC9FE },
    751     { 0xCA40, 0xCA7E },
    752     { 0xCAA1, 0xCAFE },
    753     { 0xCB40, 0xCB7E },
    754     { 0xCBA1, 0xCBFE },
    755     { 0xCC40, 0xCC7E },
    756     { 0xCCA1, 0xCCFE },
    757     { 0xCD40, 0xCD7E },
    758     { 0xCDA1, 0xCDFE },
    759     { 0xCE40, 0xCE7E },
    760     { 0xCEA1, 0xCEFE },
    761     { 0xCF40, 0xCF7E },
    762     { 0xCFA1, 0xCFFE },
    763     { 0xD040, 0xD07E },
    764     { 0xD0A1, 0xD0FE },
    765     { 0xD140, 0xD17E },
    766     { 0xD1A1, 0xD1FE },
    767     { 0xD240, 0xD27E },
    768     { 0xD2A1, 0xD2FE },
    769     { 0xD340, 0xD37E },
    770     { 0xD3A1, 0xD3FE },
    771     { 0xD440, 0xD47E },
    772     { 0xD4A1, 0xD4FE },
    773     { 0xD540, 0xD57E },
    774     { 0xD5A1, 0xD5FE },
    775     { 0xD640, 0xD67E },
    776     { 0xD6A1, 0xD6FE },
    777     { 0xD740, 0xD77E },
    778     { 0xD7A1, 0xD7FE },
    779     { 0xD840, 0xD87E },
    780     { 0xD8A1, 0xD8FE },
    781     { 0xD940, 0xD97E },
    782     { 0xD9A1, 0xD9FE },
    783     { 0xDA40, 0xDA7E },
    784     { 0xDAA1, 0xDAFE },
    785     { 0xDB40, 0xDB7E },
    786     { 0xDBA1, 0xDBFE },
    787     { 0xDC40, 0xDC7E },
    788     { 0xDCA1, 0xDCFE },
    789     { 0xDD40, 0xDD7E },
    790     { 0xDDA1, 0xDDFE },
    791     { 0xDE40, 0xDE7E },
    792     { 0xDEA1, 0xDEFE },
    793     { 0xDF40, 0xDF7E },
    794     { 0xDFA1, 0xDFFE },
    795     { 0xE040, 0xE07E },
    796     { 0xE0A1, 0xE0FE },
    797     { 0xE140, 0xE17E },
    798     { 0xE1A1, 0xE1FE },
    799     { 0xE240, 0xE27E },
    800     { 0xE2A1, 0xE2FE },
    801     { 0xE340, 0xE37E },
    802     { 0xE3A1, 0xE3FE },
    803     { 0xE440, 0xE47E },
    804     { 0xE4A1, 0xE4FE },
    805     { 0xE540, 0xE57E },
    806     { 0xE5A1, 0xE5FE },
    807     { 0xE640, 0xE67E },
    808     { 0xE6A1, 0xE6FE },
    809     { 0xE740, 0xE77E },
    810     { 0xE7A1, 0xE7FE },
    811     { 0xE840, 0xE87E },
    812     { 0xE8A1, 0xE8FE },
    813     { 0xE940, 0xE97E },
    814     { 0xE9A1, 0xE9FE },
    815     { 0xEA40, 0xEA7E },
    816     { 0xEAA1, 0xEAFE },
    817     { 0xEB40, 0xEB7E },
    818     { 0xEBA1, 0xEBFE },
    819     { 0xEC40, 0xEC7E },
    820     { 0xECA1, 0xECFE },
    821     { 0xED40, 0xED7E },
    822     { 0xEDA1, 0xEDFE },
    823     { 0xEE40, 0xEE7E },
    824     { 0xEEA1, 0xEEFE },
    825     { 0xEF40, 0xEF7E },
    826     { 0xEFA1, 0xEFFE },
    827     { 0xF040, 0xF07E },
    828     { 0xF0A1, 0xF0FE },
    829     { 0xF140, 0xF17E },
    830     { 0xF1A1, 0xF1FE },
    831     { 0xF240, 0xF27E },
    832     { 0xF2A1, 0xF2FE },
    833     { 0xF340, 0xF37E },
    834     { 0xF3A1, 0xF3FE },
    835     { 0xF440, 0xF47E },
    836     { 0xF4A1, 0xF4FE },
    837     { 0xF540, 0xF57E },
    838     { 0xF5A1, 0xF5FE },
    839     { 0xF640, 0xF67E },
    840     { 0xF6A1, 0xF6FE },
    841     { 0xF740, 0xF77E },
    842     { 0xF7A1, 0xF7FE },
    843     { 0xF840, 0xF87E },
    844     { 0xF8A1, 0xF8FE },
    845     { 0xF940, 0xF97E },
    846     { 0xF9A1, 0xF9FE },
    847 };
    848 
    849 static bool charMatchesEncoding(int ch, const CharRange* encodingRanges, int rangeCount) {
    850     // Use binary search to see if the character is contained in the encoding
    851     int low = 0;
    852     int high = rangeCount;
    853 
    854     while (low < high) {
    855         int i = (low + high) / 2;
    856         const CharRange* range = &encodingRanges[i];
    857         if (ch >= range->first && ch <= range->last)
    858             return true;
    859         if (ch > range->last)
    860             low = i + 1;
    861         else
    862             high = i;
    863     }
    864 
    865     return false;
    866 }
    867 
    868 extern uint32_t findPossibleEncodings(int ch)
    869 {
    870     // ASCII matches everything
    871     if (ch < 256) return kEncodingAll;
    872 
    873     int result = kEncodingNone;
    874 
    875     if (charMatchesEncoding(ch, kShiftJISRanges, ARRAY_SIZE(kShiftJISRanges)))
    876         result |= kEncodingShiftJIS;
    877     if (charMatchesEncoding(ch, kGBKRanges, ARRAY_SIZE(kGBKRanges)))
    878         result |= kEncodingGBK;
    879     if (charMatchesEncoding(ch, kBig5Ranges, ARRAY_SIZE(kBig5Ranges)))
    880         result |= kEncodingBig5;
    881     if (charMatchesEncoding(ch, kEUCKRRanges, ARRAY_SIZE(kEUCKRRanges)))
    882         result |= kEncodingEUCKR;
    883 
    884     return result;
    885 }
    886