Home | History | Annotate | Download | only in crt
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "core/fxcrt/fx_ext.h"
      8 #include "xfa/fgas/crt/fgas_codepage.h"
      9 #include "xfa/fgas/crt/fgas_language.h"
     10 
     11 namespace {
     12 
// One row of g_FXCPHashTable: associates the hash of a code page name with
// the code page number that name stands for.
struct FX_STR2CPHASH {
  // Hash value; GetCodePageFromStringA() compares it against the result of
  // FX_HashCode_GetA() for the looked-up name.
  uint32_t uHash;
  // Code page returned when |uHash| matches.
  uint16_t uCodePage;
};
     17 
// One row of g_FXCharset2CodePageTable: a charset identifier and the code
// page it maps to.
struct FX_CHARSET_MAP {
  // Lookup key; FX_GetCodePageFromCharset() compares its uint8_t argument
  // against this field.
  uint16_t charset;
  // Code page returned for |charset|.
  uint16_t codepage;
};
     22 
// One row of g_FXLang2CodepageTable: a language identifier and its default
// code page.
struct FX_LANG2CPMAP {
  // Lookup key (one of the FX_LANG_* constants).
  uint16_t wLanguage;
  // Code page returned by FX_GetDefCodePageByLanguage() for |wLanguage|.
  uint16_t wCodepage;
};
     27 
// Charset -> code page lookup table, as {charset, codepage} pairs.
// FX_GetCodePageFromCharset() binary-searches this table, so the entries
// must stay sorted by ascending charset value.
// NOTE(review): the charset numbers look like Windows font charset
// identifiers -- confirm against the callers.
const FX_CHARSET_MAP g_FXCharset2CodePageTable[] = {
    {0, 1252},   {1, 0},      {2, 42},     {77, 10000}, {78, 10001},
    {79, 10003}, {80, 10008}, {81, 10002}, {83, 10005}, {84, 10004},
    {85, 10006}, {86, 10081}, {87, 10021}, {88, 10029}, {89, 10007},
    {128, 932},  {129, 949},  {130, 1361}, {134, 936},  {136, 950},
    {161, 1253}, {162, 1254}, {163, 1258}, {177, 1255}, {178, 1256},
    {186, 1257}, {204, 1251}, {222, 874},  {238, 1250}, {254, 437},
    {255, 850},
};
     37 
// Language -> default code page lookup table, as {FX_LANG_*, FX_CODEPAGE_*}
// pairs. FX_GetDefCodePageByLanguage() binary-searches this table on
// wLanguage, so rows must be ordered by ascending numeric FX_LANG_* value.
// NOTE(review): the FX_LANG_* values are defined outside this file; the
// ordering cannot be checked from here -- verify when adding or moving rows.
const FX_LANG2CPMAP g_FXLang2CodepageTable[] = {
    {FX_LANG_Arabic_SaudiArabia, FX_CODEPAGE_MSWin_Arabic},
    {FX_LANG_Bulgarian_Bulgaria, FX_CODEPAGE_MSWin_Cyrillic},
    {FX_LANG_Catalan_Catalan, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Chinese_Taiwan, FX_CODEPAGE_ChineseTraditional},
    {FX_LANG_CzechRepublic, FX_CODEPAGE_MSWin_EasternEuropean},
    {FX_LANG_Danish_Denmark, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_German_Germany, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Greek_Greece, FX_CODEPAGE_MSWin_Greek},
    {FX_LANG_English_UnitedStates, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Spanish_TraditionalSort, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Finnish_Finland, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_French_France, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Hebrew_Israel, FX_CODEPAGE_MSWin_Hebrew},
    {FX_LANG_Hungarian_Hungary, FX_CODEPAGE_MSWin_EasternEuropean},
    {FX_LANG_Icelandic_Iceland, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Italian_Italy, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Japanese_Japan, FX_CODEPAGE_ShiftJIS},
    {FX_LANG_Korean_Korea, FX_CODEPAGE_Korean},
    {FX_LANG_Dutch_Netherlands, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Norwegian_Bokmal, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Polish_Poland, FX_CODEPAGE_MSWin_EasternEuropean},
    {FX_LANG_Portuguese_Brazil, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Romanian_Romania, FX_CODEPAGE_MSWin_EasternEuropean},
    {FX_LANG_Russian_Russia, FX_CODEPAGE_MSWin_Cyrillic},
    {FX_LANG_Croatian_Croatia, FX_CODEPAGE_MSWin_EasternEuropean},
    {FX_LANG_Slovak_Slovakia, FX_CODEPAGE_MSWin_EasternEuropean},
    {FX_LANG_Albanian_Albania, FX_CODEPAGE_MSWin_EasternEuropean},
    {FX_LANG_Swedish_Sweden, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Thai_Thailand, FX_CODEPAGE_MSDOS_Thai},
    {FX_LANG_Turkish_Turkey, FX_CODEPAGE_MSWin_Turkish},
    {FX_LANG_Urdu_Pakistan, FX_CODEPAGE_MSWin_Arabic},
    {FX_LANG_Indonesian_Indonesia, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Ukrainian_Ukraine, FX_CODEPAGE_MSWin_Cyrillic},
    {FX_LANG_Belarusian_Belarus, FX_CODEPAGE_MSWin_Cyrillic},
    {FX_LANG_Slovenian_Slovenia, FX_CODEPAGE_MSWin_EasternEuropean},
    {FX_LANG_Estonian_Estonia, FX_CODEPAGE_MSWin_Baltic},
    {FX_LANG_Latvian_Latvia, FX_CODEPAGE_MSWin_Baltic},
    {FX_LANG_Lithuanian_Lithuania, FX_CODEPAGE_MSWin_Baltic},
    {FX_LANG_Persian, FX_CODEPAGE_MSWin_Arabic},
    {FX_LANG_Vietnamese_Vietnam, FX_CODEPAGE_MSWin_Vietnamese},
    {FX_LANG_Armenian_Armenia, FX_CODEPAGE_DefANSI},
    {FX_LANG_Azerbaijan_Latin, FX_CODEPAGE_MSWin_Turkish},
    {FX_LANG_Basque_Basque, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Macedonian, FX_CODEPAGE_MSWin_Cyrillic},
    {FX_LANG_Afrikaans_SouthAfrica, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Georgian_Georgia, FX_CODEPAGE_DefANSI},
    {FX_LANG_Faroese_FaroeIslands, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Hindi_India, FX_CODEPAGE_DefANSI},
    {FX_LANG_Malay_Malaysia, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Kazakh_Kazakhstan, FX_CODEPAGE_MSWin_Cyrillic},
    {FX_LANG_Kyrgyz_Kyrgyzstan, FX_CODEPAGE_MSWin_Cyrillic},
    {FX_LANG_Kiswahili_Kenya, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Uzbek_LatinUzbekistan, FX_CODEPAGE_MSWin_Turkish},
    {FX_LANG_Tatar_Russia, FX_CODEPAGE_MSWin_Cyrillic},
    {FX_LANG_Punjabi_India, FX_CODEPAGE_DefANSI},
    {FX_LANG_Gujarati_India, FX_CODEPAGE_DefANSI},
    {FX_LANG_Tamil_India, FX_CODEPAGE_DefANSI},
    {FX_LANG_Telugu_India, FX_CODEPAGE_DefANSI},
    {FX_LANG_Kannada_India, FX_CODEPAGE_DefANSI},
    {FX_LANG_Marathi_India, FX_CODEPAGE_DefANSI},
    {FX_LANG_SanskritIndia, FX_CODEPAGE_DefANSI},
    {FX_LANG_Mongolian_CyrillicMongolia, FX_CODEPAGE_MSWin_Cyrillic},
    {FX_LANG_Galician_Galician, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Konkani_India, FX_CODEPAGE_DefANSI},
    {FX_LANG_Syriac_Syria, FX_CODEPAGE_DefANSI},
    {FX_LANG_Divehi_Maldives, FX_CODEPAGE_DefANSI},
    {FX_LANG_Arabic_Iraq, FX_CODEPAGE_MSWin_Arabic},
    {FX_LANG_Chinese_PRC, FX_CODEPAGE_ChineseSimplified},
    {FX_LANG_German_Switzerland, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_English_UnitedKingdom, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Spanish_Mexico, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_French_Belgium, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Italian_Switzerland, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Dutch_Belgium, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Norwegian_Nynorsk, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Portuguese_Portugal, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_SerbianLatin_Serbia, FX_CODEPAGE_MSWin_EasternEuropean},
    {FX_LANG_Swedish_Finland, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Azerbaijan_Cyrillic, FX_CODEPAGE_MSWin_Cyrillic},
    {FX_LANG_Malay_BruneiDarussalam, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Uzbek_CyrillicUzbekistan, FX_CODEPAGE_MSWin_Cyrillic},
    {FX_LANG_Arabic_Egypt, FX_CODEPAGE_MSWin_Arabic},
    {FX_LANG_Chinese_HongKong, FX_CODEPAGE_ChineseTraditional},
    {FX_LANG_German_Austria, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_English_Australia, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Spanish_InternationalSort, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_French_Canada, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_SerbianCyrillic_Serbia, FX_CODEPAGE_MSWin_Cyrillic},
    {FX_LANG_Arabic_Libya, FX_CODEPAGE_MSWin_Arabic},
    {FX_LANG_Chinese_Singapore, FX_CODEPAGE_ChineseSimplified},
    {FX_LANG_German_Luxembourg, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_English_Canada, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Spanish_Guatemala, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_French_Switzerland, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Arabic_Algeria, FX_CODEPAGE_MSWin_Arabic},
    {FX_LANG_Chinese_Macao, FX_CODEPAGE_ChineseTraditional},
    {FX_LANG_German_Liechtenstein, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_English_NewZealand, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Spanish_CostaRica, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_French_Luxembourg, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Arabic_Morocco, FX_CODEPAGE_MSWin_Arabic},
    {FX_LANG_English_Ireland, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Spanish_Panama, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_French_Monaco, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Arabic_Tunisia, FX_CODEPAGE_MSWin_Arabic},
    {FX_LANG_English_SouthAfrica, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Spanish_DominicanRepublic, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Arabic_Oman, FX_CODEPAGE_MSWin_Arabic},
    {FX_LANG_English_Jamaica, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Spanish_Venezuela, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Arabic_Yemen, FX_CODEPAGE_MSWin_Arabic},
    {FX_LANG_English_Caribbean, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Spanish_Colombia, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Arabic_Syria, FX_CODEPAGE_MSWin_Arabic},
    {FX_LANG_English_Belize, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Spanish_Peru, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Arabic_Jordan, FX_CODEPAGE_MSWin_Arabic},
    {FX_LANG_English_TrinidadTobago, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Spanish_Argentina, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Arabic_Lebanon, FX_CODEPAGE_MSWin_Arabic},
    {FX_LANG_English_Zimbabwe, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Spanish_Ecuador, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Arabic_Kuwait, FX_CODEPAGE_MSWin_Arabic},
    {FX_LANG_English_Philippines, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Spanish_Chile, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Arabic_UAE, FX_CODEPAGE_MSWin_Arabic},
    {FX_LANG_Spanish_Uruguay, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Arabic_Bahrain, FX_CODEPAGE_MSWin_Arabic},
    {FX_LANG_Spanish_Paraguay, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Arabic_Qatar, FX_CODEPAGE_MSWin_Arabic},
    {FX_LANG_Spanish_Bolivia, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Spanish_ElSalvador, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Spanish_Honduras, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Spanish_Nicaragua, FX_CODEPAGE_MSWin_WesternEuropean},
    {FX_LANG_Spanish_PuertoRico, FX_CODEPAGE_MSWin_WesternEuropean},
};
    175 
// Name-hash -> code page lookup table, as {uHash, uCodePage} pairs.
// GetCodePageFromStringA() hashes a name with FX_HashCode_GetA() and
// binary-searches this table on uHash, so entries must stay sorted by
// ascending uHash. Several hashes map to the same code page.
// NOTE(review): the strings these hashes were generated from are not recorded
// in this file; a new entry needs its hash computed with the same function
// and arguments that GetCodePageFromStringA() uses.
const FX_STR2CPHASH g_FXCPHashTable[] = {
    {0xd45, 0x6faf},      {0xd46, 0x6fb0},      {0xd47, 0x6fb1},
    {0xd48, 0x6fb2},      {0xd49, 0x4e6},       {0xd4d, 0x6fbd},
    {0xe9e, 0x4e4},       {0xc998, 0x1b5},      {0x18ef0, 0x3a8},
    {0x19f85, 0x5182},    {0x2e2335, 0x3b6},    {0x325153, 0x5182},
    {0x145bded, 0x2716},  {0x3c9a5f2, 0xc6f3},  {0x4c45f2d, 0x3a4},
    {0x4c45f4e, 0xc431},  {0x58caf51, 0x4e4},   {0x5a5cd7d, 0x3a8},
    {0x5a6c6a7, 0x4e4},   {0x5a6ca0b, 0x1b5},   {0x5a6cd68, 0x307},
    {0x5a6d8d3, 0x4e4},   {0x5a6d948, 0x354},   {0x5a6d96b, 0x362},
    {0x5a6d984, 0x366},   {0x5a90e35, 0x1b5},   {0x5e0cf00, 0x6fb5},
    {0x609c324, 0x551},   {0x617d97f, 0x5182},  {0x6a6fd91, 0xfde8},
    {0x6a6fd92, 0xfde9},  {0x6b102de, 0xcadc},  {0x6b10f48, 0x4e89},
    {0x1020805f, 0x4e4},  {0x10f0524c, 0x6fb5}, {0x11d558fe, 0x6fb0},
    {0x13898d19, 0xc42d}, {0x13898d3a, 0xc431}, {0x138a319e, 0x6fb1},
    {0x14679c09, 0x96c6}, {0x153f0a3d, 0x6fb2}, {0x1576eeb3, 0x4e20},
    {0x169a0ce6, 0xc6f9}, {0x16f3e2dc, 0x6fb3}, {0x18a8bb7a, 0x6fb4},
    {0x1a5d9419, 0x6fb5}, {0x1a847b48, 0x3a8},  {0x1b762419, 0xcec8},
    {0x1b9d7847, 0x475},  {0x1c126cb9, 0x6fb6}, {0x1ccdbc7d, 0x4f42},
    {0x1d330f5f, 0x2714}, {0x1dc74559, 0x4e6},  {0x1edd80da, 0x4e4},
    {0x23e4b03d, 0xfde8}, {0x24f28a16, 0x4f3d}, {0x286e7a32, 0x2715},
    {0x2c7c18ed, 0x3a8},  {0x2e2103b7, 0x2713}, {0x304bf479, 0x6fb4},
    {0x304bf47d, 0x6fb5}, {0x309bb869, 0xfde8}, {0x309bb86a, 0xfde9},
    {0x33664357, 0x3b6},  {0x352d6b49, 0x3a4},  {0x36f5661c, 0x1b5},
    {0x392e8f48, 0xcadc}, {0x3dc7c64c, 0x47c},  {0x3ed2e8e1, 0x4e4},
    {0x3f0c2fea, 0xcaed}, {0x3f0fef8f, 0xc6f2}, {0x3f5e130f, 0x5182},
    {0x47174d1f, 0x3a8},  {0x49686b7b, 0x6fb4}, {0x4b80b0d9, 0x3a4},
    {0x4dcda97a, 0x4e4},  {0x4dcda9b6, 0x4e4},  {0x4e881e6a, 0x5221},
    {0x4ffdf5a1, 0x36a},  {0x4ffdf5a5, 0x6fbd}, {0x5241ce16, 0x4e8b},
    {0x546bab9d, 0x4e4},  {0x54a3d64e, 0x6fb6}, {0x562179bd, 0x5161},
    {0x57c1df15, 0xc6f7}, {0x61ff6e62, 0x4f36}, {0x6359c7d8, 0x4f35},
    {0x63f3c335, 0x3a8},  {0x645a0f78, 0x477},  {0x691ac2fd, 0x275f},
    {0x6dc2eab0, 0x2d0},  {0x6dc2eeef, 0x35e},  {0x6dc2ef10, 0x36a},
    {0x7103138a, 0x47d},  {0x710dfbd0, 0xc6f5}, {0x7319f6cb, 0x36a},
    {0x745096ad, 0x3a8},  {0x74866229, 0x4e8c}, {0x77185fa5, 0x3a8},
    {0x7953f002, 0x6faf}, {0x7953f003, 0x6fb0}, {0x7953f004, 0x6fb1},
    {0x7953f005, 0x6fb2}, {0x7953f006, 0x6fb7}, {0x7953f00a, 0x6fbd},
    {0x7c577571, 0x2761}, {0x7e8c8ff1, 0x479},  {0x8031f47f, 0x3b5},
    {0x8031f481, 0x3b5},  {0x80c4a710, 0x5187}, {0x857c7e14, 0xfde8},
    {0x857c7e15, 0xfde9}, {0x86b59c90, 0x4e4},  {0x86b59c91, 0x6fb0},
    {0x86b59c92, 0x6fb1}, {0x86b59c93, 0x6fb2}, {0x86b59c94, 0x6fb3},
    {0x86b59c95, 0x6fb4}, {0x86b59c96, 0x6fb5}, {0x86b59c97, 0x4e7},
    {0x86b59c98, 0x4e6},  {0x8b4b24ec, 0x5190}, {0x8face362, 0x4e4},
    {0x8ff9ec2a, 0xfde9}, {0x919d3989, 0xcadc}, {0x9967e5ad, 0x4e22},
    {0x99f8b933, 0x6fbd}, {0x9bd2a380, 0x4fc7}, {0x9befad23, 0x4f38},
    {0x9c7ac649, 0x4f3c}, {0xa02468db, 0xdeae}, {0xa02468ec, 0xdeab},
    {0xa024692a, 0xdeaa}, {0xa0246997, 0xdeb2}, {0xa02469ff, 0xdeb0},
    {0xa0246a3d, 0xdeb1}, {0xa0246a8c, 0xdeaf}, {0xa0246a9a, 0xdeb3},
    {0xa0246b16, 0xdeac}, {0xa0246b1a, 0xdead}, {0xa071addc, 0x4b1},
    {0xa38b62dc, 0x474},  {0xa4c09fed, 0x3a8},  {0xa51e86e5, 0x4e7},
    {0xa67ab13e, 0x3a4},  {0xa7414244, 0x51a9}, {0xa9ddbead, 0xc6fb},
    {0xab24ffab, 0x4e8a}, {0xabef8ac4, 0x2710}, {0xabfa20ac, 0x6fb4},
    {0xad36895e, 0x4e2},  {0xad36895f, 0x4e3},  {0xaf310e90, 0x402},
    {0xaf31166f, 0x4e8},  {0xaf7277a5, 0x3b6},  {0xafc0d8b3, 0x96c6},
    {0xb0fd5dba, 0xcae0}, {0xb0fd5e95, 0xcadc}, {0xb1052893, 0x7149},
    {0xb1e98745, 0x36a},  {0xb277e91c, 0x5166}, {0xb2f7eac5, 0xcae0},
    {0xb2f7eba0, 0xcadc}, {0xb2f7ebc1, 0x3b5},  {0xb53fa77d, 0x3a8},
    {0xb6391138, 0x6fb5}, {0xb7358b7f, 0x6fb6}, {0xb8c42b40, 0x4e4},
    {0xb8c42ea4, 0x1b5},  {0xb8c439e7, 0x2e1},  {0xb8c43a61, 0x307},
    {0xb8c43d6c, 0x4e4},  {0xb8c43ddf, 0x352},  {0xb8c43de1, 0x354},
    {0xb8c43de6, 0x359},  {0xb8c43dff, 0x35d},  {0xb8c43e04, 0x362},
    {0xb8c43e07, 0x365},  {0xbcd29a7f, 0x3a8},  {0xbce34e78, 0x5182},
    {0xbce34e7b, 0x556a}, {0xbce81504, 0x3b5},  {0xbd8a4c95, 0x272d},
    {0xbdd89dad, 0x4e4},  {0xbdd89dae, 0x6fb0}, {0xbdd89daf, 0x6fb1},
    {0xbdd89db0, 0x6fb2}, {0xbdd89db1, 0x4e6},  {0xbdd89db5, 0x6fbd},
    {0xc1756e9f, 0x36b},  {0xc7482444, 0x47a},  {0xc9281c18, 0x4e4},
    {0xc9ef95df, 0x47b},  {0xccc9db0d, 0x4e4},  {0xccc9db0e, 0x6fb0},
    {0xcd73425f, 0x3b6},  {0xce38b40b, 0x4b0},  {0xce99e549, 0x25},
    {0xcf598740, 0x4e7},  {0xcf6d6f78, 0x4e4},  {0xcf758df6, 0x3a4},
    {0xd1266e51, 0x6fb5}, {0xd2910213, 0x2718}, {0xd29196bb, 0x2712},
    {0xd3eb2fc2, 0x476},  {0xd442dc2c, 0x4fc4}, {0xd9da4da4, 0x2711},
    {0xdbad2f42, 0x4e4},  {0xdbad2f43, 0x6fb0}, {0xdbad2f44, 0x6fb1},
    {0xdbad2f45, 0x6fb2}, {0xdbad2f46, 0x6fb3}, {0xdbad2f47, 0x6fb4},
    {0xdbad2f48, 0x6fb5}, {0xdbad2f49, 0x6fb6}, {0xdbad2f4a, 0x4e6},
    {0xdc438033, 0x4f31}, {0xdccb439b, 0x477},  {0xdccdc626, 0x3b5},
    {0xdd80a595, 0x4e4},  {0xdd80a596, 0x6fb0}, {0xdd80a59e, 0x6fb1},
    {0xdd80a5b4, 0x6fb2}, {0xdd80a5d9, 0x6fb5}, {0xdd80a5da, 0x6fb4},
    {0xdd80a5fa, 0x6fb6}, {0xdd80a615, 0x6fb3}, {0xdd80a619, 0x4e6},
    {0xdd80a61a, 0x3b5},  {0xdd80c0f8, 0x4e9f}, {0xdf7e46ff, 0x4fc8},
    {0xdf8680fd, 0x556a}, {0xdfb0bd6e, 0xc42d}, {0xdff05486, 0x2c4},
    {0xe3323399, 0x3a4},  {0xe60412dd, 0x3b5},  {0xeee47add, 0x4b0},
    {0xf021a186, 0x4e2},  {0xf021a187, 0x4e3},  {0xf021a188, 0x4e4},
    {0xf021a189, 0x4e5},  {0xf021a18a, 0x4e6},  {0xf021a18b, 0x4e7},
    {0xf021a18c, 0x4e8},  {0xf021a18d, 0x4e9},  {0xf021a18e, 0x4ea},
    {0xf0700456, 0x6fb3}, {0xf274f175, 0x3b5},  {0xf2a9730b, 0x3a8},
    {0xf3d463c2, 0x3a4},  {0xf52a70a3, 0xc42e}, {0xf5693147, 0x6fb3},
    {0xf637e157, 0x478},  {0xfc213f3a, 0x2717}, {0xff654d14, 0x3b5},
};
    263 
    264 uint16_t GetCodePageFromStringA(const FX_CHAR* pStr, int32_t iLength) {
    265   ASSERT(pStr);
    266   if (iLength < 0) {
    267     iLength = FXSYS_strlen(pStr);
    268   }
    269   if (iLength == 0) {
    270     return 0xFFFF;
    271   }
    272   uint32_t uHash = FX_HashCode_GetA(CFX_ByteStringC(pStr, iLength), true);
    273   int32_t iStart = 0;
    274   int32_t iEnd = sizeof(g_FXCPHashTable) / sizeof(FX_STR2CPHASH) - 1;
    275   ASSERT(iEnd >= 0);
    276   do {
    277     int32_t iMid = (iStart + iEnd) / 2;
    278     const FX_STR2CPHASH& cp = g_FXCPHashTable[iMid];
    279     if (uHash == cp.uHash) {
    280       return (uint16_t)cp.uCodePage;
    281     } else if (uHash < cp.uHash) {
    282       iEnd = iMid - 1;
    283     } else {
    284       iStart = iMid + 1;
    285     }
    286   } while (iStart <= iEnd);
    287   return 0xFFFF;
    288 }
    289 
    290 }  // namespace
    291 
    292 uint16_t FX_GetCodePageFromCharset(uint8_t charset) {
    293   int32_t iEnd = sizeof(g_FXCharset2CodePageTable) / sizeof(FX_CHARSET_MAP) - 1;
    294   ASSERT(iEnd >= 0);
    295   int32_t iStart = 0, iMid;
    296   do {
    297     iMid = (iStart + iEnd) / 2;
    298     const FX_CHARSET_MAP& cp = g_FXCharset2CodePageTable[iMid];
    299     if (charset == cp.charset) {
    300       return cp.codepage;
    301     } else if (charset < cp.charset) {
    302       iEnd = iMid - 1;
    303     } else {
    304       iStart = iMid + 1;
    305     }
    306   } while (iStart <= iEnd);
    307   return 0xFFFF;
    308 }
    309 
    310 uint16_t FX_GetDefCodePageByLanguage(uint16_t wLanguage) {
    311   int32_t iEnd = sizeof(g_FXLang2CodepageTable) / sizeof(FX_LANG2CPMAP) - 1;
    312   ASSERT(iEnd >= 0);
    313   int32_t iStart = 0, iMid;
    314   do {
    315     iMid = (iStart + iEnd) / 2;
    316     const FX_LANG2CPMAP& cp = g_FXLang2CodepageTable[iMid];
    317     if (wLanguage == cp.wLanguage) {
    318       return cp.wCodepage;
    319     } else if (wLanguage < cp.wLanguage) {
    320       iEnd = iMid - 1;
    321     } else {
    322       iStart = iMid + 1;
    323     }
    324   } while (iStart <= iEnd);
    325   return 0xFFFF;
    326 }
    327 
    328 uint16_t FX_GetCodePageFromStringW(const FX_WCHAR* pStr, int32_t iLength) {
    329   if (iLength < 0) {
    330     iLength = FXSYS_wcslen(pStr);
    331   }
    332   if (iLength == 0) {
    333     return 0xFFFF;
    334   }
    335   CFX_ByteString csStr;
    336   FX_CHAR* pBuf = csStr.GetBuffer(iLength + 1);
    337   for (int32_t i = 0; i < iLength; ++i) {
    338     *pBuf++ = (FX_CHAR)*pStr++;
    339   }
    340   csStr.ReleaseBuffer(iLength);
    341   return GetCodePageFromStringA(csStr.c_str(), iLength);
    342 }
    343 
    344 void FX_SwapByteOrder(FX_WCHAR* pStr, int32_t iLength) {
    345   ASSERT(pStr);
    346   if (iLength < 0) {
    347     iLength = FXSYS_wcslen(pStr);
    348   }
    349   uint16_t wch;
    350   if (sizeof(FX_WCHAR) > 2) {
    351     while (iLength-- > 0) {
    352       wch = (uint16_t)*pStr;
    353       wch = (wch >> 8) | (wch << 8);
    354       wch &= 0x00FF;
    355       *pStr++ = wch;
    356     }
    357   } else {
    358     while (iLength-- > 0) {
    359       wch = (uint16_t)*pStr;
    360       wch = (wch >> 8) | (wch << 8);
    361       *pStr++ = wch;
    362     }
    363   }
    364 }
    365 
    366 void FX_UTF16ToWChar(void* pBuffer, int32_t iLength) {
    367   ASSERT(pBuffer && iLength > 0);
    368   if (sizeof(FX_WCHAR) == 2) {
    369     return;
    370   }
    371   uint16_t* pSrc = (uint16_t*)pBuffer;
    372   FX_WCHAR* pDst = (FX_WCHAR*)pBuffer;
    373   while (--iLength >= 0) {
    374     pDst[iLength] = (FX_WCHAR)pSrc[iLength];
    375   }
    376 }
    377 
    378 int32_t FX_DecodeString(uint16_t wCodePage,
    379                         const FX_CHAR* pSrc,
    380                         int32_t* pSrcLen,
    381                         FX_WCHAR* pDst,
    382                         int32_t* pDstLen,
    383                         bool bErrBreak) {
    384   if (wCodePage == FX_CODEPAGE_UTF8) {
    385     return FX_UTF8Decode(pSrc, pSrcLen, pDst, pDstLen);
    386   }
    387   return -1;
    388 }
    389 int32_t FX_UTF8Decode(const FX_CHAR* pSrc,
    390                       int32_t* pSrcLen,
    391                       FX_WCHAR* pDst,
    392                       int32_t* pDstLen) {
    393   if (!pSrcLen || !pDstLen) {
    394     return -1;
    395   }
    396   int32_t iSrcLen = *pSrcLen;
    397   if (iSrcLen < 1) {
    398     *pSrcLen = *pDstLen = 0;
    399     return 1;
    400   }
    401   int32_t iDstLen = *pDstLen;
    402   bool bValidDst = (pDst && iDstLen > 0);
    403   uint32_t dwCode = 0;
    404   int32_t iPending = 0;
    405   int32_t iSrcNum = 0, iDstNum = 0;
    406   int32_t k = 0;
    407   int32_t iIndex = 0;
    408   k = 1;
    409   while (iIndex < iSrcLen) {
    410     uint8_t byte = (uint8_t) * (pSrc + iIndex);
    411     if (byte < 0x80) {
    412       iPending = 0;
    413       k = 1;
    414       iDstNum++;
    415       iSrcNum += k;
    416       if (bValidDst) {
    417         *pDst++ = byte;
    418         if (iDstNum >= iDstLen) {
    419           break;
    420         }
    421       }
    422     } else if (byte < 0xc0) {
    423       if (iPending < 1) {
    424         break;
    425       }
    426       iPending--;
    427       dwCode |= (byte & 0x3f) << (iPending * 6);
    428       if (iPending == 0) {
    429         iDstNum++;
    430         iSrcNum += k;
    431         if (bValidDst) {
    432           *pDst++ = dwCode;
    433           if (iDstNum >= iDstLen) {
    434             break;
    435           }
    436         }
    437       }
    438     } else if (byte < 0xe0) {
    439       iPending = 1;
    440       k = 2;
    441       dwCode = (byte & 0x1f) << 6;
    442     } else if (byte < 0xf0) {
    443       iPending = 2;
    444       k = 3;
    445       dwCode = (byte & 0x0f) << 12;
    446     } else if (byte < 0xf8) {
    447       iPending = 3;
    448       k = 4;
    449       dwCode = (byte & 0x07) << 18;
    450     } else if (byte < 0xfc) {
    451       iPending = 4;
    452       k = 5;
    453       dwCode = (byte & 0x03) << 24;
    454     } else if (byte < 0xfe) {
    455       iPending = 5;
    456       k = 6;
    457       dwCode = (byte & 0x01) << 30;
    458     } else {
    459       break;
    460     }
    461     iIndex++;
    462   }
    463   *pSrcLen = iSrcNum;
    464   *pDstLen = iDstNum;
    465   return 1;
    466 }
    467