Home | History | Annotate | Download | only in i18n
      1 /*
      2  **********************************************************************
      3  *   Copyright (C) 2005-2013, International Business Machines
      4  *   Corporation and others.  All Rights Reserved.
      5  **********************************************************************
      6  */
      7 
      8 #include "unicode/utypes.h"
      9 
     10 #include "cmemory.h"
     11 
     12 #if !UCONFIG_NO_CONVERSION
     13 #include "csrsbcs.h"
     14 #include "csmatch.h"
     15 
     16 #define N_GRAM_SIZE 3
     17 #define N_GRAM_MASK 0xFFFFFF
     18 #define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
     19 
     20 U_NAMESPACE_BEGIN
     21 
     22 NGramParser::NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap)
     23  : ngram(0), byteIndex(0)
     24 {
     25     ngramList = theNgramList;
     26     charMap   = theCharMap;
     27 
     28     ngramCount = hitCount = 0;
     29 }
     30 
     31 /*
     32  * Binary search for value in table, which must have exactly 64 entries.
     33  */
     34 
     35 int32_t NGramParser::search(const int32_t *table, int32_t value)
     36 {
     37     int32_t index = 0;
     38 
     39     if (table[index + 32] <= value) {
     40         index += 32;
     41     }
     42 
     43     if (table[index + 16] <= value) {
     44         index += 16;
     45     }
     46 
     47     if (table[index + 8] <= value) {
     48         index += 8;
     49     }
     50 
     51     if (table[index + 4] <= value) {
     52         index += 4;
     53     }
     54 
     55     if (table[index + 2] <= value) {
     56         index += 2;
     57     }
     58 
     59     if (table[index + 1] <= value) {
     60         index += 1;
     61     }
     62 
     63     if (table[index] > value) {
     64         index -= 1;
     65     }
     66 
     67     if (index < 0 || table[index] != value) {
     68         return -1;
     69     }
     70 
     71     return index;
     72 }
     73 
     74 void NGramParser::lookup(int32_t thisNgram)
     75 {
     76     ngramCount += 1;
     77 
     78     if (search(ngramList, thisNgram) >= 0) {
     79         hitCount += 1;
     80     }
     81 
     82 }
     83 
     84 void NGramParser::addByte(int32_t b)
     85 {
     86     ngram = ((ngram << 8) + b) & N_GRAM_MASK;
     87     lookup(ngram);
     88 }
     89 
     90 int32_t NGramParser::nextByte(InputText *det)
     91 {
     92     if (byteIndex >= det->fInputLen) {
     93         return -1;
     94     }
     95 
     96     return det->fInputBytes[byteIndex++];
     97 }
     98 
     99 void NGramParser::parseCharacters(InputText *det)
    100 {
    101     int32_t b;
    102     bool ignoreSpace = FALSE;
    103 
    104     while ((b = nextByte(det)) >= 0) {
    105         uint8_t mb = charMap[b];
    106 
    107         // TODO: 0x20 might not be a space in all character sets...
    108         if (mb != 0) {
    109             if (!(mb == 0x20 && ignoreSpace)) {
    110                 addByte(mb);
    111             }
    112 
    113             ignoreSpace = (mb == 0x20);
    114         }
    115     }
    116 }
    117 
    118 int32_t NGramParser::parse(InputText *det)
    119 {
    120     parseCharacters(det);
    121 
    122     // TODO: Is this OK? The buffer could have ended in the middle of a word...
    123     addByte(0x20);
    124 
    125     double rawPercent = (double) hitCount / (double) ngramCount;
    126 
    127     //            if (rawPercent <= 2.0) {
    128     //                return 0;
    129     //            }
    130 
    131     // TODO - This is a bit of a hack to take care of a case
    132     // were we were getting a confidence of 135...
    133     if (rawPercent > 0.33) {
    134         return 98;
    135     }
    136 
    137     return (int32_t) (rawPercent * 300.0);
    138 }
    139 
    140 static const uint8_t unshapeMap_IBM420[] = {
    141 /*           -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */
    142 /* 0- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
    143 /* 1- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
    144 /* 2- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
    145 /* 3- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
    146 /* 4- */    0x40, 0x40, 0x42, 0x42, 0x44, 0x45, 0x46, 0x47, 0x47, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    147 /* 5- */    0x50, 0x49, 0x52, 0x53, 0x54, 0x55, 0x56, 0x56, 0x58, 0x58, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
    148 /* 6- */    0x60, 0x61, 0x62, 0x63, 0x63, 0x65, 0x65, 0x67, 0x67, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    149 /* 7- */    0x69, 0x71, 0x71, 0x73, 0x74, 0x75, 0x76, 0x77, 0x77, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    150 /* 8- */    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x80, 0x8B, 0x8B, 0x8D, 0x8D, 0x8F,
    151 /* 9- */    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9A, 0x9A, 0x9A, 0x9E, 0x9E,
    152 /* A- */    0x9E, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x9E, 0xAB, 0xAB, 0xAD, 0xAD, 0xAF,
    153 /* B- */    0xAF, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xB1, 0xBB, 0xBB, 0xBD, 0xBD, 0xBF,
    154 /* C- */    0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xBF, 0xCC, 0xBF, 0xCE, 0xCF,
    155 /* D- */    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDA, 0xDC, 0xDC, 0xDC, 0xDF,
    156 /* E- */    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    157 /* F- */    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
    158 };
    159 
    160 NGramParser_IBM420::NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap):NGramParser(theNgramList, theCharMap)
    161 {
    162 	alef = 0x00;
    163 }
    164 
    165 
    166 int32_t NGramParser_IBM420::isLamAlef(int32_t b)
    167 {
    168 	if(b == 0xB2 || b == 0xB3){
    169          	return 0x47;
    170         }else if(b == 0xB4 || b == 0xB5){
    171          	return 0x49;
    172         }else if(b == 0xB8 || b == 0xB9){
    173          	return 0x56;
    174         }else
    175          	return 0x00;
    176 }
    177 
    178 /*
    179 * Arabic shaping needs to be done manually. Cannot call ArabicShaping class
    180 * because CharsetDetector is dealing with bytes not Unicode code points. We could
    181 * convert the bytes to Unicode code points but that would leave us dependent
    182 * on CharsetICU which we try to avoid. IBM420 converter amongst different versions
    183 * of JDK can produce different results and therefore is also avoided.
    184 */
    185 int32_t NGramParser_IBM420::nextByte(InputText *det)
    186 {
    187 
    188     if (byteIndex >= det->fInputLen || det->fInputBytes[byteIndex] == 0) {
    189         return -1;
    190     }
    191     int next;
    192 
    193     alef = isLamAlef(det->fInputBytes[byteIndex]);
    194     if(alef != 0x00)
    195         next = 0xB1 & 0xFF;
    196     else
    197         next = unshapeMap_IBM420[det->fInputBytes[byteIndex]& 0xFF] & 0xFF;
    198 
    199     byteIndex++;
    200 
    201     return next;
    202 }
    203 
    204 void NGramParser_IBM420::parseCharacters(InputText *det)
    205 {
    206 	int32_t b;
    207     bool ignoreSpace = FALSE;
    208 
    209     while ((b = nextByte(det)) >= 0) {
    210         uint8_t mb = charMap[b];
    211 
    212         // TODO: 0x20 might not be a space in all character sets...
    213         if (mb != 0) {
    214             if (!(mb == 0x20 && ignoreSpace)) {
    215                 addByte(mb);
    216             }
    217             ignoreSpace = (mb == 0x20);
    218         }
    219 
    220 		if(alef != 0x00){
    221             mb = charMap[alef & 0xFF];
    222 
    223             // TODO: 0x20 might not be a space in all character sets...
    224             if (mb != 0) {
    225                 if (!(mb == 0x20 && ignoreSpace)) {
    226                     addByte(mb);
    227                 }
    228 
    229                 ignoreSpace = (mb == 0x20);
    230             }
    231 
    232         }
    233     }
    234 }
    235 
    236 CharsetRecog_sbcs::CharsetRecog_sbcs()
    237 {
    238     // nothing else to do
    239 }
    240 
    241 CharsetRecog_sbcs::~CharsetRecog_sbcs()
    242 {
    243     // nothing to do
    244 }
    245 
    246 int32_t CharsetRecog_sbcs::match_sbcs(InputText *det, const int32_t ngrams[],  const uint8_t byteMap[]) const
    247 {
    248     NGramParser parser(ngrams, byteMap);
    249     int32_t result;
    250 
    251     result = parser.parse(det);
    252 
    253     return result;
    254 }
    255 
    256 static const uint8_t charMap_8859_1[] = {
    257     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    258     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    259     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    260     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    261     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
    262     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    263     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    264     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    265     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    266     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    267     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    268     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    269     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    270     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    271     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    272     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    273     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    274     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    275     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    276     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    277     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    278     0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
    279     0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
    280     0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20,
    281     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    282     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    283     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
    284     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
    285     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    286     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    287     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
    288     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
    289 };
    290 
    291 static const uint8_t charMap_8859_2[] = {
    292     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    293     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    294     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    295     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    296     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
    297     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    298     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    299     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    300     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    301     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    302     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    303     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    304     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    305     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    306     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    307     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    308     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    309     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    310     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    311     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    312     0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0x20,
    313     0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF,
    314     0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0xB7,
    315     0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF,
    316     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    317     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    318     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
    319     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
    320     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    321     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    322     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
    323     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20,
    324 };
    325 
    326 static const uint8_t charMap_8859_5[] = {
    327     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    328     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    329     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    330     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    331     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
    332     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    333     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    334     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    335     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    336     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    337     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    338     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    339     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    340     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    341     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    342     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    343     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    344     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    345     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    346     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    347     0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    348     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF,
    349     0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
    350     0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
    351     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    352     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    353     0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
    354     0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
    355     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    356     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    357     0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    358     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF,
    359 };
    360 
    361 static const uint8_t charMap_8859_6[] = {
    362     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    363     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    364     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    365     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    366     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
    367     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    368     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    369     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    370     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    371     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    372     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    373     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    374     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    375     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    376     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    377     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    378     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    379     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    380     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    381     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    382     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    383     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    384     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    385     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    386     0x20, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
    387     0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
    388     0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
    389     0xD8, 0xD9, 0xDA, 0x20, 0x20, 0x20, 0x20, 0x20,
    390     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    391     0xE8, 0xE9, 0xEA, 0x20, 0x20, 0x20, 0x20, 0x20,
    392     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    393     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    394 };
    395 
    396 static const uint8_t charMap_8859_7[] = {
    397     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    398     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    399     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    400     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    401     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
    402     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    403     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    404     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    405     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    406     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    407     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    408     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    409     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    410     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    411     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    412     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    413     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    414     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    415     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    416     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    417     0x20, 0xA1, 0xA2, 0x20, 0x20, 0x20, 0x20, 0x20,
    418     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    419     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xDC, 0x20,
    420     0xDD, 0xDE, 0xDF, 0x20, 0xFC, 0x20, 0xFD, 0xFE,
    421     0xC0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    422     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    423     0xF0, 0xF1, 0x20, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    424     0xF8, 0xF9, 0xFA, 0xFB, 0xDC, 0xDD, 0xDE, 0xDF,
    425     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    426     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    427     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    428     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20,
    429 };
    430 
    431 static const uint8_t charMap_8859_8[] = {
    432     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    433     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    434     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    435     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    436     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
    437     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    438     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    439     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    440     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    441     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    442     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    443     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    444     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    445     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    446     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    447     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    448     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    449     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    450     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    451     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    452     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    453     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    454     0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
    455     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    456     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    457     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    458     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    459     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    460     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    461     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    462     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    463     0xF8, 0xF9, 0xFA, 0x20, 0x20, 0x20, 0x20, 0x20,
    464 };
    465 
    466 static const uint8_t charMap_8859_9[] = {
    467     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    468     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    469     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    470     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    471     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
    472     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    473     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    474     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    475     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    476     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    477     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    478     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    479     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    480     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    481     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    482     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    483     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    484     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    485     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    486     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    487     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    488     0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
    489     0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
    490     0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20,
    491     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    492     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    493     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
    494     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x69, 0xFE, 0xDF,
    495     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    496     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    497     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
    498     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
    499 };
    500 
    501 static const int32_t ngrams_windows_1251[] = {
    502     0x20E220, 0x20E2EE, 0x20E4EE, 0x20E7E0, 0x20E820, 0x20EAE0, 0x20EAEE, 0x20EDE0, 0x20EDE5, 0x20EEE1, 0x20EFEE, 0x20EFF0, 0x20F0E0, 0x20F1EE, 0x20F1F2, 0x20F2EE,
    503     0x20F7F2, 0x20FDF2, 0xE0EDE8, 0xE0F2FC, 0xE3EE20, 0xE5EBFC, 0xE5EDE8, 0xE5F1F2, 0xE5F220, 0xE820EF, 0xE8E520, 0xE8E820, 0xE8FF20, 0xEBE5ED, 0xEBE820, 0xEBFCED,
    504     0xEDE020, 0xEDE520, 0xEDE8E5, 0xEDE8FF, 0xEDEE20, 0xEDEEE2, 0xEE20E2, 0xEE20EF, 0xEE20F1, 0xEEE220, 0xEEE2E0, 0xEEE3EE, 0xEEE920, 0xEEEBFC, 0xEEEC20, 0xEEF1F2,
    505     0xEFEEEB, 0xEFF0E5, 0xEFF0E8, 0xEFF0EE, 0xF0E0E2, 0xF0E5E4, 0xF1F2E0, 0xF1F2E2, 0xF1F2E8, 0xF1FF20, 0xF2E5EB, 0xF2EE20, 0xF2EEF0, 0xF2FC20, 0xF7F2EE, 0xFBF520,
    506 };
    507 
    508 static const uint8_t charMap_windows_1251[] = {
    509     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    510     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    511     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    512     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    513     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
    514     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    515     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    516     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    517     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    518     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    519     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    520     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    521     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    522     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    523     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    524     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    525     0x90, 0x83, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20,
    526     0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F,
    527     0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    528     0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F,
    529     0x20, 0xA2, 0xA2, 0xBC, 0x20, 0xB4, 0x20, 0x20,
    530     0xB8, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0xBF,
    531     0x20, 0x20, 0xB3, 0xB3, 0xB4, 0xB5, 0x20, 0x20,
    532     0xB8, 0x20, 0xBA, 0x20, 0xBC, 0xBE, 0xBE, 0xBF,
    533     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    534     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    535     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    536     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
    537     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    538     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    539     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    540     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
    541 };
    542 
    543 static const int32_t ngrams_windows_1256[] = {
    544     0x20C7E1, 0x20C7E4, 0x20C8C7, 0x20DAE1, 0x20DDED, 0x20E1E1, 0x20E3E4, 0x20E6C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E120, 0xC7E1C3, 0xC7E1C7, 0xC7E1C8,
    545     0xC7E1CA, 0xC7E1CC, 0xC7E1CD, 0xC7E1CF, 0xC7E1D3, 0xC7E1DA, 0xC7E1DE, 0xC7E1E3, 0xC7E1E6, 0xC7E1ED, 0xC7E320, 0xC7E420, 0xC7E4CA, 0xC820C7, 0xC920C7, 0xC920DD,
    546     0xC920E1, 0xC920E3, 0xC920E6, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xDA20C7, 0xDAE1EC, 0xDDED20, 0xE120C7, 0xE1C920, 0xE1EC20, 0xE1ED20,
    547     0xE320C7, 0xE3C720, 0xE3C920, 0xE3E420, 0xE420C7, 0xE520C7, 0xE5C720, 0xE6C7E1, 0xE6E420, 0xEC20C7, 0xED20C7, 0xED20E3, 0xED20E6, 0xEDC920, 0xEDD120, 0xEDE420,
    548 };
    549 
    550 static const uint8_t charMap_windows_1256[] = {
    551     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    552     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    553     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    554     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    555     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
    556     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    557     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    558     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    559     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    560     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    561     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    562     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    563     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    564     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    565     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    566     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    567     0x20, 0x81, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20,
    568     0x88, 0x20, 0x8A, 0x20, 0x9C, 0x8D, 0x8E, 0x8F,
    569     0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    570     0x98, 0x20, 0x9A, 0x20, 0x9C, 0x20, 0x20, 0x9F,
    571     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    572     0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
    573     0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
    574     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    575     0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
    576     0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
    577     0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0x20,
    578     0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
    579     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    580     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    581     0x20, 0x20, 0x20, 0x20, 0xF4, 0x20, 0x20, 0x20,
    582     0x20, 0xF9, 0x20, 0xFB, 0xFC, 0x20, 0x20, 0xFF,
    583 };
    584 
    585 static const int32_t ngrams_KOI8_R[] = {
    586     0x20C4CF, 0x20C920, 0x20CBC1, 0x20CBCF, 0x20CEC1, 0x20CEC5, 0x20CFC2, 0x20D0CF, 0x20D0D2, 0x20D2C1, 0x20D3CF, 0x20D3D4, 0x20D4CF, 0x20D720, 0x20D7CF, 0x20DAC1,
    587     0x20DCD4, 0x20DED4, 0xC1CEC9, 0xC1D4D8, 0xC5CCD8, 0xC5CEC9, 0xC5D3D4, 0xC5D420, 0xC7CF20, 0xC920D0, 0xC9C520, 0xC9C920, 0xC9D120, 0xCCC5CE, 0xCCC920, 0xCCD8CE,
    588     0xCEC120, 0xCEC520, 0xCEC9C5, 0xCEC9D1, 0xCECF20, 0xCECFD7, 0xCF20D0, 0xCF20D3, 0xCF20D7, 0xCFC7CF, 0xCFCA20, 0xCFCCD8, 0xCFCD20, 0xCFD3D4, 0xCFD720, 0xCFD7C1,
    589     0xD0CFCC, 0xD0D2C5, 0xD0D2C9, 0xD0D2CF, 0xD2C1D7, 0xD2C5C4, 0xD3D120, 0xD3D4C1, 0xD3D4C9, 0xD3D4D7, 0xD4C5CC, 0xD4CF20, 0xD4CFD2, 0xD4D820, 0xD9C820, 0xDED4CF,
    590 };
    591 
    592 static const uint8_t charMap_KOI8_R[] = {
    593     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    594     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    595     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    596     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    597     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
    598     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    599     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    600     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    601     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    602     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    603     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    604     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    605     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    606     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    607     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    608     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    609     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    610     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    611     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    612     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    613     0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20,
    614     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    615     0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20,
    616     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    617     0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
    618     0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
    619     0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
    620     0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
    621     0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
    622     0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
    623     0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
    624     0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
    625 };
    626 
/*
 * Trigram table that CharsetRecog_IBM424_he_rtl::match() hands to match_sbcs().
 * Every entry packs three byte values into the low 24 bits (cf. N_GRAM_MASK).
 * The table must keep exactly 64 entries in ascending order, because
 * NGramParser::search() binary-searches it.
 */
static const int32_t ngrams_IBM424_he_rtl[] = {
    0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641,
    0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045,
    0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x544041, 0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x554056,
    0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x684045, 0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x714069,
};
    633 
/*
 * Trigram table that CharsetRecog_IBM424_he_ltr::match() hands to match_sbcs().
 * Every entry packs three byte values into the low 24 bits (cf. N_GRAM_MASK).
 * The table must keep exactly 64 entries in ascending order, because
 * NGramParser::search() binary-searches it.
 */
static const int32_t ngrams_IBM424_he_ltr[] = {
    0x404146, 0x404154, 0x404551, 0x404554, 0x404556, 0x404558, 0x405158, 0x405462, 0x405469, 0x405546, 0x405551, 0x405746, 0x405751, 0x406846, 0x406851, 0x407141,
    0x407146, 0x407151, 0x414045, 0x414054, 0x414055, 0x414071, 0x414540, 0x414645, 0x415440, 0x415640, 0x424045, 0x424055, 0x424071, 0x454045, 0x454051, 0x454054,
    0x454055, 0x454057, 0x454068, 0x454071, 0x455440, 0x464140, 0x464540, 0x484140, 0x514140, 0x514240, 0x514540, 0x544045, 0x544055, 0x544071, 0x546240, 0x546940,
    0x555151, 0x555158, 0x555168, 0x564045, 0x564055, 0x564071, 0x564240, 0x564540, 0x624540, 0x694045, 0x694055, 0x694071, 0x694540, 0x714140, 0x714540, 0x714651,
};
    640 
/*
 * Byte map passed to match_sbcs() together with the IBM424 trigram tables.
 * It has 256 entries laid out as 16 rows of 16: the row label is the high
 * nibble of the byte value and the column header is the low nibble.
 * Most positions hold 0x40; positions -1..-9 of rows C- and D-, and -2..-9 of
 * row E-, hold the same values as the matching positions of rows 8-, 9- and A-.
 * NOTE(review): 0x40 is presumably the EBCDIC space and the C-/D-/E- rows
 * presumably fold upper-case Latin onto lower case -- confirm against the
 * code page. The single 0x00 entry (byte 0x7D) is kept as found.
 */
static const uint8_t charMap_IBM424_he[] = {
/*           -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */
/* 0- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 1- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 2- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 3- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 4- */    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 5- */    0x40, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 6- */    0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 7- */    0x40, 0x71, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x00, 0x40, 0x40,
/* 8- */    0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 9- */    0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* A- */    0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* B- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* C- */    0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* D- */    0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* E- */    0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* F- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
};
    660 
/*
 * Trigram table that CharsetRecog_IBM420_ar_rtl::match() hands to match_sbcs().
 * Every entry packs three byte values into the low 24 bits (cf. N_GRAM_MASK).
 * The table must keep exactly 64 entries in ascending order, because
 * NGramParser::search() binary-searches it.
 */
static const int32_t ngrams_IBM420_ar_rtl[] = {
    0x4056B1, 0x4056BD, 0x405856, 0x409AB1, 0x40ABDC, 0x40B1B1, 0x40BBBD, 0x40CF56, 0x564056, 0x564640, 0x566340, 0x567540, 0x56B140, 0x56B149, 0x56B156, 0x56B158,
    0x56B163, 0x56B167, 0x56B169, 0x56B173, 0x56B178, 0x56B19A, 0x56B1AD, 0x56B1BB, 0x56B1CF, 0x56B1DC, 0x56BB40, 0x56BD40, 0x56BD63, 0x584056, 0x624056, 0x6240AB,
    0x6240B1, 0x6240BB, 0x6240CF, 0x634056, 0x734056, 0x736240, 0x754056, 0x756240, 0x784056, 0x9A4056, 0x9AB1DA, 0xABDC40, 0xB14056, 0xB16240, 0xB1DA40, 0xB1DC40,
    0xBB4056, 0xBB5640, 0xBB6240, 0xBBBD40, 0xBD4056, 0xBF4056, 0xBF5640, 0xCF56B1, 0xCFBD40, 0xDA4056, 0xDC4056, 0xDC40BB, 0xDC40CF, 0xDC6240, 0xDC7540, 0xDCBD40,
};
    667 
/*
 * Trigram table that CharsetRecog_IBM420_ar_ltr::match() hands to match_sbcs().
 * Every entry packs three byte values into the low 24 bits (cf. N_GRAM_MASK).
 * The table must keep exactly 64 entries in ascending order, because
 * NGramParser::search() binary-searches it.
 */
static const int32_t ngrams_IBM420_ar_ltr[] = {
    0x404656, 0x4056BB, 0x4056BF, 0x406273, 0x406275, 0x4062B1, 0x4062BB, 0x4062DC, 0x406356, 0x407556, 0x4075DC, 0x40B156, 0x40BB56, 0x40BD56, 0x40BDBB, 0x40BDCF,
    0x40BDDC, 0x40DAB1, 0x40DCAB, 0x40DCB1, 0x49B156, 0x564056, 0x564058, 0x564062, 0x564063, 0x564073, 0x564075, 0x564078, 0x56409A, 0x5640B1, 0x5640BB, 0x5640BD,
    0x5640BF, 0x5640DA, 0x5640DC, 0x565840, 0x56B156, 0x56CF40, 0x58B156, 0x63B156, 0x63BD56, 0x67B156, 0x69B156, 0x73B156, 0x78B156, 0x9AB156, 0xAB4062, 0xADB156,
    0xB14062, 0xB15640, 0xB156CF, 0xB19A40, 0xB1B140, 0xBB4062, 0xBB40DC, 0xBBB156, 0xBD5640, 0xBDBB40, 0xCF4062, 0xCF40DC, 0xCFB156, 0xDAB19A, 0xDCAB40, 0xDCB156
};
    674 
/*
 * Byte map passed to match_sbcs() together with the IBM420 trigram tables.
 * It has 256 entries laid out as 16 rows of 16: the row label is the high
 * nibble of the byte value and the column header is the low nibble.
 * Rows 0- to 3- hold 0x40 throughout; positions -1..-9 of rows C- and D-, and
 * -2..-9 of row E-, hold the same values as the matching positions of rows
 * 8-, 9- and A-.
 * NOTE(review): 0x40 is presumably the EBCDIC space and the C-/D-/E- rows
 * presumably fold upper-case Latin onto lower case -- confirm against the
 * code page.
 */
static const uint8_t charMap_IBM420_ar[]= {
/*           -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */
/* 0- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 1- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 2- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 3- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 4- */    0x40, 0x40, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 5- */    0x40, 0x51, 0x52, 0x40, 0x40, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 6- */    0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 7- */    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 8- */    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
/* 9- */    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
/* A- */    0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
/* B- */    0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0x40, 0x40, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
/* C- */    0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0xCB, 0x40, 0xCD, 0x40, 0xCF,
/* D- */    0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
/* E- */    0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF,
/* F- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40,
};
    694 
// N-gram tables for ISO-8859-1, -2, -5, -6, -7, -8 and -9
    696 
/*
 * Pairs one trigram table with the language it belongs to.  Used for the
 * charsets whose recognizer tries several languages (ngrams_8859_1 and
 * ngrams_8859_2).
 */
struct NGramsPlusLang {
    // 64 packed trigrams; NGramParser::search() expects exactly 64 entries.
    const int32_t ngrams[64];
    // Language code reported with a match, e.g. "en".
    const char *  lang;
};
    701 
/*
 * Per-language trigram tables scored by CharsetRecog_8859_1::match(), one
 * entry each for en, da, de, es, fr, it, nl, no, pt and sv.
 * Every ngrams array packs three byte values per entry into the low 24 bits
 * and must keep 64 entries in ascending order, because NGramParser::search()
 * binary-searches it.
 */
static const NGramsPlusLang ngrams_8859_1[] =  {
  {
    {
    0x206120, 0x20616E, 0x206265, 0x20636F, 0x20666F, 0x206861, 0x206865, 0x20696E, 0x206D61, 0x206F66, 0x207072, 0x207265, 0x207361, 0x207374, 0x207468, 0x20746F,
    0x207768, 0x616964, 0x616C20, 0x616E20, 0x616E64, 0x617320, 0x617420, 0x617465, 0x617469, 0x642061, 0x642074, 0x652061, 0x652073, 0x652074, 0x656420, 0x656E74,
    0x657220, 0x657320, 0x666F72, 0x686174, 0x686520, 0x686572, 0x696420, 0x696E20, 0x696E67, 0x696F6E, 0x697320, 0x6E2061, 0x6E2074, 0x6E6420, 0x6E6720, 0x6E7420,
    0x6F6620, 0x6F6E20, 0x6F7220, 0x726520, 0x727320, 0x732061, 0x732074, 0x736169, 0x737420, 0x742074, 0x746572, 0x746861, 0x746865, 0x74696F, 0x746F20, 0x747320,
    },
    "en"
  },
  {
    {
    0x206166, 0x206174, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207369, 0x207374, 0x207469, 0x207669, 0x616620,
    0x616E20, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646572, 0x646574, 0x652073, 0x656420, 0x656465, 0x656E20, 0x656E64, 0x657220, 0x657265, 0x657320,
    0x657420, 0x666F72, 0x676520, 0x67656E, 0x676572, 0x696765, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6572, 0x6C6967, 0x6C6C65, 0x6D6564, 0x6E6465, 0x6E6520,
    0x6E6720, 0x6E6765, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722064, 0x722065, 0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696C, 0x766572,
    },
    "da"
  },
  {
    {
    0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765, 0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F,
    0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E, 0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220,
    0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65, 0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465,
    0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 0x766572,
    },
    "de"
  },
  {
    {
    0x206120, 0x206361, 0x20636F, 0x206465, 0x20656C, 0x20656E, 0x206573, 0x20696E, 0x206C61, 0x206C6F, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
    0x20756E, 0x207920, 0x612063, 0x612064, 0x612065, 0x61206C, 0x612070, 0x616369, 0x61646F, 0x616C20, 0x617220, 0x617320, 0x6369F3, 0x636F6E, 0x646520, 0x64656C,
    0x646F20, 0x652064, 0x652065, 0x65206C, 0x656C20, 0x656E20, 0x656E74, 0x657320, 0x657374, 0x69656E, 0x69F36E, 0x6C6120, 0x6C6F73, 0x6E2065, 0x6E7465, 0x6F2064,
    0x6F2065, 0x6F6E20, 0x6F7220, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732064, 0x732065, 0x732070, 0x736520, 0x746520, 0x746F20, 0x756520, 0xF36E20,
    },
    "es"
  },
  {
    {
    0x206175, 0x20636F, 0x206461, 0x206465, 0x206475, 0x20656E, 0x206574, 0x206C61, 0x206C65, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207365, 0x20736F, 0x20756E,
    0x20E020, 0x616E74, 0x617469, 0x636520, 0x636F6E, 0x646520, 0x646573, 0x647520, 0x652061, 0x652063, 0x652064, 0x652065, 0x65206C, 0x652070, 0x652073, 0x656E20,
    0x656E74, 0x657220, 0x657320, 0x657420, 0x657572, 0x696F6E, 0x697320, 0x697420, 0x6C6120, 0x6C6520, 0x6C6573, 0x6D656E, 0x6E2064, 0x6E6520, 0x6E7320, 0x6E7420,
    0x6F6E20, 0x6F6E74, 0x6F7572, 0x717565, 0x72206C, 0x726520, 0x732061, 0x732064, 0x732065, 0x73206C, 0x732070, 0x742064, 0x746520, 0x74696F, 0x756520, 0x757220,
    },
    "fr"
  },
  {
    {
    0x20616C, 0x206368, 0x20636F, 0x206465, 0x206469, 0x206520, 0x20696C, 0x20696E, 0x206C61, 0x207065, 0x207072, 0x20756E, 0x612063, 0x612064, 0x612070, 0x612073,
    0x61746F, 0x636865, 0x636F6E, 0x64656C, 0x646920, 0x652061, 0x652063, 0x652064, 0x652069, 0x65206C, 0x652070, 0x652073, 0x656C20, 0x656C6C, 0x656E74, 0x657220,
    0x686520, 0x692061, 0x692063, 0x692064, 0x692073, 0x696120, 0x696C20, 0x696E20, 0x696F6E, 0x6C6120, 0x6C6520, 0x6C6920, 0x6C6C61, 0x6E6520, 0x6E6920, 0x6E6F20,
    0x6E7465, 0x6F2061, 0x6F2064, 0x6F2069, 0x6F2073, 0x6F6E20, 0x6F6E65, 0x706572, 0x726120, 0x726520, 0x736920, 0x746120, 0x746520, 0x746920, 0x746F20, 0x7A696F,
    },
    "it"
  },
  {
    {
    0x20616C, 0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656E, 0x206765, 0x206865, 0x20696E, 0x206D61, 0x206D65, 0x206F70, 0x207465, 0x207661, 0x207665,
    0x20766F, 0x207765, 0x207A69, 0x61616E, 0x616172, 0x616E20, 0x616E64, 0x617220, 0x617420, 0x636874, 0x646520, 0x64656E, 0x646572, 0x652062, 0x652076, 0x65656E,
    0x656572, 0x656E20, 0x657220, 0x657273, 0x657420, 0x67656E, 0x686574, 0x696520, 0x696E20, 0x696E67, 0x697320, 0x6E2062, 0x6E2064, 0x6E2065, 0x6E2068, 0x6E206F,
    0x6E2076, 0x6E6465, 0x6E6720, 0x6F6E64, 0x6F6F72, 0x6F7020, 0x6F7220, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x76616E, 0x766572, 0x766F6F,
    },
    "nl"
  },
  {
    {
    0x206174, 0x206176, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207365, 0x20736B, 0x20736F, 0x207374, 0x207469,
    0x207669, 0x20E520, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646574, 0x652073, 0x656420, 0x656E20, 0x656E65, 0x657220, 0x657265, 0x657420, 0x657474,
    0x666F72, 0x67656E, 0x696B6B, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6520, 0x6C6C65, 0x6D6564, 0x6D656E, 0x6E2073, 0x6E6520, 0x6E6720, 0x6E6765, 0x6E6E65,
    0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722073, 0x726520, 0x736F6D, 0x737465, 0x742073, 0x746520, 0x74656E, 0x746572, 0x74696C, 0x747420, 0x747465, 0x766572,
    },
    "no"
  },
  {
    {
    0x206120, 0x20636F, 0x206461, 0x206465, 0x20646F, 0x206520, 0x206573, 0x206D61, 0x206E6F, 0x206F20, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
    0x20756D, 0x612061, 0x612063, 0x612064, 0x612070, 0x616465, 0x61646F, 0x616C20, 0x617220, 0x617261, 0x617320, 0x636F6D, 0x636F6E, 0x646120, 0x646520, 0x646F20,
    0x646F73, 0x652061, 0x652064, 0x656D20, 0x656E74, 0x657320, 0x657374, 0x696120, 0x696361, 0x6D656E, 0x6E7465, 0x6E746F, 0x6F2061, 0x6F2063, 0x6F2064, 0x6F2065,
    0x6F2070, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732061, 0x732064, 0x732065, 0x732070, 0x737461, 0x746520, 0x746F20, 0x756520, 0xE36F20, 0xE7E36F,
    },
    "pt"
  },
  {
    {
    0x206174, 0x206176, 0x206465, 0x20656E, 0x2066F6, 0x206861, 0x206920, 0x20696E, 0x206B6F, 0x206D65, 0x206F63, 0x2070E5, 0x20736B, 0x20736F, 0x207374, 0x207469,
    0x207661, 0x207669, 0x20E472, 0x616465, 0x616E20, 0x616E64, 0x617220, 0x617474, 0x636820, 0x646520, 0x64656E, 0x646572, 0x646574, 0x656420, 0x656E20, 0x657220,
    0x657420, 0x66F672, 0x67656E, 0x696C6C, 0x696E67, 0x6B6120, 0x6C6C20, 0x6D6564, 0x6E2073, 0x6E6120, 0x6E6465, 0x6E6720, 0x6E6765, 0x6E696E, 0x6F6368, 0x6F6D20,
    0x6F6E20, 0x70E520, 0x722061, 0x722073, 0x726120, 0x736B61, 0x736F6D, 0x742073, 0x746120, 0x746520, 0x746572, 0x74696C, 0x747420, 0x766172, 0xE47220, 0xF67220,
    },
    "sv"
  }
};
    794 
    795 
/*
 * Per-language trigram tables scored by CharsetRecog_8859_2::match(), one
 * entry each for cs, hu, pl and ro.
 * Every ngrams array packs three byte values per entry into the low 24 bits
 * and must keep 64 entries in ascending order, because NGramParser::search()
 * binary-searches it.
 */
static const NGramsPlusLang ngrams_8859_2[] =  {
  {
    {
    0x206120, 0x206279, 0x20646F, 0x206A65, 0x206E61, 0x206E65, 0x206F20, 0x206F64, 0x20706F, 0x207072, 0x2070F8, 0x20726F, 0x207365, 0x20736F, 0x207374, 0x20746F,
    0x207620, 0x207679, 0x207A61, 0x612070, 0x636520, 0x636820, 0x652070, 0x652073, 0x652076, 0x656D20, 0x656EED, 0x686F20, 0x686F64, 0x697374, 0x6A6520, 0x6B7465,
    0x6C6520, 0x6C6920, 0x6E6120, 0x6EE920, 0x6EEC20, 0x6EED20, 0x6F2070, 0x6F646E, 0x6F6A69, 0x6F7374, 0x6F7520, 0x6F7661, 0x706F64, 0x706F6A, 0x70726F, 0x70F865,
    0x736520, 0x736F75, 0x737461, 0x737469, 0x73746E, 0x746572, 0x746EED, 0x746F20, 0x752070, 0xBE6520, 0xE16EED, 0xE9686F, 0xED2070, 0xED2073, 0xED6D20, 0xF86564,
    },
    "cs"
  },
  {
    {
    0x206120, 0x20617A, 0x206265, 0x206567, 0x20656C, 0x206665, 0x206861, 0x20686F, 0x206973, 0x206B65, 0x206B69, 0x206BF6, 0x206C65, 0x206D61, 0x206D65, 0x206D69,
    0x206E65, 0x20737A, 0x207465, 0x20E973, 0x612061, 0x61206B, 0x61206D, 0x612073, 0x616B20, 0x616E20, 0x617A20, 0x62616E, 0x62656E, 0x656779, 0x656B20, 0x656C20,
    0x656C65, 0x656D20, 0x656E20, 0x657265, 0x657420, 0x657465, 0x657474, 0x677920, 0x686F67, 0x696E74, 0x697320, 0x6B2061, 0x6BF67A, 0x6D6567, 0x6D696E, 0x6E2061,
    0x6E616B, 0x6E656B, 0x6E656D, 0x6E7420, 0x6F6779, 0x732061, 0x737A65, 0x737A74, 0x737AE1, 0x73E967, 0x742061, 0x747420, 0x74E173, 0x7A6572, 0xE16E20, 0xE97320,
    },
    "hu"
  },
  {
    {
    0x20637A, 0x20646F, 0x206920, 0x206A65, 0x206B6F, 0x206D61, 0x206D69, 0x206E61, 0x206E69, 0x206F64, 0x20706F, 0x207072, 0x207369, 0x207720, 0x207769, 0x207779,
    0x207A20, 0x207A61, 0x612070, 0x612077, 0x616E69, 0x636820, 0x637A65, 0x637A79, 0x646F20, 0x647A69, 0x652070, 0x652073, 0x652077, 0x65207A, 0x65676F, 0x656A20,
    0x656D20, 0x656E69, 0x676F20, 0x696120, 0x696520, 0x69656A, 0x6B6120, 0x6B6920, 0x6B6965, 0x6D6965, 0x6E6120, 0x6E6961, 0x6E6965, 0x6F2070, 0x6F7761, 0x6F7769,
    0x706F6C, 0x707261, 0x70726F, 0x70727A, 0x727A65, 0x727A79, 0x7369EA, 0x736B69, 0x737461, 0x776965, 0x796368, 0x796D20, 0x7A6520, 0x7A6965, 0x7A7920, 0xF37720,
    },
    "pl"
  },
  {
    {
    0x206120, 0x206163, 0x206361, 0x206365, 0x20636F, 0x206375, 0x206465, 0x206469, 0x206C61, 0x206D61, 0x207065, 0x207072, 0x207365, 0x2073E3, 0x20756E, 0x20BA69,
    0x20EE6E, 0x612063, 0x612064, 0x617265, 0x617420, 0x617465, 0x617520, 0x636172, 0x636F6E, 0x637520, 0x63E320, 0x646520, 0x652061, 0x652063, 0x652064, 0x652070,
    0x652073, 0x656120, 0x656920, 0x656C65, 0x656E74, 0x657374, 0x692061, 0x692063, 0x692064, 0x692070, 0x696520, 0x696920, 0x696E20, 0x6C6120, 0x6C6520, 0x6C6F72,
    0x6C7569, 0x6E6520, 0x6E7472, 0x6F7220, 0x70656E, 0x726520, 0x726561, 0x727520, 0x73E320, 0x746520, 0x747275, 0x74E320, 0x756920, 0x756C20, 0xBA6920, 0xEE6E20,
    },
    "ro"
  }
};
    834 
/*
 * Trigram table that CharsetRecog_8859_5_ru::match() hands to match_sbcs().
 * Every entry packs three byte values into the low 24 bits (cf. N_GRAM_MASK).
 * The table must keep exactly 64 entries in ascending order, because
 * NGramParser::search() binary-searches it.
 */
static const int32_t ngrams_8859_5_ru[] = {
    0x20D220, 0x20D2DE, 0x20D4DE, 0x20D7D0, 0x20D820, 0x20DAD0, 0x20DADE, 0x20DDD0, 0x20DDD5, 0x20DED1, 0x20DFDE, 0x20DFE0, 0x20E0D0, 0x20E1DE, 0x20E1E2, 0x20E2DE,
    0x20E7E2, 0x20EDE2, 0xD0DDD8, 0xD0E2EC, 0xD3DE20, 0xD5DBEC, 0xD5DDD8, 0xD5E1E2, 0xD5E220, 0xD820DF, 0xD8D520, 0xD8D820, 0xD8EF20, 0xDBD5DD, 0xDBD820, 0xDBECDD,
    0xDDD020, 0xDDD520, 0xDDD8D5, 0xDDD8EF, 0xDDDE20, 0xDDDED2, 0xDE20D2, 0xDE20DF, 0xDE20E1, 0xDED220, 0xDED2D0, 0xDED3DE, 0xDED920, 0xDEDBEC, 0xDEDC20, 0xDEE1E2,
    0xDFDEDB, 0xDFE0D5, 0xDFE0D8, 0xDFE0DE, 0xE0D0D2, 0xE0D5D4, 0xE1E2D0, 0xE1E2D2, 0xE1E2D8, 0xE1EF20, 0xE2D5DB, 0xE2DE20, 0xE2DEE0, 0xE2EC20, 0xE7E2DE, 0xEBE520,
};
    841 
/*
 * Trigram table that CharsetRecog_8859_6_ar::match() hands to match_sbcs().
 * Every entry packs three byte values into the low 24 bits (cf. N_GRAM_MASK).
 * The table must keep exactly 64 entries in ascending order, because
 * NGramParser::search() binary-searches it.
 */
static const int32_t ngrams_8859_6_ar[] = {
    0x20C7E4, 0x20C7E6, 0x20C8C7, 0x20D9E4, 0x20E1EA, 0x20E4E4, 0x20E5E6, 0x20E8C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E420, 0xC7E4C3, 0xC7E4C7, 0xC7E4C8,
    0xC7E4CA, 0xC7E4CC, 0xC7E4CD, 0xC7E4CF, 0xC7E4D3, 0xC7E4D9, 0xC7E4E2, 0xC7E4E5, 0xC7E4E8, 0xC7E4EA, 0xC7E520, 0xC7E620, 0xC7E6CA, 0xC820C7, 0xC920C7, 0xC920E1,
    0xC920E4, 0xC920E5, 0xC920E8, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xD920C7, 0xD9E4E9, 0xE1EA20, 0xE420C7, 0xE4C920, 0xE4E920, 0xE4EA20,
    0xE520C7, 0xE5C720, 0xE5C920, 0xE5E620, 0xE620C7, 0xE720C7, 0xE7C720, 0xE8C7E4, 0xE8E620, 0xE920C7, 0xEA20C7, 0xEA20E5, 0xEA20E8, 0xEAC920, 0xEAD120, 0xEAE620,
};
    848 
/*
 * Trigram table that CharsetRecog_8859_7_el::match() hands to match_sbcs().
 * Every entry packs three byte values into the low 24 bits (cf. N_GRAM_MASK).
 * The table must keep exactly 64 entries in ascending order, because
 * NGramParser::search() binary-searches it.
 */
static const int32_t ngrams_8859_7_el[] = {
    0x20E1ED, 0x20E1F0, 0x20E3E9, 0x20E4E9, 0x20E5F0, 0x20E720, 0x20EAE1, 0x20ECE5, 0x20EDE1, 0x20EF20, 0x20F0E1, 0x20F0EF, 0x20F0F1, 0x20F3F4, 0x20F3F5, 0x20F4E7,
    0x20F4EF, 0xDFE120, 0xE120E1, 0xE120F4, 0xE1E920, 0xE1ED20, 0xE1F0FC, 0xE1F220, 0xE3E9E1, 0xE5E920, 0xE5F220, 0xE720F4, 0xE7ED20, 0xE7F220, 0xE920F4, 0xE9E120,
    0xE9EADE, 0xE9F220, 0xEAE1E9, 0xEAE1F4, 0xECE520, 0xED20E1, 0xED20E5, 0xED20F0, 0xEDE120, 0xEFF220, 0xEFF520, 0xF0EFF5, 0xF0F1EF, 0xF0FC20, 0xF220E1, 0xF220E5,
    0xF220EA, 0xF220F0, 0xF220F4, 0xF3E520, 0xF3E720, 0xF3F4EF, 0xF4E120, 0xF4E1E9, 0xF4E7ED, 0xF4E7F2, 0xF4E9EA, 0xF4EF20, 0xF4EFF5, 0xF4F9ED, 0xF9ED20, 0xFEED20,
};
    855 
/*
 * Trigram table that CharsetRecog_8859_8_I_he::match() hands to match_sbcs().
 * Every entry packs three byte values into the low 24 bits (cf. N_GRAM_MASK).
 * The table must keep exactly 64 entries in ascending order, because
 * NGramParser::search() binary-searches it.
 */
static const int32_t ngrams_8859_8_I_he[] = {
    0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5, 0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0,
    0x20F2EC, 0x20F9EC, 0xE0FA20, 0xE420E0, 0xE420E1, 0xE420E4, 0xE420EC, 0xE420EE, 0xE420F9, 0xE4E5E0, 0xE5E020, 0xE5ED20, 0xE5EF20, 0xE5F820, 0xE5FA20, 0xE920E4,
    0xE9E420, 0xE9E5FA, 0xE9E9ED, 0xE9ED20, 0xE9EF20, 0xE9F820, 0xE9FA20, 0xEC20E0, 0xEC20E4, 0xECE020, 0xECE420, 0xED20E0, 0xED20E1, 0xED20E4, 0xED20EC, 0xED20EE,
    0xED20F9, 0xEEE420, 0xEF20E4, 0xF0E420, 0xF0E920, 0xF0E9ED, 0xF2EC20, 0xF820E4, 0xF8E9ED, 0xF9EC20, 0xFA20E0, 0xFA20E1, 0xFA20E4, 0xFA20EC, 0xFA20EE, 0xFA20F9,
};
    862 
/*
 * Trigram table that CharsetRecog_8859_8_he::match() hands to match_sbcs().
 * Every entry packs three byte values into the low 24 bits (cf. N_GRAM_MASK).
 * The table must keep exactly 64 entries in ascending order, because
 * NGramParser::search() binary-searches it.
 */
static const int32_t ngrams_8859_8_he[] = {
    0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2, 0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0,
    0x20FAE5, 0x20FAE9, 0xE020E4, 0xE020EC, 0xE020ED, 0xE020FA, 0xE0E420, 0xE0E5E4, 0xE0EC20, 0xE0EE20, 0xE120E4, 0xE120ED, 0xE120FA, 0xE420E4, 0xE420E9, 0xE420EC,
    0xE420ED, 0xE420EF, 0xE420F8, 0xE420FA, 0xE4EC20, 0xE5E020, 0xE5E420, 0xE7E020, 0xE9E020, 0xE9E120, 0xE9E420, 0xEC20E4, 0xEC20ED, 0xEC20FA, 0xECF220, 0xECF920,
    0xEDE9E9, 0xEDE9F0, 0xEDE9F8, 0xEE20E4, 0xEE20ED, 0xEE20FA, 0xEEE120, 0xEEE420, 0xF2E420, 0xF920E4, 0xF920ED, 0xF920FA, 0xF9E420, 0xFAE020, 0xFAE420, 0xFAE5E9,
};
    869 
/*
 * Trigram table that CharsetRecog_8859_9_tr::match() hands to match_sbcs().
 * Every entry packs three byte values into the low 24 bits (cf. N_GRAM_MASK).
 * The table must keep exactly 64 entries in ascending order, because
 * NGramParser::search() binary-searches it.
 */
static const int32_t ngrams_8859_9_tr[] = {
    0x206261, 0x206269, 0x206275, 0x206461, 0x206465, 0x206765, 0x206861, 0x20696C, 0x206B61, 0x206B6F, 0x206D61, 0x206F6C, 0x207361, 0x207461, 0x207665, 0x207961,
    0x612062, 0x616B20, 0x616C61, 0x616D61, 0x616E20, 0x616EFD, 0x617220, 0x617261, 0x6172FD, 0x6173FD, 0x617961, 0x626972, 0x646120, 0x646520, 0x646920, 0x652062,
    0x65206B, 0x656469, 0x656E20, 0x657220, 0x657269, 0x657369, 0x696C65, 0x696E20, 0x696E69, 0x697220, 0x6C616E, 0x6C6172, 0x6C6520, 0x6C6572, 0x6E2061, 0x6E2062,
    0x6E206B, 0x6E6461, 0x6E6465, 0x6E6520, 0x6E6920, 0x6E696E, 0x6EFD20, 0x72696E, 0x72FD6E, 0x766520, 0x796120, 0x796F72, 0xFD6E20, 0xFD6E64, 0xFD6EFD, 0xFDF0FD,
};
    876 
    877 CharsetRecog_8859_1::~CharsetRecog_8859_1()
    878 {
    879     // nothing to do
    880 }
    881 
    882 UBool CharsetRecog_8859_1::match(InputText *textIn, CharsetMatch *results) const {
    883     const char *name = textIn->fC1Bytes? "windows-1252" : "ISO-8859-1";
    884     uint32_t i;
    885     int32_t bestConfidenceSoFar = -1;
    886     for (i=0; i < ARRAY_SIZE(ngrams_8859_1) ; i++) {
    887         const int32_t *ngrams = ngrams_8859_1[i].ngrams;
    888         const char    *lang   = ngrams_8859_1[i].lang;
    889         int32_t confidence = match_sbcs(textIn, ngrams, charMap_8859_1);
    890         if (confidence > bestConfidenceSoFar) {
    891             results->set(textIn, this, confidence, name, lang);
    892             bestConfidenceSoFar = confidence;
    893         }
    894     }
    895     return (bestConfidenceSoFar > 0);
    896 }
    897 
    898 const char *CharsetRecog_8859_1::getName() const
    899 {
    900     return "ISO-8859-1";
    901 }
    902 
    903 
    904 CharsetRecog_8859_2::~CharsetRecog_8859_2()
    905 {
    906     // nothing to do
    907 }
    908 
    909 UBool CharsetRecog_8859_2::match(InputText *textIn, CharsetMatch *results) const {
    910     const char *name = textIn->fC1Bytes? "windows-1250" : "ISO-8859-2";
    911     uint32_t i;
    912     int32_t bestConfidenceSoFar = -1;
    913     for (i=0; i < ARRAY_SIZE(ngrams_8859_2) ; i++) {
    914         const int32_t *ngrams = ngrams_8859_2[i].ngrams;
    915         const char    *lang   = ngrams_8859_2[i].lang;
    916         int32_t confidence = match_sbcs(textIn, ngrams, charMap_8859_2);
    917         if (confidence > bestConfidenceSoFar) {
    918             results->set(textIn, this, confidence, name, lang);
    919             bestConfidenceSoFar = confidence;
    920         }
    921     }
    922     return (bestConfidenceSoFar > 0);
    923 }
    924 
    925 const char *CharsetRecog_8859_2::getName() const
    926 {
    927     return "ISO-8859-2";
    928 }
    929 
    930 
    931 CharsetRecog_8859_5::~CharsetRecog_8859_5()
    932 {
    933     // nothing to do
    934 }
    935 
    936 const char *CharsetRecog_8859_5::getName() const
    937 {
    938     return "ISO-8859-5";
    939 }
    940 
    941 CharsetRecog_8859_5_ru::~CharsetRecog_8859_5_ru()
    942 {
    943     // nothing to do
    944 }
    945 
    946 const char *CharsetRecog_8859_5_ru::getLanguage() const
    947 {
    948     return "ru";
    949 }
    950 
    951 UBool CharsetRecog_8859_5_ru::match(InputText *textIn, CharsetMatch *results) const
    952 {
    953     int32_t confidence = match_sbcs(textIn, ngrams_8859_5_ru, charMap_8859_5);
    954     results->set(textIn, this, confidence);
    955     return (confidence > 0);
    956 }
    957 
    958 CharsetRecog_8859_6::~CharsetRecog_8859_6()
    959 {
    960     // nothing to do
    961 }
    962 
    963 const char *CharsetRecog_8859_6::getName() const
    964 {
    965     return "ISO-8859-6";
    966 }
    967 
    968 CharsetRecog_8859_6_ar::~CharsetRecog_8859_6_ar()
    969 {
    970     // nothing to do
    971 }
    972 
    973 const char *CharsetRecog_8859_6_ar::getLanguage() const
    974 {
    975     return "ar";
    976 }
    977 
    978 UBool CharsetRecog_8859_6_ar::match(InputText *textIn, CharsetMatch *results) const
    979 {
    980     int32_t confidence = match_sbcs(textIn, ngrams_8859_6_ar, charMap_8859_6);
    981     results->set(textIn, this, confidence);
    982     return (confidence > 0);
    983 }
    984 
    985 CharsetRecog_8859_7::~CharsetRecog_8859_7()
    986 {
    987     // nothing to do
    988 }
    989 
    990 const char *CharsetRecog_8859_7::getName() const
    991 {
    992     return "ISO-8859-7";
    993 }
    994 
    995 CharsetRecog_8859_7_el::~CharsetRecog_8859_7_el()
    996 {
    997     // nothing to do
    998 }
    999 
   1000 const char *CharsetRecog_8859_7_el::getLanguage() const
   1001 {
   1002     return "el";
   1003 }
   1004 
   1005 UBool CharsetRecog_8859_7_el::match(InputText *textIn, CharsetMatch *results) const
   1006 {
   1007     const char *name = textIn->fC1Bytes? "windows-1253" : "ISO-8859-7";
   1008     int32_t confidence = match_sbcs(textIn, ngrams_8859_7_el, charMap_8859_7);
   1009     results->set(textIn, this, confidence, name, "el");
   1010     return (confidence > 0);
   1011 }
   1012 
   1013 CharsetRecog_8859_8::~CharsetRecog_8859_8()
   1014 {
   1015     // nothing to do
   1016 }
   1017 
   1018 const char *CharsetRecog_8859_8::getName() const
   1019 {
   1020     return "ISO-8859-8";
   1021 }
   1022 
   1023 CharsetRecog_8859_8_I_he::~CharsetRecog_8859_8_I_he ()
   1024 {
   1025     // nothing to do
   1026 }
   1027 
   1028 const char *CharsetRecog_8859_8_I_he::getName() const
   1029 {
   1030     return "ISO-8859-8-I";
   1031 }
   1032 
   1033 const char *CharsetRecog_8859_8_I_he::getLanguage() const
   1034 {
   1035     return "he";
   1036 }
   1037 
   1038 UBool CharsetRecog_8859_8_I_he::match(InputText *textIn, CharsetMatch *results) const
   1039 {
   1040     const char *name = textIn->fC1Bytes? "windows-1255" : "ISO-8859-8-I";
   1041     int32_t confidence = match_sbcs(textIn, ngrams_8859_8_I_he, charMap_8859_8);
   1042     results->set(textIn, this, confidence, name, "he");
   1043     return (confidence > 0);
   1044 }
   1045 
   1046 CharsetRecog_8859_8_he::~CharsetRecog_8859_8_he()
   1047 {
   1048     // od ot gnihton
   1049 }
   1050 
   1051 const char *CharsetRecog_8859_8_he::getLanguage() const
   1052 {
   1053     return "he";
   1054 }
   1055 
   1056 UBool CharsetRecog_8859_8_he::match(InputText *textIn, CharsetMatch *results) const
   1057 {
   1058     const char *name = textIn->fC1Bytes? "windows-1255" : "ISO-8859-8";
   1059     int32_t confidence = match_sbcs(textIn, ngrams_8859_8_he, charMap_8859_8);
   1060     results->set(textIn, this, confidence, name, "he");
   1061     return (confidence > 0);
   1062 }
   1063 
   1064 CharsetRecog_8859_9::~CharsetRecog_8859_9()
   1065 {
   1066     // nothing to do
   1067 }
   1068 
   1069 const char *CharsetRecog_8859_9::getName() const
   1070 {
   1071     return "ISO-8859-9";
   1072 }
   1073 
   1074 CharsetRecog_8859_9_tr::~CharsetRecog_8859_9_tr ()
   1075 {
   1076     // nothing to do
   1077 }
   1078 
   1079 const char *CharsetRecog_8859_9_tr::getLanguage() const
   1080 {
   1081     return "tr";
   1082 }
   1083 
   1084 UBool CharsetRecog_8859_9_tr::match(InputText *textIn, CharsetMatch *results) const
   1085 {
   1086     const char *name = textIn->fC1Bytes? "windows-1254" : "ISO-8859-9";
   1087     int32_t confidence = match_sbcs(textIn, ngrams_8859_9_tr, charMap_8859_9);
   1088     results->set(textIn, this, confidence, name, "tr");
   1089     return (confidence > 0);
   1090 }
   1091 
   1092 CharsetRecog_windows_1256::~CharsetRecog_windows_1256()
   1093 {
   1094     // nothing to do
   1095 }
   1096 
   1097 const char *CharsetRecog_windows_1256::getName() const
   1098 {
   1099     return  "windows-1256";
   1100 }
   1101 
   1102 const char *CharsetRecog_windows_1256::getLanguage() const
   1103 {
   1104     return "ar";
   1105 }
   1106 
   1107 UBool CharsetRecog_windows_1256::match(InputText *textIn, CharsetMatch *results) const
   1108 {
   1109     int32_t confidence = match_sbcs(textIn, ngrams_windows_1256, charMap_windows_1256);
   1110     results->set(textIn, this, confidence);
   1111     return (confidence > 0);
   1112 }
   1113 
   1114 CharsetRecog_windows_1251::~CharsetRecog_windows_1251()
   1115 {
   1116     // nothing to do
   1117 }
   1118 
   1119 const char *CharsetRecog_windows_1251::getName() const
   1120 {
   1121     return  "windows-1251";
   1122 }
   1123 
   1124 const char *CharsetRecog_windows_1251::getLanguage() const
   1125 {
   1126     return "ru";
   1127 }
   1128 
   1129 UBool CharsetRecog_windows_1251::match(InputText *textIn, CharsetMatch *results) const
   1130 {
   1131     int32_t confidence = match_sbcs(textIn, ngrams_windows_1251, charMap_windows_1251);
   1132     results->set(textIn, this, confidence);
   1133     return (confidence > 0);
   1134 }
   1135 
   1136 CharsetRecog_KOI8_R::~CharsetRecog_KOI8_R()
   1137 {
   1138     // nothing to do
   1139 }
   1140 
   1141 const char *CharsetRecog_KOI8_R::getName() const
   1142 {
   1143     return  "KOI8-R";
   1144 }
   1145 
   1146 const char *CharsetRecog_KOI8_R::getLanguage() const
   1147 {
   1148     return "ru";
   1149 }
   1150 
   1151 UBool CharsetRecog_KOI8_R::match(InputText *textIn, CharsetMatch *results) const
   1152 {
   1153     int32_t confidence = match_sbcs(textIn, ngrams_KOI8_R, charMap_KOI8_R);
   1154     results->set(textIn, this, confidence);
   1155     return (confidence > 0);
   1156 }
   1157 
   1158 CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he()
   1159 {
   1160     // nothing to do
   1161 }
   1162 
   1163 const char *CharsetRecog_IBM424_he::getLanguage() const
   1164 {
   1165     return "he";
   1166 }
   1167 
   1168 CharsetRecog_IBM424_he_rtl::~CharsetRecog_IBM424_he_rtl()
   1169 {
   1170     // nothing to do
   1171 }
   1172 
   1173 const char *CharsetRecog_IBM424_he_rtl::getName() const
   1174 {
   1175     return  "IBM424_rtl";
   1176 }
   1177 
   1178 UBool CharsetRecog_IBM424_he_rtl::match(InputText *textIn, CharsetMatch *results) const
   1179 {
   1180     int32_t confidence = match_sbcs(textIn, ngrams_IBM424_he_rtl, charMap_IBM424_he);
   1181     results->set(textIn, this, confidence);
   1182     return (confidence > 0);
   1183 }
   1184 
   1185 CharsetRecog_IBM424_he_ltr::~CharsetRecog_IBM424_he_ltr()
   1186 {
   1187     // nothing to do
   1188 }
   1189 
   1190 const char *CharsetRecog_IBM424_he_ltr::getName() const
   1191 {
   1192     return  "IBM424_ltr";
   1193 }
   1194 
   1195 UBool CharsetRecog_IBM424_he_ltr::match(InputText *textIn, CharsetMatch *results) const
   1196 {
   1197     int32_t confidence = match_sbcs(textIn, ngrams_IBM424_he_ltr, charMap_IBM424_he);
   1198     results->set(textIn, this, confidence);
   1199     return (confidence > 0);
   1200 }
   1201 
   1202 CharsetRecog_IBM420_ar::~CharsetRecog_IBM420_ar()
   1203 {
   1204     // nothing to do
   1205 }
   1206 
   1207 const char *CharsetRecog_IBM420_ar::getLanguage() const
   1208 {
   1209     return "ar";
   1210 }
   1211 
   1212 
   1213 int32_t CharsetRecog_IBM420_ar::match_sbcs(InputText *det, const int32_t ngrams[],  const uint8_t byteMap[]) const
   1214 {
   1215     NGramParser_IBM420 parser(ngrams, byteMap);
   1216     int32_t result;
   1217 
   1218     result = parser.parse(det);
   1219 
   1220     return result;
   1221 }
   1222 
   1223 CharsetRecog_IBM420_ar_rtl::~CharsetRecog_IBM420_ar_rtl()
   1224 {
   1225     // nothing to do
   1226 }
   1227 
   1228 const char *CharsetRecog_IBM420_ar_rtl::getName() const
   1229 {
   1230     return  "IBM420_rtl";
   1231 }
   1232 
   1233 UBool CharsetRecog_IBM420_ar_rtl::match(InputText *textIn, CharsetMatch *results) const
   1234 {
   1235     int32_t confidence = match_sbcs(textIn, ngrams_IBM420_ar_rtl, charMap_IBM420_ar);
   1236     results->set(textIn, this, confidence);
   1237     return (confidence > 0);
   1238 }
   1239 
   1240 CharsetRecog_IBM420_ar_ltr::~CharsetRecog_IBM420_ar_ltr()
   1241 {
   1242     // nothing to do
   1243 }
   1244 
   1245 const char *CharsetRecog_IBM420_ar_ltr::getName() const
   1246 {
   1247     return  "IBM420_ltr";
   1248 }
   1249 
   1250 UBool CharsetRecog_IBM420_ar_ltr::match(InputText *textIn, CharsetMatch *results) const
   1251 {
   1252     int32_t confidence = match_sbcs(textIn, ngrams_IBM420_ar_ltr, charMap_IBM420_ar);
   1253     results->set(textIn, this, confidence);
   1254     return (confidence > 0);
   1255 }
   1256 
   1257 U_NAMESPACE_END
   1258 #endif
   1259 
   1260