Home | History | Annotate | Download | only in i18n
      1 /*
      2  **********************************************************************
      3  *   Copyright (C) 2005-2015, International Business Machines
      4  *   Corporation and others.  All Rights Reserved.
      5  **********************************************************************
      6  */
      7 
      8 #include "unicode/utypes.h"
      9 
     10 #include "cmemory.h"
     11 
     12 #if !UCONFIG_NO_CONVERSION
     13 #include "csrsbcs.h"
     14 #include "csmatch.h"
     15 
/* N-gram length in bytes; not referenced in the part of the file visible here. */
#define N_GRAM_SIZE 3
/* Keeps the low 24 bits (three bytes) of the rolling n-gram value; see NGramParser::addByte. */
#define N_GRAM_MASK 0xFFFFFF
/* Element count of a true array; gives a meaningless result if handed a pointer. */
#define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
     19 
     20 U_NAMESPACE_BEGIN
     21 
     22 NGramParser::NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap)
     23  : ngram(0), byteIndex(0)
     24 {
     25     ngramList = theNgramList;
     26     charMap   = theCharMap;
     27 
     28     ngramCount = hitCount = 0;
     29 }
     30 
     31 NGramParser::~NGramParser()
     32 {
     33 }
     34 
     35 /*
     36  * Binary search for value in table, which must have exactly 64 entries.
     37  */
     38 
     39 int32_t NGramParser::search(const int32_t *table, int32_t value)
     40 {
     41     int32_t index = 0;
     42 
     43     if (table[index + 32] <= value) {
     44         index += 32;
     45     }
     46 
     47     if (table[index + 16] <= value) {
     48         index += 16;
     49     }
     50 
     51     if (table[index + 8] <= value) {
     52         index += 8;
     53     }
     54 
     55     if (table[index + 4] <= value) {
     56         index += 4;
     57     }
     58 
     59     if (table[index + 2] <= value) {
     60         index += 2;
     61     }
     62 
     63     if (table[index + 1] <= value) {
     64         index += 1;
     65     }
     66 
     67     if (table[index] > value) {
     68         index -= 1;
     69     }
     70 
     71     if (index < 0 || table[index] != value) {
     72         return -1;
     73     }
     74 
     75     return index;
     76 }
     77 
     78 void NGramParser::lookup(int32_t thisNgram)
     79 {
     80     ngramCount += 1;
     81 
     82     if (search(ngramList, thisNgram) >= 0) {
     83         hitCount += 1;
     84     }
     85 
     86 }
     87 
     88 void NGramParser::addByte(int32_t b)
     89 {
     90     ngram = ((ngram << 8) + b) & N_GRAM_MASK;
     91     lookup(ngram);
     92 }
     93 
     94 int32_t NGramParser::nextByte(InputText *det)
     95 {
     96     if (byteIndex >= det->fInputLen) {
     97         return -1;
     98     }
     99 
    100     return det->fInputBytes[byteIndex++];
    101 }
    102 
    103 void NGramParser::parseCharacters(InputText *det)
    104 {
    105     int32_t b;
    106     bool ignoreSpace = FALSE;
    107 
    108     while ((b = nextByte(det)) >= 0) {
    109         uint8_t mb = charMap[b];
    110 
    111         // TODO: 0x20 might not be a space in all character sets...
    112         if (mb != 0) {
    113             if (!(mb == 0x20 && ignoreSpace)) {
    114                 addByte(mb);
    115             }
    116 
    117             ignoreSpace = (mb == 0x20);
    118         }
    119     }
    120 }
    121 
    122 int32_t NGramParser::parse(InputText *det)
    123 {
    124     parseCharacters(det);
    125 
    126     // TODO: Is this OK? The buffer could have ended in the middle of a word...
    127     addByte(0x20);
    128 
    129     double rawPercent = (double) hitCount / (double) ngramCount;
    130 
    131     //            if (rawPercent <= 2.0) {
    132     //                return 0;
    133     //            }
    134 
    135     // TODO - This is a bit of a hack to take care of a case
    136     // were we were getting a confidence of 135...
    137     if (rawPercent > 0.33) {
    138         return 98;
    139     }
    140 
    141     return (int32_t) (rawPercent * 300.0);
    142 }
    143 
    144 #if !UCONFIG_ONLY_HTML_CONVERSION
/*
 * 256-entry byte-to-byte map, indexed by the raw input byte in
 * NGramParser_IBM420::nextByte. Bytes 0x00-0x41 map to 0x40; in the rest of
 * the table most bytes map to themselves and a number map to a nearby value
 * (presumably folding shaped Arabic letter forms onto one base form, as the
 * "unshape" name suggests -- confirm against the IBM420 code page).
 */
static const uint8_t unshapeMap_IBM420[] = {
/*           -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */
/* 0- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 1- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 2- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 3- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 4- */    0x40, 0x40, 0x42, 0x42, 0x44, 0x45, 0x46, 0x47, 0x47, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
/* 5- */    0x50, 0x49, 0x52, 0x53, 0x54, 0x55, 0x56, 0x56, 0x58, 0x58, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
/* 6- */    0x60, 0x61, 0x62, 0x63, 0x63, 0x65, 0x65, 0x67, 0x67, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
/* 7- */    0x69, 0x71, 0x71, 0x73, 0x74, 0x75, 0x76, 0x77, 0x77, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
/* 8- */    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x80, 0x8B, 0x8B, 0x8D, 0x8D, 0x8F,
/* 9- */    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9A, 0x9A, 0x9A, 0x9E, 0x9E,
/* A- */    0x9E, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x9E, 0xAB, 0xAB, 0xAD, 0xAD, 0xAF,
/* B- */    0xAF, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xB1, 0xBB, 0xBB, 0xBD, 0xBD, 0xBF,
/* C- */    0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xBF, 0xCC, 0xBF, 0xCE, 0xCF,
/* D- */    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDA, 0xDC, 0xDC, 0xDC, 0xDF,
/* E- */    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
/* F- */    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
};
    164 
    165 NGramParser_IBM420::NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap):NGramParser(theNgramList, theCharMap)
    166 {
    167 	alef = 0x00;
    168 }
    169 
    170 
    171 int32_t NGramParser_IBM420::isLamAlef(int32_t b)
    172 {
    173 	if(b == 0xB2 || b == 0xB3){
    174          	return 0x47;
    175         }else if(b == 0xB4 || b == 0xB5){
    176          	return 0x49;
    177         }else if(b == 0xB8 || b == 0xB9){
    178          	return 0x56;
    179         }else
    180          	return 0x00;
    181 }
    182 
    183 /*
    184 * Arabic shaping needs to be done manually. Cannot call ArabicShaping class
    185 * because CharsetDetector is dealing with bytes not Unicode code points. We could
    186 * convert the bytes to Unicode code points but that would leave us dependent
    187 * on CharsetICU which we try to avoid. IBM420 converter amongst different versions
    188 * of JDK can produce different results and therefore is also avoided.
    189 */
    190 int32_t NGramParser_IBM420::nextByte(InputText *det)
    191 {
    192 
    193     if (byteIndex >= det->fInputLen || det->fInputBytes[byteIndex] == 0) {
    194         return -1;
    195     }
    196     int next;
    197 
    198     alef = isLamAlef(det->fInputBytes[byteIndex]);
    199     if(alef != 0x00)
    200         next = 0xB1 & 0xFF;
    201     else
    202         next = unshapeMap_IBM420[det->fInputBytes[byteIndex]& 0xFF] & 0xFF;
    203 
    204     byteIndex++;
    205 
    206     return next;
    207 }
    208 
    209 void NGramParser_IBM420::parseCharacters(InputText *det)
    210 {
    211 	int32_t b;
    212     bool ignoreSpace = FALSE;
    213 
    214     while ((b = nextByte(det)) >= 0) {
    215         uint8_t mb = charMap[b];
    216 
    217         // TODO: 0x20 might not be a space in all character sets...
    218         if (mb != 0) {
    219             if (!(mb == 0x20 && ignoreSpace)) {
    220                 addByte(mb);
    221             }
    222             ignoreSpace = (mb == 0x20);
    223         }
    224 
    225 		if(alef != 0x00){
    226             mb = charMap[alef & 0xFF];
    227 
    228             // TODO: 0x20 might not be a space in all character sets...
    229             if (mb != 0) {
    230                 if (!(mb == 0x20 && ignoreSpace)) {
    231                     addByte(mb);
    232                 }
    233 
    234                 ignoreSpace = (mb == 0x20);
    235             }
    236 
    237         }
    238     }
    239 }
    240 #endif
    241 
    242 CharsetRecog_sbcs::CharsetRecog_sbcs()
    243 {
    244     // nothing else to do
    245 }
    246 
    247 CharsetRecog_sbcs::~CharsetRecog_sbcs()
    248 {
    249     // nothing to do
    250 }
    251 
    252 int32_t CharsetRecog_sbcs::match_sbcs(InputText *det, const int32_t ngrams[],  const uint8_t byteMap[]) const
    253 {
    254     NGramParser parser(ngrams, byteMap);
    255     int32_t result;
    256 
    257     result = parser.parse(det);
    258 
    259     return result;
    260 }
    261 
/*
 * 256-entry byte map for the 8859-1 recognizers (their use is outside this
 * view). 'A'-'Z' and 'a'-'z' both map to 'a'-'z'; 0x27 maps to 0x00, which
 * NGramParser::parseCharacters skips; every other byte below 0x80 maps to
 * 0x20. In the upper half 0xC0-0xDE map onto 0xE0-0xFE (0xD7 and 0xF7 map to
 * 0x20), 0xAA, 0xB5, 0xBA, 0xDF and 0xFF are kept, and the rest map to 0x20.
 */
static const uint8_t charMap_8859_1[] = {
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
    0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
};
    296 
/*
 * 256-entry byte map for the 8859-2 recognizers (their use is outside this
 * view). 'A'-'Z' and 'a'-'z' both map to 'a'-'z'; 0x27 maps to 0x00, which
 * NGramParser::parseCharacters skips; every other byte below 0x80 maps to
 * 0x20, as does 0x80-0x9F. From 0xA0 upward, bytes map either to 0x20 or to
 * a value in the 0xB1-0xBF / 0xDF-0xFE range, as listed in the table.
 */
static const uint8_t charMap_8859_2[] = {
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0x20,
    0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF,
    0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0xB7,
    0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20,
};
    331 
/*
 * 256-entry byte map for the 8859-5 recognizers (their use is outside this
 * view). 'A'-'Z' and 'a'-'z' both map to 'a'-'z'; 0x27 maps to 0x00, which
 * NGramParser::parseCharacters skips; every other byte below 0x80 maps to
 * 0x20, as does 0x80-0x9F. From 0xA0 upward, bytes map either to 0x20 or to
 * a value in the 0xD0-0xFF range, as listed in the table.
 */
static const uint8_t charMap_8859_5[] = {
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF,
    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
    0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
    0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF,
};
    366 
/*
 * 256-entry byte map for the 8859-6 recognizers (their use is outside this
 * view). 'A'-'Z' and 'a'-'z' both map to 'a'-'z'; 0x27 maps to 0x00, which
 * NGramParser::parseCharacters skips; every other byte below 0xC1 maps to
 * 0x20. 0xC1-0xDA and 0xE0-0xEA map to themselves; the remaining upper
 * bytes map to 0x20.
 */
static const uint8_t charMap_8859_6[] = {
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
    0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
    0xD8, 0xD9, 0xDA, 0x20, 0x20, 0x20, 0x20, 0x20,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
};
    401 
/*
 * 256-entry byte map for the 8859-7 recognizers (their use is outside this
 * view). 'A'-'Z' and 'a'-'z' both map to 'a'-'z'; 0x27 maps to 0x00, which
 * NGramParser::parseCharacters skips; every other byte below 0x80 maps to
 * 0x20, as does 0x80-0xA0. From 0xA1 upward, bytes map either to 0x20 or to
 * a value in the 0xA1-0xFE range, as listed in the table.
 */
static const uint8_t charMap_8859_7[] = {
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0xA1, 0xA2, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xDC, 0x20,
    0xDD, 0xDE, 0xDF, 0x20, 0xFC, 0x20, 0xFD, 0xFE,
    0xC0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0x20, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xDC, 0xDD, 0xDE, 0xDF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20,
};
    436 
/*
 * 256-entry byte map for the 8859-8 recognizers (their use is outside this
 * view). 'A'-'Z' and 'a'-'z' both map to 'a'-'z'; 0x27 maps to 0x00, which
 * NGramParser::parseCharacters skips; every other byte below 0x80 maps to
 * 0x20. In the upper half only 0xB5 and 0xE0-0xFA map to themselves;
 * everything else maps to 0x20.
 */
static const uint8_t charMap_8859_8[] = {
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    0xF8, 0xF9, 0xFA, 0x20, 0x20, 0x20, 0x20, 0x20,
};
    471 
/*
 * 256-entry byte map for the 8859-9 recognizers (their use is outside this
 * view). 'A'-'Z' and 'a'-'z' both map to 'a'-'z'; 0x27 maps to 0x00, which
 * NGramParser::parseCharacters skips; every other byte below 0x80 maps to
 * 0x20. The upper half matches charMap_8859_1 except that 0xDD maps to
 * 0x69 ('i') instead of 0xFD.
 */
static const uint8_t charMap_8859_9[] = {
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
    0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x69, 0xFE, 0xDF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
};
    506 
/*
 * Trigram table for windows-1251: 64 values in ascending order, the layout
 * NGramParser::search requires. Each value packs three mapped bytes, oldest
 * byte in the highest position (see NGramParser::addByte).
 */
static const int32_t ngrams_windows_1251[] = {
    0x20E220, 0x20E2EE, 0x20E4EE, 0x20E7E0, 0x20E820, 0x20EAE0, 0x20EAEE, 0x20EDE0, 0x20EDE5, 0x20EEE1, 0x20EFEE, 0x20EFF0, 0x20F0E0, 0x20F1EE, 0x20F1F2, 0x20F2EE,
    0x20F7F2, 0x20FDF2, 0xE0EDE8, 0xE0F2FC, 0xE3EE20, 0xE5EBFC, 0xE5EDE8, 0xE5F1F2, 0xE5F220, 0xE820EF, 0xE8E520, 0xE8E820, 0xE8FF20, 0xEBE5ED, 0xEBE820, 0xEBFCED,
    0xEDE020, 0xEDE520, 0xEDE8E5, 0xEDE8FF, 0xEDEE20, 0xEDEEE2, 0xEE20E2, 0xEE20EF, 0xEE20F1, 0xEEE220, 0xEEE2E0, 0xEEE3EE, 0xEEE920, 0xEEEBFC, 0xEEEC20, 0xEEF1F2,
    0xEFEEEB, 0xEFF0E5, 0xEFF0E8, 0xEFF0EE, 0xF0E0E2, 0xF0E5E4, 0xF1F2E0, 0xF1F2E2, 0xF1F2E8, 0xF1FF20, 0xF2E5EB, 0xF2EE20, 0xF2EEF0, 0xF2FC20, 0xF7F2EE, 0xFBF520,
};
    513 
/*
 * 256-entry byte map for windows-1251. 'A'-'Z' and 'a'-'z' both map to
 * 'a'-'z'; 0x27 maps to 0x00, which NGramParser::parseCharacters skips;
 * every other byte below 0x80 maps to 0x20. 0xC0-0xDF map onto 0xE0-0xFF,
 * which map to themselves; 0x80-0xBF map either to 0x20 or to a value in
 * the 0x83-0xBF range, as listed in the table.
 */
static const uint8_t charMap_windows_1251[] = {
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x90, 0x83, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F,
    0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F,
    0x20, 0xA2, 0xA2, 0xBC, 0x20, 0xB4, 0x20, 0x20,
    0xB8, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0xBF,
    0x20, 0x20, 0xB3, 0xB3, 0xB4, 0xB5, 0x20, 0x20,
    0xB8, 0x20, 0xBA, 0x20, 0xBC, 0xBE, 0xBE, 0xBF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
};
    548 
/*
 * Trigram table for windows-1256: 64 values in ascending order, the layout
 * NGramParser::search requires. Each value packs three mapped bytes, oldest
 * byte in the highest position (see NGramParser::addByte).
 */
static const int32_t ngrams_windows_1256[] = {
    0x20C7E1, 0x20C7E4, 0x20C8C7, 0x20DAE1, 0x20DDED, 0x20E1E1, 0x20E3E4, 0x20E6C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E120, 0xC7E1C3, 0xC7E1C7, 0xC7E1C8,
    0xC7E1CA, 0xC7E1CC, 0xC7E1CD, 0xC7E1CF, 0xC7E1D3, 0xC7E1DA, 0xC7E1DE, 0xC7E1E3, 0xC7E1E6, 0xC7E1ED, 0xC7E320, 0xC7E420, 0xC7E4CA, 0xC820C7, 0xC920C7, 0xC920DD,
    0xC920E1, 0xC920E3, 0xC920E6, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xDA20C7, 0xDAE1EC, 0xDDED20, 0xE120C7, 0xE1C920, 0xE1EC20, 0xE1ED20,
    0xE320C7, 0xE3C720, 0xE3C920, 0xE3E420, 0xE420C7, 0xE520C7, 0xE5C720, 0xE6C7E1, 0xE6E420, 0xEC20C7, 0xED20C7, 0xED20E3, 0xED20E6, 0xEDC920, 0xEDD120, 0xEDE420,
};
    555 
/*
 * 256-entry byte map for windows-1256. 'A'-'Z' and 'a'-'z' both map to
 * 'a'-'z'; 0x27 maps to 0x00, which NGramParser::parseCharacters skips;
 * every other byte below 0x80 maps to 0x20. 0xC0-0xEF map to themselves
 * except 0xD7, which maps to 0x20; the remaining upper bytes map either to
 * 0x20 or to a value in the 0x81-0xFF range, as listed in the table.
 */
static const uint8_t charMap_windows_1256[] = {
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x81, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20,
    0x88, 0x20, 0x8A, 0x20, 0x9C, 0x8D, 0x8E, 0x8F,
    0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x98, 0x20, 0x9A, 0x20, 0x9C, 0x20, 0x20, 0x9F,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
    0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0x20,
    0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0x20, 0x20, 0x20, 0x20, 0xF4, 0x20, 0x20, 0x20,
    0x20, 0xF9, 0x20, 0xFB, 0xFC, 0x20, 0x20, 0xFF,
};
    590 
/*
 * Trigram table for KOI8-R: 64 values in ascending order, the layout
 * NGramParser::search requires. Each value packs three mapped bytes, oldest
 * byte in the highest position (see NGramParser::addByte).
 */
static const int32_t ngrams_KOI8_R[] = {
    0x20C4CF, 0x20C920, 0x20CBC1, 0x20CBCF, 0x20CEC1, 0x20CEC5, 0x20CFC2, 0x20D0CF, 0x20D0D2, 0x20D2C1, 0x20D3CF, 0x20D3D4, 0x20D4CF, 0x20D720, 0x20D7CF, 0x20DAC1,
    0x20DCD4, 0x20DED4, 0xC1CEC9, 0xC1D4D8, 0xC5CCD8, 0xC5CEC9, 0xC5D3D4, 0xC5D420, 0xC7CF20, 0xC920D0, 0xC9C520, 0xC9C920, 0xC9D120, 0xCCC5CE, 0xCCC920, 0xCCD8CE,
    0xCEC120, 0xCEC520, 0xCEC9C5, 0xCEC9D1, 0xCECF20, 0xCECFD7, 0xCF20D0, 0xCF20D3, 0xCF20D7, 0xCFC7CF, 0xCFCA20, 0xCFCCD8, 0xCFCD20, 0xCFD3D4, 0xCFD720, 0xCFD7C1,
    0xD0CFCC, 0xD0D2C5, 0xD0D2C9, 0xD0D2CF, 0xD2C1D7, 0xD2C5C4, 0xD3D120, 0xD3D4C1, 0xD3D4C9, 0xD3D4D7, 0xD4C5CC, 0xD4CF20, 0xD4CFD2, 0xD4D820, 0xD9C820, 0xDED4CF,
};
    597 
/*
 * 256-entry byte map for KOI8-R. 'A'-'Z' and 'a'-'z' both map to 'a'-'z';
 * 0x27 maps to 0x00, which NGramParser::parseCharacters skips; every other
 * byte below 0x80 maps to 0x20. In the upper half 0xA3 and 0xB3 both map to
 * 0xA3, 0xC0-0xDF map to themselves, 0xE0-0xFF map onto 0xC0-0xDF, and the
 * rest map to 0x20.
 */
static const uint8_t charMap_KOI8_R[] = {
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
    0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
    0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
    0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
    0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
    0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
};
    632 
    633 #if !UCONFIG_ONLY_HTML_CONVERSION
/*
 * Trigram table for IBM424 Hebrew, right-to-left variant (per the name): 64
 * values in ascending order, the layout NGramParser::search requires. Each
 * value packs three mapped bytes, oldest byte in the highest position (see
 * NGramParser::addByte).
 */
static const int32_t ngrams_IBM424_he_rtl[] = {
    0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641,
    0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045,
    0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x544041, 0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x554056,
    0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x684045, 0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x714069,
};
    640 
     // Trigram table for left-to-right Hebrew text in IBM424; it is the n-gram
     // list CharsetRecog_IBM424_he_ltr::match() passes to match_sbcs().
     // Holds 64 24-bit entries; NGramParser::search() binary-searches a table of
     // exactly 64 entries, so keep the count and the ascending order intact.
     641 static const int32_t ngrams_IBM424_he_ltr[] = {
     642     0x404146, 0x404154, 0x404551, 0x404554, 0x404556, 0x404558, 0x405158, 0x405462, 0x405469, 0x405546, 0x405551, 0x405746, 0x405751, 0x406846, 0x406851, 0x407141,
     643     0x407146, 0x407151, 0x414045, 0x414054, 0x414055, 0x414071, 0x414540, 0x414645, 0x415440, 0x415640, 0x424045, 0x424055, 0x424071, 0x454045, 0x454051, 0x454054,
     644     0x454055, 0x454057, 0x454068, 0x454071, 0x455440, 0x464140, 0x464540, 0x484140, 0x514140, 0x514240, 0x514540, 0x544045, 0x544055, 0x544071, 0x546240, 0x546940,
     645     0x555151, 0x555158, 0x555168, 0x564045, 0x564055, 0x564071, 0x564240, 0x564540, 0x624540, 0x694045, 0x694055, 0x694071, 0x694540, 0x714140, 0x714540, 0x714651,
     646 };
    647 
     // Byte map for IBM424 input, shared by both IBM424 Hebrew recognizers (it is
     // the byteMap argument they pass to match_sbcs()).
     // 256 entries laid out as 16 rows of 16: the row label is the high nibble of
     // the input byte and the column label is the low nibble.
     // Most bytes collapse to 0x40. Rows C-, D- and E- reuse the values of rows
     // 8-, 9- and A- (columns 1-9, or 2-9 for row E-), so those byte ranges map
     // onto each other. Entry 0x7D is the only one mapped to 0x00.
     // NOTE(review): presumably a mapped value of 0x00 makes the parser skip the
     // byte -- confirm against NGramParser before relying on it.
     648 static const uint8_t charMap_IBM424_he[] = {
     649 /*           -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */
     650 /* 0- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     651 /* 1- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     652 /* 2- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     653 /* 3- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     654 /* 4- */    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     655 /* 5- */    0x40, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     656 /* 6- */    0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     657 /* 7- */    0x40, 0x71, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x00, 0x40, 0x40,
     658 /* 8- */    0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     659 /* 9- */    0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     660 /* A- */    0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     661 /* B- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     662 /* C- */    0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     663 /* D- */    0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     664 /* E- */    0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     665 /* F- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     666 };
    667 
     // Trigram table for right-to-left Arabic text in IBM420; it is the n-gram
     // list CharsetRecog_IBM420_ar_rtl::match() passes to match_sbcs().
     // Holds 64 24-bit entries; NGramParser::search() binary-searches a table of
     // exactly 64 entries, so keep the count and the ascending order intact.
     668 static const int32_t ngrams_IBM420_ar_rtl[] = {
     669     0x4056B1, 0x4056BD, 0x405856, 0x409AB1, 0x40ABDC, 0x40B1B1, 0x40BBBD, 0x40CF56, 0x564056, 0x564640, 0x566340, 0x567540, 0x56B140, 0x56B149, 0x56B156, 0x56B158,
     670     0x56B163, 0x56B167, 0x56B169, 0x56B173, 0x56B178, 0x56B19A, 0x56B1AD, 0x56B1BB, 0x56B1CF, 0x56B1DC, 0x56BB40, 0x56BD40, 0x56BD63, 0x584056, 0x624056, 0x6240AB,
     671     0x6240B1, 0x6240BB, 0x6240CF, 0x634056, 0x734056, 0x736240, 0x754056, 0x756240, 0x784056, 0x9A4056, 0x9AB1DA, 0xABDC40, 0xB14056, 0xB16240, 0xB1DA40, 0xB1DC40,
     672     0xBB4056, 0xBB5640, 0xBB6240, 0xBBBD40, 0xBD4056, 0xBF4056, 0xBF5640, 0xCF56B1, 0xCFBD40, 0xDA4056, 0xDC4056, 0xDC40BB, 0xDC40CF, 0xDC6240, 0xDC7540, 0xDCBD40,
     673 };
    674 
     // Trigram table for left-to-right Arabic text in IBM420; it is the n-gram
     // list CharsetRecog_IBM420_ar_ltr::match() passes to match_sbcs().
     // Holds 64 24-bit entries; NGramParser::search() binary-searches a table of
     // exactly 64 entries, so keep the count and the ascending order intact.
     675 static const int32_t ngrams_IBM420_ar_ltr[] = {
     676     0x404656, 0x4056BB, 0x4056BF, 0x406273, 0x406275, 0x4062B1, 0x4062BB, 0x4062DC, 0x406356, 0x407556, 0x4075DC, 0x40B156, 0x40BB56, 0x40BD56, 0x40BDBB, 0x40BDCF,
     677     0x40BDDC, 0x40DAB1, 0x40DCAB, 0x40DCB1, 0x49B156, 0x564056, 0x564058, 0x564062, 0x564063, 0x564073, 0x564075, 0x564078, 0x56409A, 0x5640B1, 0x5640BB, 0x5640BD,
     678     0x5640BF, 0x5640DA, 0x5640DC, 0x565840, 0x56B156, 0x56CF40, 0x58B156, 0x63B156, 0x63BD56, 0x67B156, 0x69B156, 0x73B156, 0x78B156, 0x9AB156, 0xAB4062, 0xADB156,
     679     0xB14062, 0xB15640, 0xB156CF, 0xB19A40, 0xB1B140, 0xBB4062, 0xBB40DC, 0xBBB156, 0xBD5640, 0xBDBB40, 0xCF4062, 0xCF40DC, 0xCFB156, 0xDAB19A, 0xDCAB40, 0xDCB156
     680 };
    681 
     // Byte map for IBM420 input, shared by both IBM420 Arabic recognizers (it is
     // the byteMap argument they pass to match_sbcs()).
     // 256 entries laid out as 16 rows of 16: the row label is the high nibble of
     // the input byte and the column label is the low nibble.
     // Rows 0- through 3- collapse entirely to 0x40. Columns 1-9 of rows C- and D-
     // (2-9 of row E-) reuse the values of rows 8-, 9- and A-, so those byte
     // ranges map onto each other; most remaining bytes map to themselves.
     682 static const uint8_t charMap_IBM420_ar[]= {
     683 /*           -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */
     684 /* 0- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     685 /* 1- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     686 /* 2- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     687 /* 3- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     688 /* 4- */    0x40, 0x40, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     689 /* 5- */    0x40, 0x51, 0x52, 0x40, 0x40, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     690 /* 6- */    0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     691 /* 7- */    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     692 /* 8- */    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
     693 /* 9- */    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
     694 /* A- */    0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
     695 /* B- */    0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0x40, 0x40, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
     696 /* C- */    0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0xCB, 0x40, 0xCD, 0x40, 0xCF,
     697 /* D- */    0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
     698 /* E- */    0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF,
     699 /* F- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40,
     700 };
    701 #endif
    702 
    703 //ISO-8859-1,2,5,6,7,8,9 Ngrams
    704 
     // Pairs one 64-entry trigram table with the language code it was built for.
     // Used as the element type of ngrams_8859_1 and ngrams_8859_2, which hold
     // one entry per language sharing the same charset.
     705 struct NGramsPlusLang {
     706     const int32_t ngrams[64];   // trigram table; passed as the ngrams argument of match_sbcs()
     707     const char *  lang;         // language code string, e.g. "en"
     708 };
    709 
     // Per-language trigram tables for ISO-8859-1 text. CharsetRecog_8859_1::match()
     // scores the input against every entry and reports the best-scoring language.
     // Entries, in order: en, da, de, es, fr, it, nl, no, pt, sv.
     // Each table holds exactly 64 values (fixed by NGramsPlusLang::ngrams[64]);
     // NGramParser::search() binary-searches such a table, so keep every table
     // sorted ascending when editing.
     710 static const NGramsPlusLang ngrams_8859_1[] =  {
     711   {
     712     {
     713     0x206120, 0x20616E, 0x206265, 0x20636F, 0x20666F, 0x206861, 0x206865, 0x20696E, 0x206D61, 0x206F66, 0x207072, 0x207265, 0x207361, 0x207374, 0x207468, 0x20746F,
     714     0x207768, 0x616964, 0x616C20, 0x616E20, 0x616E64, 0x617320, 0x617420, 0x617465, 0x617469, 0x642061, 0x642074, 0x652061, 0x652073, 0x652074, 0x656420, 0x656E74,
     715     0x657220, 0x657320, 0x666F72, 0x686174, 0x686520, 0x686572, 0x696420, 0x696E20, 0x696E67, 0x696F6E, 0x697320, 0x6E2061, 0x6E2074, 0x6E6420, 0x6E6720, 0x6E7420,
     716     0x6F6620, 0x6F6E20, 0x6F7220, 0x726520, 0x727320, 0x732061, 0x732074, 0x736169, 0x737420, 0x742074, 0x746572, 0x746861, 0x746865, 0x74696F, 0x746F20, 0x747320,
     717     },
     718     "en"
     719   },
     720   {
     721     {
     722     0x206166, 0x206174, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207369, 0x207374, 0x207469, 0x207669, 0x616620,
     723     0x616E20, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646572, 0x646574, 0x652073, 0x656420, 0x656465, 0x656E20, 0x656E64, 0x657220, 0x657265, 0x657320,
     724     0x657420, 0x666F72, 0x676520, 0x67656E, 0x676572, 0x696765, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6572, 0x6C6967, 0x6C6C65, 0x6D6564, 0x6E6465, 0x6E6520,
     725     0x6E6720, 0x6E6765, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722064, 0x722065, 0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696C, 0x766572,
     726     },
     727     "da"
     728   },
     729   {
     730     {
     731     0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765, 0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F,
     732     0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E, 0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220,
     733     0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65, 0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465,
     734     0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 0x766572,
     735     },
     736     "de"
     737   },
     738   {
     739     {
     740     0x206120, 0x206361, 0x20636F, 0x206465, 0x20656C, 0x20656E, 0x206573, 0x20696E, 0x206C61, 0x206C6F, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
     741     0x20756E, 0x207920, 0x612063, 0x612064, 0x612065, 0x61206C, 0x612070, 0x616369, 0x61646F, 0x616C20, 0x617220, 0x617320, 0x6369F3, 0x636F6E, 0x646520, 0x64656C,
     742     0x646F20, 0x652064, 0x652065, 0x65206C, 0x656C20, 0x656E20, 0x656E74, 0x657320, 0x657374, 0x69656E, 0x69F36E, 0x6C6120, 0x6C6F73, 0x6E2065, 0x6E7465, 0x6F2064,
     743     0x6F2065, 0x6F6E20, 0x6F7220, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732064, 0x732065, 0x732070, 0x736520, 0x746520, 0x746F20, 0x756520, 0xF36E20,
     744     },
     745     "es"
     746   },
     747   {
     748     {
     749     0x206175, 0x20636F, 0x206461, 0x206465, 0x206475, 0x20656E, 0x206574, 0x206C61, 0x206C65, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207365, 0x20736F, 0x20756E,
     750     0x20E020, 0x616E74, 0x617469, 0x636520, 0x636F6E, 0x646520, 0x646573, 0x647520, 0x652061, 0x652063, 0x652064, 0x652065, 0x65206C, 0x652070, 0x652073, 0x656E20,
     751     0x656E74, 0x657220, 0x657320, 0x657420, 0x657572, 0x696F6E, 0x697320, 0x697420, 0x6C6120, 0x6C6520, 0x6C6573, 0x6D656E, 0x6E2064, 0x6E6520, 0x6E7320, 0x6E7420,
     752     0x6F6E20, 0x6F6E74, 0x6F7572, 0x717565, 0x72206C, 0x726520, 0x732061, 0x732064, 0x732065, 0x73206C, 0x732070, 0x742064, 0x746520, 0x74696F, 0x756520, 0x757220,
     753     },
     754     "fr"
     755   },
     756   {
     757     {
     758     0x20616C, 0x206368, 0x20636F, 0x206465, 0x206469, 0x206520, 0x20696C, 0x20696E, 0x206C61, 0x207065, 0x207072, 0x20756E, 0x612063, 0x612064, 0x612070, 0x612073,
     759     0x61746F, 0x636865, 0x636F6E, 0x64656C, 0x646920, 0x652061, 0x652063, 0x652064, 0x652069, 0x65206C, 0x652070, 0x652073, 0x656C20, 0x656C6C, 0x656E74, 0x657220,
     760     0x686520, 0x692061, 0x692063, 0x692064, 0x692073, 0x696120, 0x696C20, 0x696E20, 0x696F6E, 0x6C6120, 0x6C6520, 0x6C6920, 0x6C6C61, 0x6E6520, 0x6E6920, 0x6E6F20,
     761     0x6E7465, 0x6F2061, 0x6F2064, 0x6F2069, 0x6F2073, 0x6F6E20, 0x6F6E65, 0x706572, 0x726120, 0x726520, 0x736920, 0x746120, 0x746520, 0x746920, 0x746F20, 0x7A696F,
     762     },
     763     "it"
     764   },
     765   {
     766     {
     767     0x20616C, 0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656E, 0x206765, 0x206865, 0x20696E, 0x206D61, 0x206D65, 0x206F70, 0x207465, 0x207661, 0x207665,
     768     0x20766F, 0x207765, 0x207A69, 0x61616E, 0x616172, 0x616E20, 0x616E64, 0x617220, 0x617420, 0x636874, 0x646520, 0x64656E, 0x646572, 0x652062, 0x652076, 0x65656E,
     769     0x656572, 0x656E20, 0x657220, 0x657273, 0x657420, 0x67656E, 0x686574, 0x696520, 0x696E20, 0x696E67, 0x697320, 0x6E2062, 0x6E2064, 0x6E2065, 0x6E2068, 0x6E206F,
     770     0x6E2076, 0x6E6465, 0x6E6720, 0x6F6E64, 0x6F6F72, 0x6F7020, 0x6F7220, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x76616E, 0x766572, 0x766F6F,
     771     },
     772     "nl"
     773   },
     774   {
     775     {
     776     0x206174, 0x206176, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207365, 0x20736B, 0x20736F, 0x207374, 0x207469,
     777     0x207669, 0x20E520, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646574, 0x652073, 0x656420, 0x656E20, 0x656E65, 0x657220, 0x657265, 0x657420, 0x657474,
     778     0x666F72, 0x67656E, 0x696B6B, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6520, 0x6C6C65, 0x6D6564, 0x6D656E, 0x6E2073, 0x6E6520, 0x6E6720, 0x6E6765, 0x6E6E65,
     779     0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722073, 0x726520, 0x736F6D, 0x737465, 0x742073, 0x746520, 0x74656E, 0x746572, 0x74696C, 0x747420, 0x747465, 0x766572,
     780     },
     781     "no"
     782   },
     783   {
     784     {
     785     0x206120, 0x20636F, 0x206461, 0x206465, 0x20646F, 0x206520, 0x206573, 0x206D61, 0x206E6F, 0x206F20, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
     786     0x20756D, 0x612061, 0x612063, 0x612064, 0x612070, 0x616465, 0x61646F, 0x616C20, 0x617220, 0x617261, 0x617320, 0x636F6D, 0x636F6E, 0x646120, 0x646520, 0x646F20,
     787     0x646F73, 0x652061, 0x652064, 0x656D20, 0x656E74, 0x657320, 0x657374, 0x696120, 0x696361, 0x6D656E, 0x6E7465, 0x6E746F, 0x6F2061, 0x6F2063, 0x6F2064, 0x6F2065,
     788     0x6F2070, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732061, 0x732064, 0x732065, 0x732070, 0x737461, 0x746520, 0x746F20, 0x756520, 0xE36F20, 0xE7E36F,
     789     },
     790     "pt"
     791   },
     792   {
     793     {
     794     0x206174, 0x206176, 0x206465, 0x20656E, 0x2066F6, 0x206861, 0x206920, 0x20696E, 0x206B6F, 0x206D65, 0x206F63, 0x2070E5, 0x20736B, 0x20736F, 0x207374, 0x207469,
     795     0x207661, 0x207669, 0x20E472, 0x616465, 0x616E20, 0x616E64, 0x617220, 0x617474, 0x636820, 0x646520, 0x64656E, 0x646572, 0x646574, 0x656420, 0x656E20, 0x657220,
     796     0x657420, 0x66F672, 0x67656E, 0x696C6C, 0x696E67, 0x6B6120, 0x6C6C20, 0x6D6564, 0x6E2073, 0x6E6120, 0x6E6465, 0x6E6720, 0x6E6765, 0x6E696E, 0x6F6368, 0x6F6D20,
     797     0x6F6E20, 0x70E520, 0x722061, 0x722073, 0x726120, 0x736B61, 0x736F6D, 0x742073, 0x746120, 0x746520, 0x746572, 0x74696C, 0x747420, 0x766172, 0xE47220, 0xF67220,
     798     },
     799     "sv"
     800   }
     801 };
    802 
    803 
     // Per-language trigram tables for ISO-8859-2 text. CharsetRecog_8859_2::match()
     // scores the input against every entry and reports the best-scoring language.
     // Entries, in order: cs, hu, pl, ro.
     // Each table holds exactly 64 values (fixed by NGramsPlusLang::ngrams[64]);
     // NGramParser::search() binary-searches such a table, so keep every table
     // sorted ascending when editing.
     804 static const NGramsPlusLang ngrams_8859_2[] =  {
     805   {
     806     {
     807     0x206120, 0x206279, 0x20646F, 0x206A65, 0x206E61, 0x206E65, 0x206F20, 0x206F64, 0x20706F, 0x207072, 0x2070F8, 0x20726F, 0x207365, 0x20736F, 0x207374, 0x20746F,
     808     0x207620, 0x207679, 0x207A61, 0x612070, 0x636520, 0x636820, 0x652070, 0x652073, 0x652076, 0x656D20, 0x656EED, 0x686F20, 0x686F64, 0x697374, 0x6A6520, 0x6B7465,
     809     0x6C6520, 0x6C6920, 0x6E6120, 0x6EE920, 0x6EEC20, 0x6EED20, 0x6F2070, 0x6F646E, 0x6F6A69, 0x6F7374, 0x6F7520, 0x6F7661, 0x706F64, 0x706F6A, 0x70726F, 0x70F865,
     810     0x736520, 0x736F75, 0x737461, 0x737469, 0x73746E, 0x746572, 0x746EED, 0x746F20, 0x752070, 0xBE6520, 0xE16EED, 0xE9686F, 0xED2070, 0xED2073, 0xED6D20, 0xF86564,
     811     },
     812     "cs"
     813   },
     814   {
     815     {
     816     0x206120, 0x20617A, 0x206265, 0x206567, 0x20656C, 0x206665, 0x206861, 0x20686F, 0x206973, 0x206B65, 0x206B69, 0x206BF6, 0x206C65, 0x206D61, 0x206D65, 0x206D69,
     817     0x206E65, 0x20737A, 0x207465, 0x20E973, 0x612061, 0x61206B, 0x61206D, 0x612073, 0x616B20, 0x616E20, 0x617A20, 0x62616E, 0x62656E, 0x656779, 0x656B20, 0x656C20,
     818     0x656C65, 0x656D20, 0x656E20, 0x657265, 0x657420, 0x657465, 0x657474, 0x677920, 0x686F67, 0x696E74, 0x697320, 0x6B2061, 0x6BF67A, 0x6D6567, 0x6D696E, 0x6E2061,
     819     0x6E616B, 0x6E656B, 0x6E656D, 0x6E7420, 0x6F6779, 0x732061, 0x737A65, 0x737A74, 0x737AE1, 0x73E967, 0x742061, 0x747420, 0x74E173, 0x7A6572, 0xE16E20, 0xE97320,
     820     },
     821     "hu"
     822   },
     823   {
     824     {
     825     0x20637A, 0x20646F, 0x206920, 0x206A65, 0x206B6F, 0x206D61, 0x206D69, 0x206E61, 0x206E69, 0x206F64, 0x20706F, 0x207072, 0x207369, 0x207720, 0x207769, 0x207779,
     826     0x207A20, 0x207A61, 0x612070, 0x612077, 0x616E69, 0x636820, 0x637A65, 0x637A79, 0x646F20, 0x647A69, 0x652070, 0x652073, 0x652077, 0x65207A, 0x65676F, 0x656A20,
     827     0x656D20, 0x656E69, 0x676F20, 0x696120, 0x696520, 0x69656A, 0x6B6120, 0x6B6920, 0x6B6965, 0x6D6965, 0x6E6120, 0x6E6961, 0x6E6965, 0x6F2070, 0x6F7761, 0x6F7769,
     828     0x706F6C, 0x707261, 0x70726F, 0x70727A, 0x727A65, 0x727A79, 0x7369EA, 0x736B69, 0x737461, 0x776965, 0x796368, 0x796D20, 0x7A6520, 0x7A6965, 0x7A7920, 0xF37720,
     829     },
     830     "pl"
     831   },
     832   {
     833     {
     834     0x206120, 0x206163, 0x206361, 0x206365, 0x20636F, 0x206375, 0x206465, 0x206469, 0x206C61, 0x206D61, 0x207065, 0x207072, 0x207365, 0x2073E3, 0x20756E, 0x20BA69,
     835     0x20EE6E, 0x612063, 0x612064, 0x617265, 0x617420, 0x617465, 0x617520, 0x636172, 0x636F6E, 0x637520, 0x63E320, 0x646520, 0x652061, 0x652063, 0x652064, 0x652070,
     836     0x652073, 0x656120, 0x656920, 0x656C65, 0x656E74, 0x657374, 0x692061, 0x692063, 0x692064, 0x692070, 0x696520, 0x696920, 0x696E20, 0x6C6120, 0x6C6520, 0x6C6F72,
     837     0x6C7569, 0x6E6520, 0x6E7472, 0x6F7220, 0x70656E, 0x726520, 0x726561, 0x727520, 0x73E320, 0x746520, 0x747275, 0x74E320, 0x756920, 0x756C20, 0xBA6920, 0xEE6E20,
     838     },
     839     "ro"
     840   }
     841 };
    842 
     // Trigram table for Russian text in ISO-8859-5; it is the n-gram list
     // CharsetRecog_8859_5_ru::match() passes to match_sbcs().
     // Holds 64 24-bit entries; NGramParser::search() binary-searches a table of
     // exactly 64 entries, so keep the count and the ascending order intact.
     843 static const int32_t ngrams_8859_5_ru[] = {
     844     0x20D220, 0x20D2DE, 0x20D4DE, 0x20D7D0, 0x20D820, 0x20DAD0, 0x20DADE, 0x20DDD0, 0x20DDD5, 0x20DED1, 0x20DFDE, 0x20DFE0, 0x20E0D0, 0x20E1DE, 0x20E1E2, 0x20E2DE,
     845     0x20E7E2, 0x20EDE2, 0xD0DDD8, 0xD0E2EC, 0xD3DE20, 0xD5DBEC, 0xD5DDD8, 0xD5E1E2, 0xD5E220, 0xD820DF, 0xD8D520, 0xD8D820, 0xD8EF20, 0xDBD5DD, 0xDBD820, 0xDBECDD,
     846     0xDDD020, 0xDDD520, 0xDDD8D5, 0xDDD8EF, 0xDDDE20, 0xDDDED2, 0xDE20D2, 0xDE20DF, 0xDE20E1, 0xDED220, 0xDED2D0, 0xDED3DE, 0xDED920, 0xDEDBEC, 0xDEDC20, 0xDEE1E2,
     847     0xDFDEDB, 0xDFE0D5, 0xDFE0D8, 0xDFE0DE, 0xE0D0D2, 0xE0D5D4, 0xE1E2D0, 0xE1E2D2, 0xE1E2D8, 0xE1EF20, 0xE2D5DB, 0xE2DE20, 0xE2DEE0, 0xE2EC20, 0xE7E2DE, 0xEBE520,
     848 };
    849 
     // Trigram table for Arabic text in ISO-8859-6; it is the n-gram list
     // CharsetRecog_8859_6_ar::match() passes to match_sbcs().
     // Holds 64 24-bit entries; NGramParser::search() binary-searches a table of
     // exactly 64 entries, so keep the count and the ascending order intact.
     850 static const int32_t ngrams_8859_6_ar[] = {
     851     0x20C7E4, 0x20C7E6, 0x20C8C7, 0x20D9E4, 0x20E1EA, 0x20E4E4, 0x20E5E6, 0x20E8C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E420, 0xC7E4C3, 0xC7E4C7, 0xC7E4C8,
     852     0xC7E4CA, 0xC7E4CC, 0xC7E4CD, 0xC7E4CF, 0xC7E4D3, 0xC7E4D9, 0xC7E4E2, 0xC7E4E5, 0xC7E4E8, 0xC7E4EA, 0xC7E520, 0xC7E620, 0xC7E6CA, 0xC820C7, 0xC920C7, 0xC920E1,
     853     0xC920E4, 0xC920E5, 0xC920E8, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xD920C7, 0xD9E4E9, 0xE1EA20, 0xE420C7, 0xE4C920, 0xE4E920, 0xE4EA20,
     854     0xE520C7, 0xE5C720, 0xE5C920, 0xE5E620, 0xE620C7, 0xE720C7, 0xE7C720, 0xE8C7E4, 0xE8E620, 0xE920C7, 0xEA20C7, 0xEA20E5, 0xEA20E8, 0xEAC920, 0xEAD120, 0xEAE620,
     855 };
    856 
     // Trigram table for Greek text in ISO-8859-7; it is the n-gram list
     // CharsetRecog_8859_7_el::match() passes to match_sbcs().
     // Holds 64 24-bit entries; NGramParser::search() binary-searches a table of
     // exactly 64 entries, so keep the count and the ascending order intact.
     857 static const int32_t ngrams_8859_7_el[] = {
     858     0x20E1ED, 0x20E1F0, 0x20E3E9, 0x20E4E9, 0x20E5F0, 0x20E720, 0x20EAE1, 0x20ECE5, 0x20EDE1, 0x20EF20, 0x20F0E1, 0x20F0EF, 0x20F0F1, 0x20F3F4, 0x20F3F5, 0x20F4E7,
     859     0x20F4EF, 0xDFE120, 0xE120E1, 0xE120F4, 0xE1E920, 0xE1ED20, 0xE1F0FC, 0xE1F220, 0xE3E9E1, 0xE5E920, 0xE5F220, 0xE720F4, 0xE7ED20, 0xE7F220, 0xE920F4, 0xE9E120,
     860     0xE9EADE, 0xE9F220, 0xEAE1E9, 0xEAE1F4, 0xECE520, 0xED20E1, 0xED20E5, 0xED20F0, 0xEDE120, 0xEFF220, 0xEFF520, 0xF0EFF5, 0xF0F1EF, 0xF0FC20, 0xF220E1, 0xF220E5,
     861     0xF220EA, 0xF220F0, 0xF220F4, 0xF3E520, 0xF3E720, 0xF3F4EF, 0xF4E120, 0xF4E1E9, 0xF4E7ED, 0xF4E7F2, 0xF4E9EA, 0xF4EF20, 0xF4EFF5, 0xF4F9ED, 0xF9ED20, 0xFEED20,
     862 };
    863 
     // Trigram table for Hebrew text in ISO-8859-8-I; it is the n-gram list
     // CharsetRecog_8859_8_I_he::match() passes to match_sbcs().
     // Holds 64 24-bit entries; NGramParser::search() binary-searches a table of
     // exactly 64 entries, so keep the count and the ascending order intact.
     864 static const int32_t ngrams_8859_8_I_he[] = {
     865     0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5, 0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0,
     866     0x20F2EC, 0x20F9EC, 0xE0FA20, 0xE420E0, 0xE420E1, 0xE420E4, 0xE420EC, 0xE420EE, 0xE420F9, 0xE4E5E0, 0xE5E020, 0xE5ED20, 0xE5EF20, 0xE5F820, 0xE5FA20, 0xE920E4,
     867     0xE9E420, 0xE9E5FA, 0xE9E9ED, 0xE9ED20, 0xE9EF20, 0xE9F820, 0xE9FA20, 0xEC20E0, 0xEC20E4, 0xECE020, 0xECE420, 0xED20E0, 0xED20E1, 0xED20E4, 0xED20EC, 0xED20EE,
     868     0xED20F9, 0xEEE420, 0xEF20E4, 0xF0E420, 0xF0E920, 0xF0E9ED, 0xF2EC20, 0xF820E4, 0xF8E9ED, 0xF9EC20, 0xFA20E0, 0xFA20E1, 0xFA20E4, 0xFA20EC, 0xFA20EE, 0xFA20F9,
     869 };
    870 
     // Trigram table for Hebrew text in ISO-8859-8; it is the n-gram list
     // CharsetRecog_8859_8_he::match() passes to match_sbcs().
     // Holds 64 24-bit entries; NGramParser::search() binary-searches a table of
     // exactly 64 entries, so keep the count and the ascending order intact.
     871 static const int32_t ngrams_8859_8_he[] = {
     872     0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2, 0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0,
     873     0x20FAE5, 0x20FAE9, 0xE020E4, 0xE020EC, 0xE020ED, 0xE020FA, 0xE0E420, 0xE0E5E4, 0xE0EC20, 0xE0EE20, 0xE120E4, 0xE120ED, 0xE120FA, 0xE420E4, 0xE420E9, 0xE420EC,
     874     0xE420ED, 0xE420EF, 0xE420F8, 0xE420FA, 0xE4EC20, 0xE5E020, 0xE5E420, 0xE7E020, 0xE9E020, 0xE9E120, 0xE9E420, 0xEC20E4, 0xEC20ED, 0xEC20FA, 0xECF220, 0xECF920,
     875     0xEDE9E9, 0xEDE9F0, 0xEDE9F8, 0xEE20E4, 0xEE20ED, 0xEE20FA, 0xEEE120, 0xEEE420, 0xF2E420, 0xF920E4, 0xF920ED, 0xF920FA, 0xF9E420, 0xFAE020, 0xFAE420, 0xFAE5E9,
     876 };
    877 
     // Trigram table for Turkish text in ISO-8859-9; it is the n-gram list
     // CharsetRecog_8859_9_tr::match() passes to match_sbcs().
     // Holds 64 24-bit entries; NGramParser::search() binary-searches a table of
     // exactly 64 entries, so keep the count and the ascending order intact.
     878 static const int32_t ngrams_8859_9_tr[] = {
     879     0x206261, 0x206269, 0x206275, 0x206461, 0x206465, 0x206765, 0x206861, 0x20696C, 0x206B61, 0x206B6F, 0x206D61, 0x206F6C, 0x207361, 0x207461, 0x207665, 0x207961,
     880     0x612062, 0x616B20, 0x616C61, 0x616D61, 0x616E20, 0x616EFD, 0x617220, 0x617261, 0x6172FD, 0x6173FD, 0x617961, 0x626972, 0x646120, 0x646520, 0x646920, 0x652062,
     881     0x65206B, 0x656469, 0x656E20, 0x657220, 0x657269, 0x657369, 0x696C65, 0x696E20, 0x696E69, 0x697220, 0x6C616E, 0x6C6172, 0x6C6520, 0x6C6572, 0x6E2061, 0x6E2062,
     882     0x6E206B, 0x6E6461, 0x6E6465, 0x6E6520, 0x6E6920, 0x6E696E, 0x6EFD20, 0x72696E, 0x72FD6E, 0x766520, 0x796120, 0x796F72, 0xFD6E20, 0xFD6E64, 0xFD6EFD, 0xFDF0FD,
     883 };
    884 
    885 CharsetRecog_8859_1::~CharsetRecog_8859_1()
    886 {
    887     // nothing to do
    888 }
    889 
    890 UBool CharsetRecog_8859_1::match(InputText *textIn, CharsetMatch *results) const {
    891     const char *name = textIn->fC1Bytes? "windows-1252" : "ISO-8859-1";
    892     uint32_t i;
    893     int32_t bestConfidenceSoFar = -1;
    894     for (i=0; i < ARRAY_SIZE(ngrams_8859_1) ; i++) {
    895         const int32_t *ngrams = ngrams_8859_1[i].ngrams;
    896         const char    *lang   = ngrams_8859_1[i].lang;
    897         int32_t confidence = match_sbcs(textIn, ngrams, charMap_8859_1);
    898         if (confidence > bestConfidenceSoFar) {
    899             results->set(textIn, this, confidence, name, lang);
    900             bestConfidenceSoFar = confidence;
    901         }
    902     }
    903     return (bestConfidenceSoFar > 0);
    904 }
    905 
    906 const char *CharsetRecog_8859_1::getName() const
    907 {
    908     return "ISO-8859-1";
    909 }
    910 
    911 
    912 CharsetRecog_8859_2::~CharsetRecog_8859_2()
    913 {
    914     // nothing to do
    915 }
    916 
    917 UBool CharsetRecog_8859_2::match(InputText *textIn, CharsetMatch *results) const {
    918     const char *name = textIn->fC1Bytes? "windows-1250" : "ISO-8859-2";
    919     uint32_t i;
    920     int32_t bestConfidenceSoFar = -1;
    921     for (i=0; i < ARRAY_SIZE(ngrams_8859_2) ; i++) {
    922         const int32_t *ngrams = ngrams_8859_2[i].ngrams;
    923         const char    *lang   = ngrams_8859_2[i].lang;
    924         int32_t confidence = match_sbcs(textIn, ngrams, charMap_8859_2);
    925         if (confidence > bestConfidenceSoFar) {
    926             results->set(textIn, this, confidence, name, lang);
    927             bestConfidenceSoFar = confidence;
    928         }
    929     }
    930     return (bestConfidenceSoFar > 0);
    931 }
    932 
    933 const char *CharsetRecog_8859_2::getName() const
    934 {
    935     return "ISO-8859-2";
    936 }
    937 
    938 
    939 CharsetRecog_8859_5::~CharsetRecog_8859_5()
    940 {
    941     // nothing to do
    942 }
    943 
    944 const char *CharsetRecog_8859_5::getName() const
    945 {
    946     return "ISO-8859-5";
    947 }
    948 
    949 CharsetRecog_8859_5_ru::~CharsetRecog_8859_5_ru()
    950 {
    951     // nothing to do
    952 }
    953 
    954 const char *CharsetRecog_8859_5_ru::getLanguage() const
    955 {
    956     return "ru";
    957 }
    958 
    959 UBool CharsetRecog_8859_5_ru::match(InputText *textIn, CharsetMatch *results) const
    960 {
    961     int32_t confidence = match_sbcs(textIn, ngrams_8859_5_ru, charMap_8859_5);
    962     results->set(textIn, this, confidence);
    963     return (confidence > 0);
    964 }
    965 
    966 CharsetRecog_8859_6::~CharsetRecog_8859_6()
    967 {
    968     // nothing to do
    969 }
    970 
    971 const char *CharsetRecog_8859_6::getName() const
    972 {
    973     return "ISO-8859-6";
    974 }
    975 
    976 CharsetRecog_8859_6_ar::~CharsetRecog_8859_6_ar()
    977 {
    978     // nothing to do
    979 }
    980 
    981 const char *CharsetRecog_8859_6_ar::getLanguage() const
    982 {
    983     return "ar";
    984 }
    985 
    986 UBool CharsetRecog_8859_6_ar::match(InputText *textIn, CharsetMatch *results) const
    987 {
    988     int32_t confidence = match_sbcs(textIn, ngrams_8859_6_ar, charMap_8859_6);
    989     results->set(textIn, this, confidence);
    990     return (confidence > 0);
    991 }
    992 
    993 CharsetRecog_8859_7::~CharsetRecog_8859_7()
    994 {
    995     // nothing to do
    996 }
    997 
    998 const char *CharsetRecog_8859_7::getName() const
    999 {
   1000     return "ISO-8859-7";
   1001 }
   1002 
   1003 CharsetRecog_8859_7_el::~CharsetRecog_8859_7_el()
   1004 {
   1005     // nothing to do
   1006 }
   1007 
   1008 const char *CharsetRecog_8859_7_el::getLanguage() const
   1009 {
   1010     return "el";
   1011 }
   1012 
   1013 UBool CharsetRecog_8859_7_el::match(InputText *textIn, CharsetMatch *results) const
   1014 {
   1015     const char *name = textIn->fC1Bytes? "windows-1253" : "ISO-8859-7";
   1016     int32_t confidence = match_sbcs(textIn, ngrams_8859_7_el, charMap_8859_7);
   1017     results->set(textIn, this, confidence, name, "el");
   1018     return (confidence > 0);
   1019 }
   1020 
   1021 CharsetRecog_8859_8::~CharsetRecog_8859_8()
   1022 {
   1023     // nothing to do
   1024 }
   1025 
   1026 const char *CharsetRecog_8859_8::getName() const
   1027 {
   1028     return "ISO-8859-8";
   1029 }
   1030 
   1031 CharsetRecog_8859_8_I_he::~CharsetRecog_8859_8_I_he ()
   1032 {
   1033     // nothing to do
   1034 }
   1035 
   1036 const char *CharsetRecog_8859_8_I_he::getName() const
   1037 {
   1038     return "ISO-8859-8-I";
   1039 }
   1040 
   1041 const char *CharsetRecog_8859_8_I_he::getLanguage() const
   1042 {
   1043     return "he";
   1044 }
   1045 
   1046 UBool CharsetRecog_8859_8_I_he::match(InputText *textIn, CharsetMatch *results) const
   1047 {
   1048     const char *name = textIn->fC1Bytes? "windows-1255" : "ISO-8859-8-I";
   1049     int32_t confidence = match_sbcs(textIn, ngrams_8859_8_I_he, charMap_8859_8);
   1050     results->set(textIn, this, confidence, name, "he");
   1051     return (confidence > 0);
   1052 }
   1053 
   1054 CharsetRecog_8859_8_he::~CharsetRecog_8859_8_he()
   1055 {
   1056     // od ot gnihton
   1057 }
   1058 
   1059 const char *CharsetRecog_8859_8_he::getLanguage() const
   1060 {
   1061     return "he";
   1062 }
   1063 
   1064 UBool CharsetRecog_8859_8_he::match(InputText *textIn, CharsetMatch *results) const
   1065 {
   1066     const char *name = textIn->fC1Bytes? "windows-1255" : "ISO-8859-8";
   1067     int32_t confidence = match_sbcs(textIn, ngrams_8859_8_he, charMap_8859_8);
   1068     results->set(textIn, this, confidence, name, "he");
   1069     return (confidence > 0);
   1070 }
   1071 
   1072 CharsetRecog_8859_9::~CharsetRecog_8859_9()
   1073 {
   1074     // nothing to do
   1075 }
   1076 
   1077 const char *CharsetRecog_8859_9::getName() const
   1078 {
   1079     return "ISO-8859-9";
   1080 }
   1081 
   1082 CharsetRecog_8859_9_tr::~CharsetRecog_8859_9_tr ()
   1083 {
   1084     // nothing to do
   1085 }
   1086 
   1087 const char *CharsetRecog_8859_9_tr::getLanguage() const
   1088 {
   1089     return "tr";
   1090 }
   1091 
   1092 UBool CharsetRecog_8859_9_tr::match(InputText *textIn, CharsetMatch *results) const
   1093 {
   1094     const char *name = textIn->fC1Bytes? "windows-1254" : "ISO-8859-9";
   1095     int32_t confidence = match_sbcs(textIn, ngrams_8859_9_tr, charMap_8859_9);
   1096     results->set(textIn, this, confidence, name, "tr");
   1097     return (confidence > 0);
   1098 }
   1099 
   1100 CharsetRecog_windows_1256::~CharsetRecog_windows_1256()
   1101 {
   1102     // nothing to do
   1103 }
   1104 
   1105 const char *CharsetRecog_windows_1256::getName() const
   1106 {
   1107     return  "windows-1256";
   1108 }
   1109 
   1110 const char *CharsetRecog_windows_1256::getLanguage() const
   1111 {
   1112     return "ar";
   1113 }
   1114 
   1115 UBool CharsetRecog_windows_1256::match(InputText *textIn, CharsetMatch *results) const
   1116 {
   1117     int32_t confidence = match_sbcs(textIn, ngrams_windows_1256, charMap_windows_1256);
   1118     results->set(textIn, this, confidence);
   1119     return (confidence > 0);
   1120 }
   1121 
   1122 CharsetRecog_windows_1251::~CharsetRecog_windows_1251()
   1123 {
   1124     // nothing to do
   1125 }
   1126 
   1127 const char *CharsetRecog_windows_1251::getName() const
   1128 {
   1129     return  "windows-1251";
   1130 }
   1131 
   1132 const char *CharsetRecog_windows_1251::getLanguage() const
   1133 {
   1134     return "ru";
   1135 }
   1136 
   1137 UBool CharsetRecog_windows_1251::match(InputText *textIn, CharsetMatch *results) const
   1138 {
   1139     int32_t confidence = match_sbcs(textIn, ngrams_windows_1251, charMap_windows_1251);
   1140     results->set(textIn, this, confidence);
   1141     return (confidence > 0);
   1142 }
   1143 
   1144 CharsetRecog_KOI8_R::~CharsetRecog_KOI8_R()
   1145 {
   1146     // nothing to do
   1147 }
   1148 
   1149 const char *CharsetRecog_KOI8_R::getName() const
   1150 {
   1151     return  "KOI8-R";
   1152 }
   1153 
   1154 const char *CharsetRecog_KOI8_R::getLanguage() const
   1155 {
   1156     return "ru";
   1157 }
   1158 
   1159 UBool CharsetRecog_KOI8_R::match(InputText *textIn, CharsetMatch *results) const
   1160 {
   1161     int32_t confidence = match_sbcs(textIn, ngrams_KOI8_R, charMap_KOI8_R);
   1162     results->set(textIn, this, confidence);
   1163     return (confidence > 0);
   1164 }
   1165 
   1166 #if !UCONFIG_ONLY_HTML_CONVERSION
   1167 CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he()
   1168 {
   1169     // nothing to do
   1170 }
   1171 
   1172 const char *CharsetRecog_IBM424_he::getLanguage() const
   1173 {
   1174     return "he";
   1175 }
   1176 
   1177 CharsetRecog_IBM424_he_rtl::~CharsetRecog_IBM424_he_rtl()
   1178 {
   1179     // nothing to do
   1180 }
   1181 
   1182 const char *CharsetRecog_IBM424_he_rtl::getName() const
   1183 {
   1184     return  "IBM424_rtl";
   1185 }
   1186 
   1187 UBool CharsetRecog_IBM424_he_rtl::match(InputText *textIn, CharsetMatch *results) const
   1188 {
   1189     int32_t confidence = match_sbcs(textIn, ngrams_IBM424_he_rtl, charMap_IBM424_he);
   1190     results->set(textIn, this, confidence);
   1191     return (confidence > 0);
   1192 }
   1193 
   1194 CharsetRecog_IBM424_he_ltr::~CharsetRecog_IBM424_he_ltr()
   1195 {
   1196     // nothing to do
   1197 }
   1198 
   1199 const char *CharsetRecog_IBM424_he_ltr::getName() const
   1200 {
   1201     return  "IBM424_ltr";
   1202 }
   1203 
   1204 UBool CharsetRecog_IBM424_he_ltr::match(InputText *textIn, CharsetMatch *results) const
   1205 {
   1206     int32_t confidence = match_sbcs(textIn, ngrams_IBM424_he_ltr, charMap_IBM424_he);
   1207     results->set(textIn, this, confidence);
   1208     return (confidence > 0);
   1209 }
   1210 
   1211 CharsetRecog_IBM420_ar::~CharsetRecog_IBM420_ar()
   1212 {
   1213     // nothing to do
   1214 }
   1215 
   1216 const char *CharsetRecog_IBM420_ar::getLanguage() const
   1217 {
   1218     return "ar";
   1219 }
   1220 
   1221 
   1222 int32_t CharsetRecog_IBM420_ar::match_sbcs(InputText *det, const int32_t ngrams[],  const uint8_t byteMap[]) const
   1223 {
   1224     NGramParser_IBM420 parser(ngrams, byteMap);
   1225     int32_t result;
   1226 
   1227     result = parser.parse(det);
   1228 
   1229     return result;
   1230 }
   1231 
   1232 CharsetRecog_IBM420_ar_rtl::~CharsetRecog_IBM420_ar_rtl()
   1233 {
   1234     // nothing to do
   1235 }
   1236 
   1237 const char *CharsetRecog_IBM420_ar_rtl::getName() const
   1238 {
   1239     return  "IBM420_rtl";
   1240 }
   1241 
   1242 UBool CharsetRecog_IBM420_ar_rtl::match(InputText *textIn, CharsetMatch *results) const
   1243 {
   1244     int32_t confidence = match_sbcs(textIn, ngrams_IBM420_ar_rtl, charMap_IBM420_ar);
   1245     results->set(textIn, this, confidence);
   1246     return (confidence > 0);
   1247 }
   1248 
   1249 CharsetRecog_IBM420_ar_ltr::~CharsetRecog_IBM420_ar_ltr()
   1250 {
   1251     // nothing to do
   1252 }
   1253 
   1254 const char *CharsetRecog_IBM420_ar_ltr::getName() const
   1255 {
   1256     return  "IBM420_ltr";
   1257 }
   1258 
   1259 UBool CharsetRecog_IBM420_ar_ltr::match(InputText *textIn, CharsetMatch *results) const
   1260 {
   1261     int32_t confidence = match_sbcs(textIn, ngrams_IBM420_ar_ltr, charMap_IBM420_ar);
   1262     results->set(textIn, this, confidence);
   1263     return (confidence > 0);
   1264 }
   1265 #endif
   1266 
   1267 U_NAMESPACE_END
   1268 #endif
   1269 
   1270