Home | History | Annotate | Download | only in i18n
      1 /*
      2  **********************************************************************
      3  *   Copyright (C) 2005-2012, International Business Machines
      4  *   Corporation and others.  All Rights Reserved.
      5  **********************************************************************
      6  */
      7 
      8 #include "unicode/utypes.h"
      9 
     10 #include "cmemory.h"
     11 
     12 #if !UCONFIG_NO_CONVERSION
     13 #include "csrsbcs.h"
     14 #include "csmatch.h"
     15 
     16 #define N_GRAM_SIZE 3
     17 #define N_GRAM_MASK 0xFFFFFF
     18 #define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
     19 
     20 U_NAMESPACE_BEGIN
     21 
     22 NGramParser::NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap)
     23   :byteIndex(0), ngram(0)
     24 {
     25     ngramList = theNgramList;
     26     charMap   = theCharMap;
     27 
     28     ngramCount = hitCount = 0;
     29 }
     30 
     31 /*
     32  * Binary search for value in table, which must have exactly 64 entries.
     33  */
     34 
     35 int32_t NGramParser::search(const int32_t *table, int32_t value)
     36 {
     37     int32_t index = 0;
     38 
     39     if (table[index + 32] <= value) {
     40         index += 32;
     41     }
     42 
     43     if (table[index + 16] <= value) {
     44         index += 16;
     45     }
     46 
     47     if (table[index + 8] <= value) {
     48         index += 8;
     49     }
     50 
     51     if (table[index + 4] <= value) {
     52         index += 4;
     53     }
     54 
     55     if (table[index + 2] <= value) {
     56         index += 2;
     57     }
     58 
     59     if (table[index + 1] <= value) {
     60         index += 1;
     61     }
     62 
     63     if (table[index] > value) {
     64         index -= 1;
     65     }
     66 
     67     if (index < 0 || table[index] != value) {
     68         return -1;
     69     }
     70 
     71     return index;
     72 }
     73 
     74 void NGramParser::lookup(int32_t thisNgram)
     75 {
     76     ngramCount += 1;
     77 
     78     if (search(ngramList, thisNgram) >= 0) {
     79         hitCount += 1;
     80     }
     81 
     82 }
     83 
     84 void NGramParser::addByte(int32_t b)
     85 {
     86     ngram = ((ngram << 8) + b) & N_GRAM_MASK;
     87     lookup(ngram);
     88 }
     89 
     90 int32_t NGramParser::nextByte(InputText *det)
     91 {
     92     if (byteIndex >= det->fInputLen) {
     93         return -1;
     94     }
     95 
     96     return det->fInputBytes[byteIndex++];
     97 }
     98 
     99 int32_t NGramParser::parse(InputText *det)
    100 {
    101     int32_t b;
    102     bool ignoreSpace = FALSE;
    103 
    104     while ((b = nextByte(det)) >= 0) {
    105         uint8_t mb = charMap[b];
    106 
    107         // TODO: 0x20 might not be a space in all character sets...
    108         if (mb != 0) {
    109             if (!(mb == 0x20 && ignoreSpace)) {
    110                 addByte(mb);
    111             }
    112 
    113             ignoreSpace = (mb == 0x20);
    114         }
    115     }
    116 
    117     // TODO: Is this OK? The buffer could have ended in the middle of a word...
    118     addByte(0x20);
    119 
    120     double rawPercent = (double) hitCount / (double) ngramCount;
    121 
    122     //            if (rawPercent <= 2.0) {
    123     //                return 0;
    124     //            }
    125 
    126     // TODO - This is a bit of a hack to take care of a case
    127     // were we were getting a confidence of 135...
    128     if (rawPercent > 0.33) {
    129         return 98;
    130     }
    131 
    132     return (int32_t) (rawPercent * 300.0);
    133 }
    134 
    135 CharsetRecog_sbcs::CharsetRecog_sbcs()
    136 {
    137     // nothing else to do
    138 }
    139 
    140 CharsetRecog_sbcs::~CharsetRecog_sbcs()
    141 {
    142     // nothing to do
    143 }
    144 
    145 int32_t CharsetRecog_sbcs::match_sbcs(InputText *det, const int32_t ngrams[],  const uint8_t byteMap[]) const
    146 {
    147     NGramParser parser(ngrams, byteMap);
    148     int32_t result;
    149 
    150     result = parser.parse(det);
    151 
    152     return result;
    153 }
    154 
     // 256-entry byte map (32 rows of 8) named for ISO-8859-1.
     // For values below 0x80: 0x41-0x5A and 0x61-0x7A both yield 0x61-0x7A,
     // 0x27 yields 0x00, and everything else yields 0x20.
     // In the upper half, 0xC0-0xDE are mapped onto 0xE0-0xFE, with 0xD7 and
     // 0xF7 yielding 0x20.
     // NGramParser::parse() drops bytes that map to 0x00 and collapses runs
     // that map to 0x20. Presumably passed to match_sbcs() as byteMap; the
     // callers are not visible in this part of the file.
     155 static const uint8_t charMap_8859_1[] = {
     156     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     157     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     158     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     159     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     160     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
     161     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     162     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     163     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     164     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
     165     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
     166     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
     167     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
     168     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
     169     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
     170     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
     171     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
     172     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     173     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     174     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     175     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     176     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     177     0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
     178     0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
     179     0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20,
     180     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
     181     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
     182     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
     183     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
     184     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
     185     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
     186     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
     187     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
     188 };
    189 
     // 256-entry byte map (32 rows of 8) named for ISO-8859-2.
     // For values below 0x80: 0x41-0x5A and 0x61-0x7A both yield 0x61-0x7A,
     // 0x27 yields 0x00, and everything else yields 0x20.
     // Entries for 0x80-0xFF are specific to this code page.
     // NGramParser::parse() drops bytes that map to 0x00 and collapses runs
     // that map to 0x20. Presumably passed to match_sbcs() as byteMap; the
     // callers are not visible in this part of the file.
     190 static const uint8_t charMap_8859_2[] = {
     191     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     192     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     193     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     194     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     195     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
     196     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     197     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     198     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     199     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
     200     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
     201     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
     202     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
     203     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
     204     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
     205     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
     206     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
     207     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     208     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     209     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     210     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     211     0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0x20,
     212     0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF,
     213     0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0xB7,
     214     0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF,
     215     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
     216     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
     217     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
     218     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
     219     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
     220     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
     221     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
     222     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20,
     223 };
    224 
     // 256-entry byte map (32 rows of 8) named for ISO-8859-5.
     // For values below 0x80: 0x41-0x5A and 0x61-0x7A both yield 0x61-0x7A,
     // 0x27 yields 0x00, and everything else yields 0x20.
     // Entries for 0x80-0xFF are specific to this code page.
     // NGramParser::parse() drops bytes that map to 0x00 and collapses runs
     // that map to 0x20. Presumably passed to match_sbcs() as byteMap; the
     // callers are not visible in this part of the file.
     225 static const uint8_t charMap_8859_5[] = {
     226     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     227     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     228     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     229     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     230     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
     231     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     232     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     233     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     234     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
     235     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
     236     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
     237     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
     238     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
     239     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
     240     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
     241     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
     242     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     243     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     244     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     245     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     246     0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
     247     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF,
     248     0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
     249     0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
     250     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
     251     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
     252     0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
     253     0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
     254     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
     255     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
     256     0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
     257     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF,
     258 };
    259 
     // 256-entry byte map (32 rows of 8) named for ISO-8859-6.
     // For values below 0x80: 0x41-0x5A and 0x61-0x7A both yield 0x61-0x7A,
     // 0x27 yields 0x00, and everything else yields 0x20.
     // In the upper half only 0xC1-0xDA and 0xE0-0xEA are kept (as
     // themselves); every other value from 0x80 up yields 0x20.
     // NGramParser::parse() drops bytes that map to 0x00 and collapses runs
     // that map to 0x20. Presumably passed to match_sbcs() as byteMap; the
     // callers are not visible in this part of the file.
     260 static const uint8_t charMap_8859_6[] = {
     261     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     262     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     263     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     264     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     265     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
     266     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     267     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     268     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     269     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
     270     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
     271     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
     272     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
     273     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
     274     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
     275     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
     276     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
     277     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     278     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     279     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     280     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     281     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     282     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     283     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     284     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     285     0x20, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
     286     0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
     287     0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
     288     0xD8, 0xD9, 0xDA, 0x20, 0x20, 0x20, 0x20, 0x20,
     289     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
     290     0xE8, 0xE9, 0xEA, 0x20, 0x20, 0x20, 0x20, 0x20,
     291     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     292     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     293 };
    294 
     // 256-entry byte map (32 rows of 8) named for ISO-8859-7.
     // For values below 0x80: 0x41-0x5A and 0x61-0x7A both yield 0x61-0x7A,
     // 0x27 yields 0x00, and everything else yields 0x20.
     // Entries for 0x80-0xFF are specific to this code page.
     // NGramParser::parse() drops bytes that map to 0x00 and collapses runs
     // that map to 0x20. Presumably passed to match_sbcs() as byteMap; the
     // callers are not visible in this part of the file.
     295 static const uint8_t charMap_8859_7[] = {
     296     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     297     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     298     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     299     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     300     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
     301     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     302     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     303     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     304     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
     305     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
     306     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
     307     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
     308     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
     309     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
     310     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
     311     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
     312     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     313     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     314     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     315     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     316     0x20, 0xA1, 0xA2, 0x20, 0x20, 0x20, 0x20, 0x20,
     317     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     318     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xDC, 0x20,
     319     0xDD, 0xDE, 0xDF, 0x20, 0xFC, 0x20, 0xFD, 0xFE,
     320     0xC0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
     321     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
     322     0xF0, 0xF1, 0x20, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
     323     0xF8, 0xF9, 0xFA, 0xFB, 0xDC, 0xDD, 0xDE, 0xDF,
     324     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
     325     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
     326     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
     327     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20,
     328 };
    329 
     // 256-entry byte map (32 rows of 8) named for ISO-8859-8.
     // For values below 0x80: 0x41-0x5A and 0x61-0x7A both yield 0x61-0x7A,
     // 0x27 yields 0x00, and everything else yields 0x20.
     // In the upper half only 0xB5 and 0xE0-0xFA are kept (as themselves);
     // every other value from 0x80 up yields 0x20.
     // NGramParser::parse() drops bytes that map to 0x00 and collapses runs
     // that map to 0x20. Presumably passed to match_sbcs() as byteMap; the
     // callers are not visible in this part of the file.
     330 static const uint8_t charMap_8859_8[] = {
     331     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     332     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     333     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     334     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     335     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
     336     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     337     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     338     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     339     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
     340     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
     341     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
     342     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
     343     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
     344     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
     345     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
     346     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
     347     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     348     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     349     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     350     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     351     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     352     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     353     0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
     354     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     355     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     356     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     357     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     358     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     359     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
     360     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
     361     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
     362     0xF8, 0xF9, 0xFA, 0x20, 0x20, 0x20, 0x20, 0x20,
     363 };
    364 
     // 256-entry byte map (32 rows of 8) named for ISO-8859-9.
     // For values below 0x80: 0x41-0x5A and 0x61-0x7A both yield 0x61-0x7A,
     // 0x27 yields 0x00, and everything else yields 0x20.
     // In the upper half, 0xC0-0xDE are mapped onto 0xE0-0xFE, except that
     // 0xD7 yields 0x20 and 0xDD yields 0x69; 0xF7 also yields 0x20.
     // NGramParser::parse() drops bytes that map to 0x00 and collapses runs
     // that map to 0x20. Presumably passed to match_sbcs() as byteMap; the
     // callers are not visible in this part of the file.
     365 static const uint8_t charMap_8859_9[] = {
     366     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     367     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     368     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     369     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     370     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
     371     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     372     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     373     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     374     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
     375     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
     376     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
     377     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
     378     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
     379     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
     380     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
     381     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
     382     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     383     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     384     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     385     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     386     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     387     0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
     388     0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
     389     0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20,
     390     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
     391     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
     392     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
     393     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x69, 0xFE, 0xDF,
     394     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
     395     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
     396     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
     397     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
     398 };
    399 
     // 64 three-byte n-grams named for windows-1251, each packed into the
     // low 24 bits of an int32_t and listed in ascending order.
     // NGramParser::search() relies on there being exactly 64 sorted entries.
     400 static const int32_t ngrams_windows_1251[] = {
     401     0x20E220, 0x20E2EE, 0x20E4EE, 0x20E7E0, 0x20E820, 0x20EAE0, 0x20EAEE, 0x20EDE0, 0x20EDE5, 0x20EEE1, 0x20EFEE, 0x20EFF0, 0x20F0E0, 0x20F1EE, 0x20F1F2, 0x20F2EE,
     402     0x20F7F2, 0x20FDF2, 0xE0EDE8, 0xE0F2FC, 0xE3EE20, 0xE5EBFC, 0xE5EDE8, 0xE5F1F2, 0xE5F220, 0xE820EF, 0xE8E520, 0xE8E820, 0xE8FF20, 0xEBE5ED, 0xEBE820, 0xEBFCED,
     403     0xEDE020, 0xEDE520, 0xEDE8E5, 0xEDE8FF, 0xEDEE20, 0xEDEEE2, 0xEE20E2, 0xEE20EF, 0xEE20F1, 0xEEE220, 0xEEE2E0, 0xEEE3EE, 0xEEE920, 0xEEEBFC, 0xEEEC20, 0xEEF1F2,
     404     0xEFEEEB, 0xEFF0E5, 0xEFF0E8, 0xEFF0EE, 0xF0E0E2, 0xF0E5E4, 0xF1F2E0, 0xF1F2E2, 0xF1F2E8, 0xF1FF20, 0xF2E5EB, 0xF2EE20, 0xF2EEF0, 0xF2FC20, 0xF7F2EE, 0xFBF520,
     405 };
    406 
     // 256-entry byte map (32 rows of 8) named for windows-1251.
     // For values below 0x80: 0x41-0x5A and 0x61-0x7A both yield 0x61-0x7A,
     // 0x27 yields 0x00, and everything else yields 0x20.
     // In the upper half, 0xC0-0xDF are mapped onto 0xE0-0xFF and 0xE0-0xFF
     // are kept as themselves; 0x80-0xBF are a code-page-specific mix.
     // NGramParser::parse() drops bytes that map to 0x00 and collapses runs
     // that map to 0x20. Presumably passed to match_sbcs() as byteMap; the
     // callers are not visible in this part of the file.
     407 static const uint8_t charMap_windows_1251[] = {
     408     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     409     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     410     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     411     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     412     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
     413     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     414     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     415     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     416     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
     417     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
     418     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
     419     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
     420     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
     421     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
     422     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
     423     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
     424     0x90, 0x83, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20,
     425     0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F,
     426     0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     427     0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F,
     428     0x20, 0xA2, 0xA2, 0xBC, 0x20, 0xB4, 0x20, 0x20,
     429     0xB8, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0xBF,
     430     0x20, 0x20, 0xB3, 0xB3, 0xB4, 0xB5, 0x20, 0x20,
     431     0xB8, 0x20, 0xBA, 0x20, 0xBC, 0xBE, 0xBE, 0xBF,
     432     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
     433     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
     434     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
     435     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
     436     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
     437     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
     438     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
     439     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
     440 };
    441 
     // 64 three-byte n-grams named for windows-1256, each packed into the
     // low 24 bits of an int32_t and listed in ascending order.
     // NGramParser::search() relies on there being exactly 64 sorted entries.
     442 static const int32_t ngrams_windows_1256[] = {
     443     0x20C7E1, 0x20C7E4, 0x20C8C7, 0x20DAE1, 0x20DDED, 0x20E1E1, 0x20E3E4, 0x20E6C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E120, 0xC7E1C3, 0xC7E1C7, 0xC7E1C8,
     444     0xC7E1CA, 0xC7E1CC, 0xC7E1CD, 0xC7E1CF, 0xC7E1D3, 0xC7E1DA, 0xC7E1DE, 0xC7E1E3, 0xC7E1E6, 0xC7E1ED, 0xC7E320, 0xC7E420, 0xC7E4CA, 0xC820C7, 0xC920C7, 0xC920DD,
     445     0xC920E1, 0xC920E3, 0xC920E6, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xDA20C7, 0xDAE1EC, 0xDDED20, 0xE120C7, 0xE1C920, 0xE1EC20, 0xE1ED20,
     446     0xE320C7, 0xE3C720, 0xE3C920, 0xE3E420, 0xE420C7, 0xE520C7, 0xE5C720, 0xE6C7E1, 0xE6E420, 0xEC20C7, 0xED20C7, 0xED20E3, 0xED20E6, 0xEDC920, 0xEDD120, 0xEDE420,
     447 };
    448 
     // 256-entry byte map (32 rows of 8) named for windows-1256.
     // For values below 0x80: 0x41-0x5A and 0x61-0x7A both yield 0x61-0x7A,
     // 0x27 yields 0x00, and everything else yields 0x20.
     // Entries for 0x80-0xFF are specific to this code page.
     // NGramParser::parse() drops bytes that map to 0x00 and collapses runs
     // that map to 0x20. Presumably passed to match_sbcs() as byteMap; the
     // callers are not visible in this part of the file.
     449 static const uint8_t charMap_windows_1256[] = {
     450     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     451     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     452     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     453     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     454     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
     455     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     456     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     457     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     458     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
     459     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
     460     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
     461     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
     462     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
     463     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
     464     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
     465     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
     466     0x20, 0x81, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20,
     467     0x88, 0x20, 0x8A, 0x20, 0x9C, 0x8D, 0x8E, 0x8F,
     468     0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     469     0x98, 0x20, 0x9A, 0x20, 0x9C, 0x20, 0x20, 0x9F,
     470     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     471     0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
     472     0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
     473     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     474     0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
     475     0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
     476     0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0x20,
     477     0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
     478     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
     479     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
     480     0x20, 0x20, 0x20, 0x20, 0xF4, 0x20, 0x20, 0x20,
     481     0x20, 0xF9, 0x20, 0xFB, 0xFC, 0x20, 0x20, 0xFF,
     482 };
    483 
     // 64 three-byte n-grams named for KOI8-R, each packed into the low
     // 24 bits of an int32_t and listed in ascending order.
     // NGramParser::search() relies on there being exactly 64 sorted entries.
     484 static const int32_t ngrams_KOI8_R[] = {
     485     0x20C4CF, 0x20C920, 0x20CBC1, 0x20CBCF, 0x20CEC1, 0x20CEC5, 0x20CFC2, 0x20D0CF, 0x20D0D2, 0x20D2C1, 0x20D3CF, 0x20D3D4, 0x20D4CF, 0x20D720, 0x20D7CF, 0x20DAC1,
     486     0x20DCD4, 0x20DED4, 0xC1CEC9, 0xC1D4D8, 0xC5CCD8, 0xC5CEC9, 0xC5D3D4, 0xC5D420, 0xC7CF20, 0xC920D0, 0xC9C520, 0xC9C920, 0xC9D120, 0xCCC5CE, 0xCCC920, 0xCCD8CE,
     487     0xCEC120, 0xCEC520, 0xCEC9C5, 0xCEC9D1, 0xCECF20, 0xCECFD7, 0xCF20D0, 0xCF20D3, 0xCF20D7, 0xCFC7CF, 0xCFCA20, 0xCFCCD8, 0xCFCD20, 0xCFD3D4, 0xCFD720, 0xCFD7C1,
     488     0xD0CFCC, 0xD0D2C5, 0xD0D2C9, 0xD0D2CF, 0xD2C1D7, 0xD2C5C4, 0xD3D120, 0xD3D4C1, 0xD3D4C9, 0xD3D4D7, 0xD4C5CC, 0xD4CF20, 0xD4CFD2, 0xD4D820, 0xD9C820, 0xDED4CF,
     489 };
    490 
     // 256-entry byte map (32 rows of 8) named for KOI8-R.
     // For values below 0x80: 0x41-0x5A and 0x61-0x7A both yield 0x61-0x7A,
     // 0x27 yields 0x00, and everything else yields 0x20.
     // In the upper half, 0xA3 and 0xB3 both yield 0xA3, 0xC0-0xDF are kept
     // as themselves, 0xE0-0xFF are mapped onto 0xC0-0xDF, and every other
     // value yields 0x20.
     // NGramParser::parse() drops bytes that map to 0x00 and collapses runs
     // that map to 0x20. Presumably passed to match_sbcs() as byteMap; the
     // callers are not visible in this part of the file.
     491 static const uint8_t charMap_KOI8_R[] = {
     492     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     493     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     494     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     495     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     496     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
     497     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     498     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     499     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     500     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
     501     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
     502     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
     503     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
     504     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
     505     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
     506     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
     507     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
     508     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     509     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     510     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     511     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     512     0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20,
     513     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     514     0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20,
     515     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
     516     0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
     517     0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
     518     0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
     519     0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
     520     0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
     521     0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
     522     0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
     523     0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
     524 };
    525 
     // 64 three-byte n-grams named for IBM424 (the "_he_rtl" suffix suggests
     // Hebrew in right-to-left order - confirm against the caller). Each is
     // packed into the low 24 bits of an int32_t, in ascending order, as
     // NGramParser::search() requires. Note that 0x40, not 0x20, is the
     // recurring filler byte in these values.
     526 static const int32_t ngrams_IBM424_he_rtl[] = {
     527     0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641,
     528     0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045,
     529     0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x544041, 0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x554056,
     530     0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x684045, 0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x714069,
     531 };
    532 
     // 64 three-byte n-grams named for IBM424 (the "_he_ltr" suffix suggests
     // Hebrew in left-to-right order - confirm against the caller). Each is
     // packed into the low 24 bits of an int32_t, in ascending order, as
     // NGramParser::search() requires. Note that 0x40, not 0x20, is the
     // recurring filler byte in these values.
     533 static const int32_t ngrams_IBM424_he_ltr[] = {
     534     0x404146, 0x404154, 0x404551, 0x404554, 0x404556, 0x404558, 0x405158, 0x405462, 0x405469, 0x405546, 0x405551, 0x405746, 0x405751, 0x406846, 0x406851, 0x407141,
     535     0x407146, 0x407151, 0x414045, 0x414054, 0x414055, 0x414071, 0x414540, 0x414645, 0x415440, 0x415640, 0x424045, 0x424055, 0x424071, 0x454045, 0x454051, 0x454054,
     536     0x454055, 0x454057, 0x454068, 0x454071, 0x455440, 0x464140, 0x464540, 0x484140, 0x514140, 0x514240, 0x514540, 0x544045, 0x544055, 0x544071, 0x546240, 0x546940,
     537     0x555151, 0x555158, 0x555168, 0x564045, 0x564055, 0x564071, 0x564240, 0x564540, 0x624540, 0x694045, 0x694055, 0x694071, 0x694540, 0x714140, 0x714540, 0x714651,
     538 };
    539 
     // 256-entry byte map (16 rows of 16) named for IBM424.
     // Most values yield 0x40; 0x7D yields 0x00. 0xC1-0xC9, 0xD1-0xD9 and
     // 0xE2-0xE9 are mapped onto 0x81-0x89, 0x91-0x99 and 0xA2-0xA9.
     // NOTE(review): the filler here is 0x40, whereas NGramParser::parse()
     // tests for 0x20 when collapsing runs - confirm how this table is
     // consumed (the callers are not visible in this part of the file).
     540 static const uint8_t charMap_IBM424_he[] = {
     541 /*           -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */
     542 /* 0- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     543 /* 1- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     544 /* 2- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     545 /* 3- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     546 /* 4- */    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     547 /* 5- */    0x40, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     548 /* 6- */    0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     549 /* 7- */    0x40, 0x71, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x00, 0x40, 0x40,
     550 /* 8- */    0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     551 /* 9- */    0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     552 /* A- */    0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     553 /* B- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     554 /* C- */    0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     555 /* D- */    0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     556 /* E- */    0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     557 /* F- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     558 };
    559 
     // 64 three-byte n-grams named for IBM420 (the "_ar_rtl" suffix suggests
     // Arabic in right-to-left order - confirm against the caller). Each is
     // packed into the low 24 bits of an int32_t, in ascending order, as
     // NGramParser::search() requires. Note that 0x40, not 0x20, is the
     // recurring filler byte in these values.
     560 static const int32_t ngrams_IBM420_ar_rtl[] = {
     561     0x4056B1, 0x4056BD, 0x405856, 0x409AB1, 0x40ABDC, 0x40B1B1, 0x40BBBD, 0x40CF56, 0x564056, 0x564640, 0x566340, 0x567540, 0x56B140, 0x56B149, 0x56B156, 0x56B158,
     562     0x56B163, 0x56B167, 0x56B169, 0x56B173, 0x56B178, 0x56B19A, 0x56B1AD, 0x56B1BB, 0x56B1CF, 0x56B1DC, 0x56BB40, 0x56BD40, 0x56BD63, 0x584056, 0x624056, 0x6240AB,
     563     0x6240B1, 0x6240BB, 0x6240CF, 0x634056, 0x734056, 0x736240, 0x754056, 0x756240, 0x784056, 0x9A4056, 0x9AB1DA, 0xABDC40, 0xB14056, 0xB16240, 0xB1DA40, 0xB1DC40,
     564     0xBB4056, 0xBB5640, 0xBB6240, 0xBBBD40, 0xBD4056, 0xBF4056, 0xBF5640, 0xCF56B1, 0xCFBD40, 0xDA4056, 0xDC4056, 0xDC40BB, 0xDC40CF, 0xDC6240, 0xDC7540, 0xDCBD40,
     565 };
    566 
     // 64 three-byte n-grams named for IBM420 (the "_ar_ltr" suffix suggests
     // Arabic in left-to-right order - confirm against the caller). Each is
     // packed into the low 24 bits of an int32_t, in ascending order, as
     // NGramParser::search() requires. Note that 0x40, not 0x20, is the
     // recurring filler byte in these values.
     567 static const int32_t ngrams_IBM420_ar_ltr[] = {
     568     0x404656, 0x4056BB, 0x4056BF, 0x406273, 0x406275, 0x4062B1, 0x4062BB, 0x4062DC, 0x406356, 0x407556, 0x4075DC, 0x40B156, 0x40BB56, 0x40BD56, 0x40BDBB, 0x40BDCF,
     569     0x40BDDC, 0x40DAB1, 0x40DCAB, 0x40DCB1, 0x49B156, 0x564056, 0x564058, 0x564062, 0x564063, 0x564073, 0x564075, 0x564078, 0x56409A, 0x5640B1, 0x5640BB, 0x5640BD,
     570     0x5640BF, 0x5640DA, 0x5640DC, 0x565840, 0x56B156, 0x56CF40, 0x58B156, 0x63B156, 0x63BD56, 0x67B156, 0x69B156, 0x73B156, 0x78B156, 0x9AB156, 0xAB4062, 0xADB156,
     571     0xB14062, 0xB15640, 0xB156CF, 0xB19A40, 0xB1B140, 0xBB4062, 0xBB40DC, 0xBBB156, 0xBD5640, 0xBDBB40, 0xCF4062, 0xCF40DC, 0xCFB156, 0xDAB19A, 0xDCAB40, 0xDCB156
     572 };
    573 
     // 256-entry byte map (16 rows of 16) named for IBM420.
     // 0x40 is the filler value and, unlike the other byte maps in this
     // file, no entry is 0x00. 0xC1-0xC9, 0xD1-0xD9 and 0xE2-0xE9 are mapped
     // onto 0x81-0x89, 0x91-0x99 and 0xA2-0xA9.
     // NOTE(review): the filler here is 0x40, whereas NGramParser::parse()
     // tests for 0x20 when collapsing runs - confirm how this table is
     // consumed (the callers are not visible in this part of the file).
     574 static const uint8_t charMap_IBM420_ar[]= {
     575 /*           -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */
     576 /* 0- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     577 /* 1- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     578 /* 2- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     579 /* 3- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     580 /* 4- */    0x40, 0x40, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     581 /* 5- */    0x40, 0x51, 0x52, 0x40, 0x40, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     582 /* 6- */    0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     583 /* 7- */    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
     584 /* 8- */    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
     585 /* 9- */    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
     586 /* A- */    0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
     587 /* B- */    0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0x40, 0x40, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
     588 /* C- */    0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0xCB, 0x40, 0xCD, 0x40, 0xCF,
     589 /* D- */    0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
     590 /* E- */    0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF,
     591 /* F- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40,
     592 };
    593 
// N-gram tables for ISO-8859-1, -2, -5, -6, -7, -8 and -9
    595 
    596 struct NGramsPlusLang {
    597     const int32_t ngrams[64];
    598     const char *  lang;
    599 };
    600 
    601 static const NGramsPlusLang ngrams_8859_1[] =  {
    602   {
    603     {
    604     0x206120, 0x20616E, 0x206265, 0x20636F, 0x20666F, 0x206861, 0x206865, 0x20696E, 0x206D61, 0x206F66, 0x207072, 0x207265, 0x207361, 0x207374, 0x207468, 0x20746F,
    605     0x207768, 0x616964, 0x616C20, 0x616E20, 0x616E64, 0x617320, 0x617420, 0x617465, 0x617469, 0x642061, 0x642074, 0x652061, 0x652073, 0x652074, 0x656420, 0x656E74,
    606     0x657220, 0x657320, 0x666F72, 0x686174, 0x686520, 0x686572, 0x696420, 0x696E20, 0x696E67, 0x696F6E, 0x697320, 0x6E2061, 0x6E2074, 0x6E6420, 0x6E6720, 0x6E7420,
    607     0x6F6620, 0x6F6E20, 0x6F7220, 0x726520, 0x727320, 0x732061, 0x732074, 0x736169, 0x737420, 0x742074, 0x746572, 0x746861, 0x746865, 0x74696F, 0x746F20, 0x747320,
    608     },
    609     "en"
    610   },
    611   {
    612     {
    613     0x206166, 0x206174, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207369, 0x207374, 0x207469, 0x207669, 0x616620,
    614     0x616E20, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646572, 0x646574, 0x652073, 0x656420, 0x656465, 0x656E20, 0x656E64, 0x657220, 0x657265, 0x657320,
    615     0x657420, 0x666F72, 0x676520, 0x67656E, 0x676572, 0x696765, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6572, 0x6C6967, 0x6C6C65, 0x6D6564, 0x6E6465, 0x6E6520,
    616     0x6E6720, 0x6E6765, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722064, 0x722065, 0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696C, 0x766572,
    617     },
    618     "da"
    619   },
    620   {
    621     {
    622     0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765, 0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F,
    623     0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E, 0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220,
    624     0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65, 0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465,
    625     0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 0x766572,
    626     },
    627     "de"
    628   },
    629   {
    630     {
    631     0x206120, 0x206361, 0x20636F, 0x206465, 0x20656C, 0x20656E, 0x206573, 0x20696E, 0x206C61, 0x206C6F, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
    632     0x20756E, 0x207920, 0x612063, 0x612064, 0x612065, 0x61206C, 0x612070, 0x616369, 0x61646F, 0x616C20, 0x617220, 0x617320, 0x6369F3, 0x636F6E, 0x646520, 0x64656C,
    633     0x646F20, 0x652064, 0x652065, 0x65206C, 0x656C20, 0x656E20, 0x656E74, 0x657320, 0x657374, 0x69656E, 0x69F36E, 0x6C6120, 0x6C6F73, 0x6E2065, 0x6E7465, 0x6F2064,
    634     0x6F2065, 0x6F6E20, 0x6F7220, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732064, 0x732065, 0x732070, 0x736520, 0x746520, 0x746F20, 0x756520, 0xF36E20,
    635     },
    636     "es"
    637   },
    638   {
    639     {
    640     0x206175, 0x20636F, 0x206461, 0x206465, 0x206475, 0x20656E, 0x206574, 0x206C61, 0x206C65, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207365, 0x20736F, 0x20756E,
    641     0x20E020, 0x616E74, 0x617469, 0x636520, 0x636F6E, 0x646520, 0x646573, 0x647520, 0x652061, 0x652063, 0x652064, 0x652065, 0x65206C, 0x652070, 0x652073, 0x656E20,
    642     0x656E74, 0x657220, 0x657320, 0x657420, 0x657572, 0x696F6E, 0x697320, 0x697420, 0x6C6120, 0x6C6520, 0x6C6573, 0x6D656E, 0x6E2064, 0x6E6520, 0x6E7320, 0x6E7420,
    643     0x6F6E20, 0x6F6E74, 0x6F7572, 0x717565, 0x72206C, 0x726520, 0x732061, 0x732064, 0x732065, 0x73206C, 0x732070, 0x742064, 0x746520, 0x74696F, 0x756520, 0x757220,
    644     },
    645     "fr"
    646   },
    647   {
    648     {
    649     0x20616C, 0x206368, 0x20636F, 0x206465, 0x206469, 0x206520, 0x20696C, 0x20696E, 0x206C61, 0x207065, 0x207072, 0x20756E, 0x612063, 0x612064, 0x612070, 0x612073,
    650     0x61746F, 0x636865, 0x636F6E, 0x64656C, 0x646920, 0x652061, 0x652063, 0x652064, 0x652069, 0x65206C, 0x652070, 0x652073, 0x656C20, 0x656C6C, 0x656E74, 0x657220,
    651     0x686520, 0x692061, 0x692063, 0x692064, 0x692073, 0x696120, 0x696C20, 0x696E20, 0x696F6E, 0x6C6120, 0x6C6520, 0x6C6920, 0x6C6C61, 0x6E6520, 0x6E6920, 0x6E6F20,
    652     0x6E7465, 0x6F2061, 0x6F2064, 0x6F2069, 0x6F2073, 0x6F6E20, 0x6F6E65, 0x706572, 0x726120, 0x726520, 0x736920, 0x746120, 0x746520, 0x746920, 0x746F20, 0x7A696F,
    653     },
    654     "it"
    655   },
    656   {
    657     {
    658     0x20616C, 0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656E, 0x206765, 0x206865, 0x20696E, 0x206D61, 0x206D65, 0x206F70, 0x207465, 0x207661, 0x207665,
    659     0x20766F, 0x207765, 0x207A69, 0x61616E, 0x616172, 0x616E20, 0x616E64, 0x617220, 0x617420, 0x636874, 0x646520, 0x64656E, 0x646572, 0x652062, 0x652076, 0x65656E,
    660     0x656572, 0x656E20, 0x657220, 0x657273, 0x657420, 0x67656E, 0x686574, 0x696520, 0x696E20, 0x696E67, 0x697320, 0x6E2062, 0x6E2064, 0x6E2065, 0x6E2068, 0x6E206F,
    661     0x6E2076, 0x6E6465, 0x6E6720, 0x6F6E64, 0x6F6F72, 0x6F7020, 0x6F7220, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x76616E, 0x766572, 0x766F6F,
    662     },
    663     "nl"
    664   },
    665   {
    666     {
    667     0x206174, 0x206176, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207365, 0x20736B, 0x20736F, 0x207374, 0x207469,
    668     0x207669, 0x20E520, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646574, 0x652073, 0x656420, 0x656E20, 0x656E65, 0x657220, 0x657265, 0x657420, 0x657474,
    669     0x666F72, 0x67656E, 0x696B6B, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6520, 0x6C6C65, 0x6D6564, 0x6D656E, 0x6E2073, 0x6E6520, 0x6E6720, 0x6E6765, 0x6E6E65,
    670     0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722073, 0x726520, 0x736F6D, 0x737465, 0x742073, 0x746520, 0x74656E, 0x746572, 0x74696C, 0x747420, 0x747465, 0x766572,
    671     },
    672     "no"
    673   },
    674   {
    675     {
    676     0x206120, 0x20636F, 0x206461, 0x206465, 0x20646F, 0x206520, 0x206573, 0x206D61, 0x206E6F, 0x206F20, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
    677     0x20756D, 0x612061, 0x612063, 0x612064, 0x612070, 0x616465, 0x61646F, 0x616C20, 0x617220, 0x617261, 0x617320, 0x636F6D, 0x636F6E, 0x646120, 0x646520, 0x646F20,
    678     0x646F73, 0x652061, 0x652064, 0x656D20, 0x656E74, 0x657320, 0x657374, 0x696120, 0x696361, 0x6D656E, 0x6E7465, 0x6E746F, 0x6F2061, 0x6F2063, 0x6F2064, 0x6F2065,
    679     0x6F2070, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732061, 0x732064, 0x732065, 0x732070, 0x737461, 0x746520, 0x746F20, 0x756520, 0xE36F20, 0xE7E36F,
    680     },
    681     "pt"
    682   },
    683   {
    684     {
    685     0x206174, 0x206176, 0x206465, 0x20656E, 0x2066F6, 0x206861, 0x206920, 0x20696E, 0x206B6F, 0x206D65, 0x206F63, 0x2070E5, 0x20736B, 0x20736F, 0x207374, 0x207469,
    686     0x207661, 0x207669, 0x20E472, 0x616465, 0x616E20, 0x616E64, 0x617220, 0x617474, 0x636820, 0x646520, 0x64656E, 0x646572, 0x646574, 0x656420, 0x656E20, 0x657220,
    687     0x657420, 0x66F672, 0x67656E, 0x696C6C, 0x696E67, 0x6B6120, 0x6C6C20, 0x6D6564, 0x6E2073, 0x6E6120, 0x6E6465, 0x6E6720, 0x6E6765, 0x6E696E, 0x6F6368, 0x6F6D20,
    688     0x6F6E20, 0x70E520, 0x722061, 0x722073, 0x726120, 0x736B61, 0x736F6D, 0x742073, 0x746120, 0x746520, 0x746572, 0x74696C, 0x747420, 0x766172, 0xE47220, 0xF67220,
    689     },
    690     "sv"
    691   }
    692 };
    693 
    694 
    695 static const NGramsPlusLang ngrams_8859_2[] =  {
    696   {
    697     {
    698     0x206120, 0x206279, 0x20646F, 0x206A65, 0x206E61, 0x206E65, 0x206F20, 0x206F64, 0x20706F, 0x207072, 0x2070F8, 0x20726F, 0x207365, 0x20736F, 0x207374, 0x20746F,
    699     0x207620, 0x207679, 0x207A61, 0x612070, 0x636520, 0x636820, 0x652070, 0x652073, 0x652076, 0x656D20, 0x656EED, 0x686F20, 0x686F64, 0x697374, 0x6A6520, 0x6B7465,
    700     0x6C6520, 0x6C6920, 0x6E6120, 0x6EE920, 0x6EEC20, 0x6EED20, 0x6F2070, 0x6F646E, 0x6F6A69, 0x6F7374, 0x6F7520, 0x6F7661, 0x706F64, 0x706F6A, 0x70726F, 0x70F865,
    701     0x736520, 0x736F75, 0x737461, 0x737469, 0x73746E, 0x746572, 0x746EED, 0x746F20, 0x752070, 0xBE6520, 0xE16EED, 0xE9686F, 0xED2070, 0xED2073, 0xED6D20, 0xF86564,
    702     },
    703     "cs"
    704   },
    705   {
    706     {
    707     0x206120, 0x20617A, 0x206265, 0x206567, 0x20656C, 0x206665, 0x206861, 0x20686F, 0x206973, 0x206B65, 0x206B69, 0x206BF6, 0x206C65, 0x206D61, 0x206D65, 0x206D69,
    708     0x206E65, 0x20737A, 0x207465, 0x20E973, 0x612061, 0x61206B, 0x61206D, 0x612073, 0x616B20, 0x616E20, 0x617A20, 0x62616E, 0x62656E, 0x656779, 0x656B20, 0x656C20,
    709     0x656C65, 0x656D20, 0x656E20, 0x657265, 0x657420, 0x657465, 0x657474, 0x677920, 0x686F67, 0x696E74, 0x697320, 0x6B2061, 0x6BF67A, 0x6D6567, 0x6D696E, 0x6E2061,
    710     0x6E616B, 0x6E656B, 0x6E656D, 0x6E7420, 0x6F6779, 0x732061, 0x737A65, 0x737A74, 0x737AE1, 0x73E967, 0x742061, 0x747420, 0x74E173, 0x7A6572, 0xE16E20, 0xE97320,
    711     },
    712     "hu"
    713   },
    714   {
    715     {
    716     0x20637A, 0x20646F, 0x206920, 0x206A65, 0x206B6F, 0x206D61, 0x206D69, 0x206E61, 0x206E69, 0x206F64, 0x20706F, 0x207072, 0x207369, 0x207720, 0x207769, 0x207779,
    717     0x207A20, 0x207A61, 0x612070, 0x612077, 0x616E69, 0x636820, 0x637A65, 0x637A79, 0x646F20, 0x647A69, 0x652070, 0x652073, 0x652077, 0x65207A, 0x65676F, 0x656A20,
    718     0x656D20, 0x656E69, 0x676F20, 0x696120, 0x696520, 0x69656A, 0x6B6120, 0x6B6920, 0x6B6965, 0x6D6965, 0x6E6120, 0x6E6961, 0x6E6965, 0x6F2070, 0x6F7761, 0x6F7769,
    719     0x706F6C, 0x707261, 0x70726F, 0x70727A, 0x727A65, 0x727A79, 0x7369EA, 0x736B69, 0x737461, 0x776965, 0x796368, 0x796D20, 0x7A6520, 0x7A6965, 0x7A7920, 0xF37720,
    720     },
    721     "pl"
    722   },
    723   {
    724     {
    725     0x206120, 0x206163, 0x206361, 0x206365, 0x20636F, 0x206375, 0x206465, 0x206469, 0x206C61, 0x206D61, 0x207065, 0x207072, 0x207365, 0x2073E3, 0x20756E, 0x20BA69,
    726     0x20EE6E, 0x612063, 0x612064, 0x617265, 0x617420, 0x617465, 0x617520, 0x636172, 0x636F6E, 0x637520, 0x63E320, 0x646520, 0x652061, 0x652063, 0x652064, 0x652070,
    727     0x652073, 0x656120, 0x656920, 0x656C65, 0x656E74, 0x657374, 0x692061, 0x692063, 0x692064, 0x692070, 0x696520, 0x696920, 0x696E20, 0x6C6120, 0x6C6520, 0x6C6F72,
    728     0x6C7569, 0x6E6520, 0x6E7472, 0x6F7220, 0x70656E, 0x726520, 0x726561, 0x727520, 0x73E320, 0x746520, 0x747275, 0x74E320, 0x756920, 0x756C20, 0xBA6920, 0xEE6E20,
    729     },
    730     "ro"
    731   }
    732 };
    733 
    734 static const int32_t ngrams_8859_5_ru[] = {
    735     0x20D220, 0x20D2DE, 0x20D4DE, 0x20D7D0, 0x20D820, 0x20DAD0, 0x20DADE, 0x20DDD0, 0x20DDD5, 0x20DED1, 0x20DFDE, 0x20DFE0, 0x20E0D0, 0x20E1DE, 0x20E1E2, 0x20E2DE,
    736     0x20E7E2, 0x20EDE2, 0xD0DDD8, 0xD0E2EC, 0xD3DE20, 0xD5DBEC, 0xD5DDD8, 0xD5E1E2, 0xD5E220, 0xD820DF, 0xD8D520, 0xD8D820, 0xD8EF20, 0xDBD5DD, 0xDBD820, 0xDBECDD,
    737     0xDDD020, 0xDDD520, 0xDDD8D5, 0xDDD8EF, 0xDDDE20, 0xDDDED2, 0xDE20D2, 0xDE20DF, 0xDE20E1, 0xDED220, 0xDED2D0, 0xDED3DE, 0xDED920, 0xDEDBEC, 0xDEDC20, 0xDEE1E2,
    738     0xDFDEDB, 0xDFE0D5, 0xDFE0D8, 0xDFE0DE, 0xE0D0D2, 0xE0D5D4, 0xE1E2D0, 0xE1E2D2, 0xE1E2D8, 0xE1EF20, 0xE2D5DB, 0xE2DE20, 0xE2DEE0, 0xE2EC20, 0xE7E2DE, 0xEBE520,
    739 };
    740 
    741 static const int32_t ngrams_8859_6_ar[] = {
    742     0x20C7E4, 0x20C7E6, 0x20C8C7, 0x20D9E4, 0x20E1EA, 0x20E4E4, 0x20E5E6, 0x20E8C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E420, 0xC7E4C3, 0xC7E4C7, 0xC7E4C8,
    743     0xC7E4CA, 0xC7E4CC, 0xC7E4CD, 0xC7E4CF, 0xC7E4D3, 0xC7E4D9, 0xC7E4E2, 0xC7E4E5, 0xC7E4E8, 0xC7E4EA, 0xC7E520, 0xC7E620, 0xC7E6CA, 0xC820C7, 0xC920C7, 0xC920E1,
    744     0xC920E4, 0xC920E5, 0xC920E8, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xD920C7, 0xD9E4E9, 0xE1EA20, 0xE420C7, 0xE4C920, 0xE4E920, 0xE4EA20,
    745     0xE520C7, 0xE5C720, 0xE5C920, 0xE5E620, 0xE620C7, 0xE720C7, 0xE7C720, 0xE8C7E4, 0xE8E620, 0xE920C7, 0xEA20C7, 0xEA20E5, 0xEA20E8, 0xEAC920, 0xEAD120, 0xEAE620,
    746 };
    747 
    748 static const int32_t ngrams_8859_7_el[] = {
    749     0x20E1ED, 0x20E1F0, 0x20E3E9, 0x20E4E9, 0x20E5F0, 0x20E720, 0x20EAE1, 0x20ECE5, 0x20EDE1, 0x20EF20, 0x20F0E1, 0x20F0EF, 0x20F0F1, 0x20F3F4, 0x20F3F5, 0x20F4E7,
    750     0x20F4EF, 0xDFE120, 0xE120E1, 0xE120F4, 0xE1E920, 0xE1ED20, 0xE1F0FC, 0xE1F220, 0xE3E9E1, 0xE5E920, 0xE5F220, 0xE720F4, 0xE7ED20, 0xE7F220, 0xE920F4, 0xE9E120,
    751     0xE9EADE, 0xE9F220, 0xEAE1E9, 0xEAE1F4, 0xECE520, 0xED20E1, 0xED20E5, 0xED20F0, 0xEDE120, 0xEFF220, 0xEFF520, 0xF0EFF5, 0xF0F1EF, 0xF0FC20, 0xF220E1, 0xF220E5,
    752     0xF220EA, 0xF220F0, 0xF220F4, 0xF3E520, 0xF3E720, 0xF3F4EF, 0xF4E120, 0xF4E1E9, 0xF4E7ED, 0xF4E7F2, 0xF4E9EA, 0xF4EF20, 0xF4EFF5, 0xF4F9ED, 0xF9ED20, 0xFEED20,
    753 };
    754 
    755 static const int32_t ngrams_8859_8_I_he[] = {
    756     0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5, 0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0,
    757     0x20F2EC, 0x20F9EC, 0xE0FA20, 0xE420E0, 0xE420E1, 0xE420E4, 0xE420EC, 0xE420EE, 0xE420F9, 0xE4E5E0, 0xE5E020, 0xE5ED20, 0xE5EF20, 0xE5F820, 0xE5FA20, 0xE920E4,
    758     0xE9E420, 0xE9E5FA, 0xE9E9ED, 0xE9ED20, 0xE9EF20, 0xE9F820, 0xE9FA20, 0xEC20E0, 0xEC20E4, 0xECE020, 0xECE420, 0xED20E0, 0xED20E1, 0xED20E4, 0xED20EC, 0xED20EE,
    759     0xED20F9, 0xEEE420, 0xEF20E4, 0xF0E420, 0xF0E920, 0xF0E9ED, 0xF2EC20, 0xF820E4, 0xF8E9ED, 0xF9EC20, 0xFA20E0, 0xFA20E1, 0xFA20E4, 0xFA20EC, 0xFA20EE, 0xFA20F9,
    760 };
    761 
    762 static const int32_t ngrams_8859_8_he[] = {
    763     0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2, 0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0,
    764     0x20FAE5, 0x20FAE9, 0xE020E4, 0xE020EC, 0xE020ED, 0xE020FA, 0xE0E420, 0xE0E5E4, 0xE0EC20, 0xE0EE20, 0xE120E4, 0xE120ED, 0xE120FA, 0xE420E4, 0xE420E9, 0xE420EC,
    765     0xE420ED, 0xE420EF, 0xE420F8, 0xE420FA, 0xE4EC20, 0xE5E020, 0xE5E420, 0xE7E020, 0xE9E020, 0xE9E120, 0xE9E420, 0xEC20E4, 0xEC20ED, 0xEC20FA, 0xECF220, 0xECF920,
    766     0xEDE9E9, 0xEDE9F0, 0xEDE9F8, 0xEE20E4, 0xEE20ED, 0xEE20FA, 0xEEE120, 0xEEE420, 0xF2E420, 0xF920E4, 0xF920ED, 0xF920FA, 0xF9E420, 0xFAE020, 0xFAE420, 0xFAE5E9,
    767 };
    768 
    769 static const int32_t ngrams_8859_9_tr[] = {
    770     0x206261, 0x206269, 0x206275, 0x206461, 0x206465, 0x206765, 0x206861, 0x20696C, 0x206B61, 0x206B6F, 0x206D61, 0x206F6C, 0x207361, 0x207461, 0x207665, 0x207961,
    771     0x612062, 0x616B20, 0x616C61, 0x616D61, 0x616E20, 0x616EFD, 0x617220, 0x617261, 0x6172FD, 0x6173FD, 0x617961, 0x626972, 0x646120, 0x646520, 0x646920, 0x652062,
    772     0x65206B, 0x656469, 0x656E20, 0x657220, 0x657269, 0x657369, 0x696C65, 0x696E20, 0x696E69, 0x697220, 0x6C616E, 0x6C6172, 0x6C6520, 0x6C6572, 0x6E2061, 0x6E2062,
    773     0x6E206B, 0x6E6461, 0x6E6465, 0x6E6520, 0x6E6920, 0x6E696E, 0x6EFD20, 0x72696E, 0x72FD6E, 0x766520, 0x796120, 0x796F72, 0xFD6E20, 0xFD6E64, 0xFD6EFD, 0xFDF0FD,
    774 };
    775 
    776 CharsetRecog_8859_1::~CharsetRecog_8859_1()
    777 {
    778     // nothing to do
    779 }
    780 
    781 UBool CharsetRecog_8859_1::match(InputText *textIn, CharsetMatch *results) const {
    782     const char *name = textIn->fC1Bytes? "windows-1252" : "ISO-8859-1";
    783     uint32_t i;
    784     int32_t bestConfidenceSoFar = -1;
    785     for (i=0; i < ARRAY_SIZE(ngrams_8859_1) ; i++) {
    786         const int32_t *ngrams = ngrams_8859_1[i].ngrams;
    787         const char    *lang   = ngrams_8859_1[i].lang;
    788         int32_t confidence = match_sbcs(textIn, ngrams, charMap_8859_1);
    789         if (confidence > bestConfidenceSoFar) {
    790             results->set(textIn, this, confidence, name, lang);
    791             bestConfidenceSoFar = confidence;
    792         }
    793     }
    794     return (bestConfidenceSoFar > 0);
    795 }
    796 
    797 const char *CharsetRecog_8859_1::getName() const
    798 {
    799     return "ISO-8859-1";
    800 }
    801 
    802 
    803 CharsetRecog_8859_2::~CharsetRecog_8859_2()
    804 {
    805     // nothing to do
    806 }
    807 
    808 UBool CharsetRecog_8859_2::match(InputText *textIn, CharsetMatch *results) const {
    809     const char *name = textIn->fC1Bytes? "windows-1250" : "ISO-8859-2";
    810     uint32_t i;
    811     int32_t bestConfidenceSoFar = -1;
    812     for (i=0; i < ARRAY_SIZE(ngrams_8859_2) ; i++) {
    813         const int32_t *ngrams = ngrams_8859_2[i].ngrams;
    814         const char    *lang   = ngrams_8859_2[i].lang;
    815         int32_t confidence = match_sbcs(textIn, ngrams, charMap_8859_2);
    816         if (confidence > bestConfidenceSoFar) {
    817             results->set(textIn, this, confidence, name, lang);
    818             bestConfidenceSoFar = confidence;
    819         }
    820     }
    821     return (bestConfidenceSoFar > 0);
    822 }
    823 
    824 const char *CharsetRecog_8859_2::getName() const
    825 {
    826     return "ISO-8859-2";
    827 }
    828 
    829 
    830 CharsetRecog_8859_5::~CharsetRecog_8859_5()
    831 {
    832     // nothing to do
    833 }
    834 
    835 const char *CharsetRecog_8859_5::getName() const
    836 {
    837     return "ISO-8859-5";
    838 }
    839 
    840 CharsetRecog_8859_5_ru::~CharsetRecog_8859_5_ru()
    841 {
    842     // nothing to do
    843 }
    844 
    845 const char *CharsetRecog_8859_5_ru::getLanguage() const
    846 {
    847     return "ru";
    848 }
    849 
    850 UBool CharsetRecog_8859_5_ru::match(InputText *textIn, CharsetMatch *results) const
    851 {
    852     int32_t confidence = match_sbcs(textIn, ngrams_8859_5_ru, charMap_8859_5);
    853     results->set(textIn, this, confidence);
    854     return (confidence > 0);
    855 }
    856 
    857 CharsetRecog_8859_6::~CharsetRecog_8859_6()
    858 {
    859     // nothing to do
    860 }
    861 
    862 const char *CharsetRecog_8859_6::getName() const
    863 {
    864     return "ISO-8859-6";
    865 }
    866 
    867 CharsetRecog_8859_6_ar::~CharsetRecog_8859_6_ar()
    868 {
    869     // nothing to do
    870 }
    871 
    872 const char *CharsetRecog_8859_6_ar::getLanguage() const
    873 {
    874     return "ar";
    875 }
    876 
    877 UBool CharsetRecog_8859_6_ar::match(InputText *textIn, CharsetMatch *results) const
    878 {
    879     int32_t confidence = match_sbcs(textIn, ngrams_8859_6_ar, charMap_8859_6);
    880     results->set(textIn, this, confidence);
    881     return (confidence > 0);
    882 }
    883 
    884 CharsetRecog_8859_7::~CharsetRecog_8859_7()
    885 {
    886     // nothing to do
    887 }
    888 
    889 const char *CharsetRecog_8859_7::getName() const
    890 {
    891     return "ISO-8859-7";
    892 }
    893 
    894 CharsetRecog_8859_7_el::~CharsetRecog_8859_7_el()
    895 {
    896     // nothing to do
    897 }
    898 
    899 const char *CharsetRecog_8859_7_el::getLanguage() const
    900 {
    901     return "el";
    902 }
    903 
    904 UBool CharsetRecog_8859_7_el::match(InputText *textIn, CharsetMatch *results) const
    905 {
    906     const char *name = textIn->fC1Bytes? "windows-1253" : "ISO-8859-7";
    907     int32_t confidence = match_sbcs(textIn, ngrams_8859_7_el, charMap_8859_7);
    908     results->set(textIn, this, confidence, name, "el");
    909     return (confidence > 0);
    910 }
    911 
    912 CharsetRecog_8859_8::~CharsetRecog_8859_8()
    913 {
    914     // nothing to do
    915 }
    916 
    917 const char *CharsetRecog_8859_8::getName() const
    918 {
    919     return "ISO-8859-8";
    920 }
    921 
    922 CharsetRecog_8859_8_I_he::~CharsetRecog_8859_8_I_he ()
    923 {
    924     // nothing to do
    925 }
    926 
    927 const char *CharsetRecog_8859_8_I_he::getName() const
    928 {
    929     return "ISO-8859-8-I";
    930 }
    931 
    932 const char *CharsetRecog_8859_8_I_he::getLanguage() const
    933 {
    934     return "he";
    935 }
    936 
    937 UBool CharsetRecog_8859_8_I_he::match(InputText *textIn, CharsetMatch *results) const
    938 {
    939     const char *name = textIn->fC1Bytes? "windows-1255" : "ISO-8859-8-I";
    940     int32_t confidence = match_sbcs(textIn, ngrams_8859_8_I_he, charMap_8859_8);
    941     results->set(textIn, this, confidence, name, "he");
    942     return (confidence > 0);
    943 }
    944 
    945 CharsetRecog_8859_8_he::~CharsetRecog_8859_8_he()
    946 {
    947     // od ot gnihton
    948 }
    949 
    950 const char *CharsetRecog_8859_8_he::getLanguage() const
    951 {
    952     return "he";
    953 }
    954 
    955 UBool CharsetRecog_8859_8_he::match(InputText *textIn, CharsetMatch *results) const
    956 {
    957     const char *name = textIn->fC1Bytes? "windows-1255" : "ISO-8859-8";
    958     int32_t confidence = match_sbcs(textIn, ngrams_8859_8_he, charMap_8859_8);
    959     results->set(textIn, this, confidence, name, "he");
    960     return (confidence > 0);
    961 }
    962 
    963 CharsetRecog_8859_9::~CharsetRecog_8859_9()
    964 {
    965     // nothing to do
    966 }
    967 
    968 const char *CharsetRecog_8859_9::getName() const
    969 {
    970     return "ISO-8859-9";
    971 }
    972 
    973 CharsetRecog_8859_9_tr::~CharsetRecog_8859_9_tr ()
    974 {
    975     // nothing to do
    976 }
    977 
    978 const char *CharsetRecog_8859_9_tr::getLanguage() const
    979 {
    980     return "tr";
    981 }
    982 
    983 UBool CharsetRecog_8859_9_tr::match(InputText *textIn, CharsetMatch *results) const
    984 {
    985     const char *name = textIn->fC1Bytes? "windows-1254" : "ISO-8859-9";
    986     int32_t confidence = match_sbcs(textIn, ngrams_8859_9_tr, charMap_8859_9);
    987     results->set(textIn, this, confidence, name, "tr");
    988     return (confidence > 0);
    989 }
    990 
    991 CharsetRecog_windows_1256::~CharsetRecog_windows_1256()
    992 {
    993     // nothing to do
    994 }
    995 
    996 const char *CharsetRecog_windows_1256::getName() const
    997 {
    998     return  "windows-1256";
    999 }
   1000 
   1001 const char *CharsetRecog_windows_1256::getLanguage() const
   1002 {
   1003     return "ar";
   1004 }
   1005 
   1006 UBool CharsetRecog_windows_1256::match(InputText *textIn, CharsetMatch *results) const
   1007 {
   1008     int32_t confidence = match_sbcs(textIn, ngrams_windows_1256, charMap_windows_1256);
   1009     results->set(textIn, this, confidence);
   1010     return (confidence > 0);
   1011 }
   1012 
   1013 CharsetRecog_windows_1251::~CharsetRecog_windows_1251()
   1014 {
   1015     // nothing to do
   1016 }
   1017 
   1018 const char *CharsetRecog_windows_1251::getName() const
   1019 {
   1020     return  "windows-1251";
   1021 }
   1022 
   1023 const char *CharsetRecog_windows_1251::getLanguage() const
   1024 {
   1025     return "ru";
   1026 }
   1027 
   1028 UBool CharsetRecog_windows_1251::match(InputText *textIn, CharsetMatch *results) const
   1029 {
   1030     int32_t confidence = match_sbcs(textIn, ngrams_windows_1251, charMap_windows_1251);
   1031     results->set(textIn, this, confidence);
   1032     return (confidence > 0);
   1033 }
   1034 
   1035 CharsetRecog_KOI8_R::~CharsetRecog_KOI8_R()
   1036 {
   1037     // nothing to do
   1038 }
   1039 
   1040 const char *CharsetRecog_KOI8_R::getName() const
   1041 {
   1042     return  "KOI8-R";
   1043 }
   1044 
   1045 const char *CharsetRecog_KOI8_R::getLanguage() const
   1046 {
   1047     return "ru";
   1048 }
   1049 
   1050 UBool CharsetRecog_KOI8_R::match(InputText *textIn, CharsetMatch *results) const
   1051 {
   1052     int32_t confidence = match_sbcs(textIn, ngrams_KOI8_R, charMap_KOI8_R);
   1053     results->set(textIn, this, confidence);
   1054     return (confidence > 0);
   1055 }
   1056 
   1057 CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he()
   1058 {
   1059     // nothing to do
   1060 }
   1061 
   1062 const char *CharsetRecog_IBM424_he::getLanguage() const
   1063 {
   1064     return "he";
   1065 }
   1066 
   1067 CharsetRecog_IBM424_he_rtl::~CharsetRecog_IBM424_he_rtl()
   1068 {
   1069     // nothing to do
   1070 }
   1071 
   1072 const char *CharsetRecog_IBM424_he_rtl::getName() const
   1073 {
   1074     return  "IBM424_rtl";
   1075 }
   1076 
   1077 UBool CharsetRecog_IBM424_he_rtl::match(InputText *textIn, CharsetMatch *results) const
   1078 {
   1079     int32_t confidence = match_sbcs(textIn, ngrams_IBM424_he_rtl, charMap_IBM424_he);
   1080     results->set(textIn, this, confidence);
   1081     return (confidence > 0);
   1082 }
   1083 
   1084 CharsetRecog_IBM424_he_ltr::~CharsetRecog_IBM424_he_ltr()
   1085 {
   1086     // nothing to do
   1087 }
   1088 
   1089 const char *CharsetRecog_IBM424_he_ltr::getName() const
   1090 {
   1091     return  "IBM424_ltr";
   1092 }
   1093 
   1094 UBool CharsetRecog_IBM424_he_ltr::match(InputText *textIn, CharsetMatch *results) const
   1095 {
   1096     int32_t confidence = match_sbcs(textIn, ngrams_IBM424_he_ltr, charMap_IBM424_he);
   1097     results->set(textIn, this, confidence);
   1098     return (confidence > 0);
   1099 }
   1100 
   1101 static const uint8_t unshapeMap_IBM420[] = {
   1102 /*           -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */
   1103 /* 0- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
   1104 /* 1- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
   1105 /* 2- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
   1106 /* 3- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
   1107 /* 4- */    0x40, 0x40, 0x42, 0x42, 0x44, 0x45, 0x46, 0x47, 0x47, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
   1108 /* 5- */    0x50, 0x49, 0x52, 0x53, 0x54, 0x55, 0x56, 0x56, 0x58, 0x58, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
   1109 /* 6- */    0x60, 0x61, 0x62, 0x63, 0x63, 0x65, 0x65, 0x67, 0x67, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
   1110 /* 7- */    0x69, 0x71, 0x71, 0x73, 0x74, 0x75, 0x76, 0x77, 0x77, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
   1111 /* 8- */    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x80, 0x8B, 0x8B, 0x8D, 0x8D, 0x8F,
   1112 /* 9- */    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9A, 0x9A, 0x9A, 0x9E, 0x9E,
   1113 /* A- */    0x9E, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x9E, 0xAB, 0xAB, 0xAD, 0xAD, 0xAF,
   1114 /* B- */    0xAF, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xB1, 0xBB, 0xBB, 0xBD, 0xBD, 0xBF,
   1115 /* C- */    0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xBF, 0xCC, 0xBF, 0xCE, 0xCF,
   1116 /* D- */    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDA, 0xDC, 0xDC, 0xDC, 0xDF,
   1117 /* E- */    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
   1118 /* F- */    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
   1119 };
   1120 
   1121 CharsetRecog_IBM420_ar::~CharsetRecog_IBM420_ar()
   1122 {
   1123     // nothing to do
   1124 }
   1125 
   1126 const char *CharsetRecog_IBM420_ar::getLanguage() const
   1127 {
   1128     return "ar";
   1129 }
   1130 
   1131 void CharsetRecog_IBM420_ar::matchInit(InputText *textIn) {
   1132     prev_fInputBytesLength = textIn->fInputLen;
   1133     prev_fInputBytes = textIn->fInputBytes;
   1134 
   1135     int32_t length = 0;
   1136     uint8_t *bb = unshape(prev_fInputBytes, prev_fInputBytesLength, length);
   1137 
   1138     if (bb != NULL) {
   1139         textIn->fInputBytes = bb;
   1140         textIn->fInputLen = length;
   1141 
   1142         deleteBuffer = TRUE;
   1143     } else {
   1144         deleteBuffer = FALSE;
   1145     }
   1146 }
   1147 
   1148 uint8_t *CharsetRecog_IBM420_ar::unshape(const uint8_t *inputBytes, int32_t inputBytesLength, int32_t &length) {
   1149     uint8_t *resultArray = unshapeLamAlef(inputBytes, inputBytesLength, length);
   1150 
   1151     if (resultArray != NULL) {
   1152         for (int32_t i = 0; i < inputBytesLength; i++) {
   1153             resultArray[i] = unshapeMap_IBM420[resultArray[i]];
   1154         }
   1155     }
   1156 
   1157     return resultArray;
   1158 }
   1159 
   1160 uint8_t *CharsetRecog_IBM420_ar::unshapeLamAlef(const uint8_t *inputBytes, int32_t inputBytesLength, int32_t &length) {
   1161     int32_t bigBufferLength = inputBytesLength * 2;
   1162     uint8_t *bigBuffer = (uint8_t *)uprv_malloc(bigBufferLength);
   1163     uint8_t *resultBuffer = NULL;
   1164 
   1165     if (bigBuffer != NULL) {
   1166         int32_t bufferIndex;
   1167         static const uint8_t unshapedLamAlef[] = { 0xb1, 0x56 };
   1168 
   1169         for (int32_t i = bufferIndex = 0; i < inputBytesLength; i++) {
   1170             if (isLamAlef(inputBytes[i])) {
   1171                 bigBuffer[bufferIndex++] = unshapedLamAlef[0];
   1172                 bigBuffer[bufferIndex++] = unshapedLamAlef[1];
   1173             } else {
   1174                 bigBuffer[bufferIndex++] = inputBytes[i];
   1175             }
   1176         }
   1177 
   1178         length = bufferIndex;
   1179         resultBuffer = (uint8_t *)uprv_malloc(length);
   1180         if (resultBuffer != NULL) {
   1181             uprv_memcpy(resultBuffer, bigBuffer, length);
   1182         }
   1183     }
   1184 
   1185     if (bigBuffer != NULL) {
   1186         uprv_free(bigBuffer);
   1187     }
   1188 
   1189     return resultBuffer;
   1190 }
   1191 
   1192 void CharsetRecog_IBM420_ar::matchFinish(InputText *textIn) {
   1193     if (deleteBuffer) {
   1194         uprv_free(textIn->fInputBytes);
   1195 
   1196         textIn->fInputBytes = prev_fInputBytes;
   1197         textIn->fInputLen = prev_fInputBytesLength;
   1198     }
   1199 }
   1200 
   1201 UBool CharsetRecog_IBM420_ar::isLamAlef(uint8_t b) {
   1202     static const uint8_t shapedLamAlef[] = {
   1203         0xb2, 0xb3, 0xb4, 0xb5, 0xb7, 0xb8
   1204     };
   1205 
   1206     for (uint32_t i = 0; i < sizeof(shapedLamAlef); i++) {
   1207         if (b == shapedLamAlef[i]) {
   1208             return TRUE;
   1209         }
   1210     }
   1211 
   1212     return FALSE;
   1213 }
   1214 
   1215 CharsetRecog_IBM420_ar_rtl::~CharsetRecog_IBM420_ar_rtl()
   1216 {
   1217     // nothing to do
   1218 }
   1219 
   1220 const char *CharsetRecog_IBM420_ar_rtl::getName() const
   1221 {
   1222     return  "IBM420_rtl";
   1223 }
   1224 
   1225 UBool CharsetRecog_IBM420_ar_rtl::match(InputText *textIn, CharsetMatch *results) const
   1226 {
   1227     int32_t confidence = match_sbcs(textIn, ngrams_IBM420_ar_rtl, charMap_IBM420_ar);
   1228     results->set(textIn, this, confidence);
   1229     return (confidence > 0);
   1230 }
   1231 
   1232 CharsetRecog_IBM420_ar_ltr::~CharsetRecog_IBM420_ar_ltr()
   1233 {
   1234     // nothing to do
   1235 }
   1236 
   1237 const char *CharsetRecog_IBM420_ar_ltr::getName() const
   1238 {
   1239     return  "IBM420_ltr";
   1240 }
   1241 
   1242 UBool CharsetRecog_IBM420_ar_ltr::match(InputText *textIn, CharsetMatch *results) const
   1243 {
   1244     int32_t confidence = match_sbcs(textIn, ngrams_IBM420_ar_ltr, charMap_IBM420_ar);
   1245     results->set(textIn, this, confidence);
   1246     return (confidence > 0);
   1247 }
   1248 
   1249 U_NAMESPACE_END
   1250 #endif
   1251 
   1252