Home | History | Annotate | Download | only in common
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 2000-2009, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 *   file name:  ucnvisci.c
      7 *   encoding:   US-ASCII
      8 *   tab size:   8 (not used)
      9 *   indentation:4
     10 *
     11 *   created on: 2001JUN26
     12 *   created by: Ram Viswanadha
     13 *
     14 *   Date        Name        Description
     15 *   24/7/2001   Ram         Added support for EXT character handling
     16 */
     17 
     18 #include "unicode/utypes.h"
     19 
     20 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
     21 
     22 #include "cmemory.h"
     23 #include "ucnv_bld.h"
     24 #include "unicode/ucnv.h"
     25 #include "ucnv_cnv.h"
     26 #include "unicode/ucnv_cb.h"
     27 #include "unicode/uset.h"
     28 #include "cstring.h"
     29 
     30 #define UCNV_OPTIONS_VERSION_MASK 0xf
     31 #define NUKTA               0x093c
     32 #define HALANT              0x094d
     33 #define ZWNJ                0x200c /* Zero Width Non Joiner */
     34 #define ZWJ                 0x200d /* Zero width Joiner */
     35 #define INVALID_CHAR        0xffff
     36 #define ATR                 0xEF   /* Attribute code */
     37 #define EXT                 0xF0   /* Extension code */
     38 #define DANDA               0x0964
     39 #define DOUBLE_DANDA        0x0965
     40 #define ISCII_NUKTA         0xE9
     41 #define ISCII_HALANT        0xE8
     42 #define ISCII_DANDA         0xEA
     43 #define ISCII_INV           0xD9
     44 #define ISCII_VOWEL_SIGN_E  0xE0
     45 #define INDIC_BLOCK_BEGIN   0x0900
     46 #define INDIC_BLOCK_END     0x0D7F
     47 #define INDIC_RANGE         (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN)
     48 #define VOCALLIC_RR         0x0931
     49 #define LF                  0x0A
     50 #define ASCII_END           0xA0
     51 #define NO_CHAR_MARKER      0xFFFE
     52 #define TELUGU_DELTA        DELTA * TELUGU
     53 #define DEV_ABBR_SIGN       0x0970
     54 #define DEV_ANUDATTA        0x0952
     55 #define EXT_RANGE_BEGIN     0xA1
     56 #define EXT_RANGE_END       0xEE
     57 
     58 #define PNJ_DELTA           0x0100
     59 #define PNJ_BINDI           0x0A02
     60 #define PNJ_TIPPI           0x0A70
     61 #define PNJ_SIGN_VIRAMA     0x0A4D
     62 #define PNJ_ADHAK           0x0A71
     63 #define PNJ_HA              0x0A39
     64 #define PNJ_RRA             0x0A5C
     65 
     66 static USet* PNJ_BINDI_TIPPI_SET= NULL;
     67 static USet* PNJ_CONSONANT_SET= NULL;
     68 
     69 typedef enum {
     70     DEVANAGARI =0,
     71     BENGALI,
     72     GURMUKHI,
     73     GUJARATI,
     74     ORIYA,
     75     TAMIL,
     76     TELUGU,
     77     KANNADA,
     78     MALAYALAM,
     79     DELTA=0x80
     80 }UniLang;
     81 
     82 /**
     83  * Enumeration for switching code pages if <ATR>+<one of below values>
     84  * is encountered
     85  */
     86 typedef enum {
     87     DEF = 0x40,
     88     RMN = 0x41,
     89     DEV = 0x42,
     90     BNG = 0x43,
     91     TML = 0x44,
     92     TLG = 0x45,
     93     ASM = 0x46,
     94     ORI = 0x47,
     95     KND = 0x48,
     96     MLM = 0x49,
     97     GJR = 0x4A,
     98     PNJ = 0x4B,
     99     ARB = 0x71,
    100     PES = 0x72,
    101     URD = 0x73,
    102     SND = 0x74,
    103     KSM = 0x75,
    104     PST = 0x76
    105 }ISCIILang;
    106 
    107 typedef enum {
    108     DEV_MASK =0x80,
    109     PNJ_MASK =0x40,
    110     GJR_MASK =0x20,
    111     ORI_MASK =0x10,
    112     BNG_MASK =0x08,
    113     KND_MASK =0x04,
    114     MLM_MASK =0x02,
    115     TML_MASK =0x01,
    116     ZERO =0x00
    117 }MaskEnum;
    118 
    119 #define ISCII_CNV_PREFIX "ISCII,version="
    120 
    121 typedef struct {
    122     UChar contextCharToUnicode;         /* previous Unicode codepoint for contextual analysis */
    123     UChar contextCharFromUnicode;       /* previous Unicode codepoint for contextual analysis */
    124     uint16_t defDeltaToUnicode;         /* delta for switching to default state when DEF is encountered  */
    125     uint16_t currentDeltaFromUnicode;   /* current delta in Indic block */
    126     uint16_t currentDeltaToUnicode;     /* current delta in Indic block */
    127     MaskEnum currentMaskFromUnicode;    /* mask for current state in toUnicode */
    128     MaskEnum currentMaskToUnicode;      /* mask for current state in toUnicode */
    129     MaskEnum defMaskToUnicode;          /* mask for default state in toUnicode */
    130     UBool isFirstBuffer;                /* boolean for fromUnicode to see if we need to announce the first script */
    131     UBool resetToDefaultToUnicode;      /* boolean for reseting to default delta and mask when a newline is encountered*/
    132     char name[sizeof(ISCII_CNV_PREFIX) + 1];
    133     UChar32 prevToUnicodeStatus;        /* Hold the previous toUnicodeStatus. This is necessary because we may need to know the last two code points. */
    134 } UConverterDataISCII;
    135 
    136 typedef struct LookupDataStruct {
    137     UniLang uniLang;
    138     MaskEnum maskEnum;
    139     ISCIILang isciiLang;
    140 } LookupDataStruct;
    141 
    142 static const LookupDataStruct lookupInitialData[]={
    143     { DEVANAGARI, DEV_MASK,  DEV },
    144     { BENGALI,    BNG_MASK,  BNG },
    145     { GURMUKHI,   PNJ_MASK,  PNJ },
    146     { GUJARATI,   GJR_MASK,  GJR },
    147     { ORIYA,      ORI_MASK,  ORI },
    148     { TAMIL,      TML_MASK,  TML },
    149     { TELUGU,     KND_MASK,  TLG },
    150     { KANNADA,    KND_MASK,  KND },
    151     { MALAYALAM,  MLM_MASK,  MLM }
    152 };
    153 
    154 static void initializeSets() {
    155     /* TODO: Replace the following two lines with PNJ_CONSONANT_SET = uset_openEmpty(); */
    156     PNJ_CONSONANT_SET = uset_open(0,0);
    157     uset_clear(PNJ_CONSONANT_SET);
    158 
    159     uset_addRange(PNJ_CONSONANT_SET, 0x0A15, 0x0A28);
    160     uset_addRange(PNJ_CONSONANT_SET, 0x0A2A, 0x0A30);
    161     uset_addRange(PNJ_CONSONANT_SET, 0x0A35, 0x0A36);
    162     uset_addRange(PNJ_CONSONANT_SET, 0x0A38, 0x0A39);
    163 
    164     PNJ_BINDI_TIPPI_SET = uset_clone(PNJ_CONSONANT_SET);
    165     uset_add(PNJ_BINDI_TIPPI_SET, 0x0A05);
    166     uset_add(PNJ_BINDI_TIPPI_SET, 0x0A07);
    167     uset_add(PNJ_BINDI_TIPPI_SET, 0x0A3F);
    168     uset_addRange(PNJ_BINDI_TIPPI_SET, 0x0A41, 0x0A42);
    169 
    170     uset_compact(PNJ_CONSONANT_SET);
    171     uset_compact(PNJ_BINDI_TIPPI_SET);
    172 }
    173 
    174 static void _ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode) {
    175     if(pArgs->onlyTestIsLoadable) {
    176         return;
    177     }
    178 
    179     /* Ensure that the sets used in special handling of certain Gurmukhi characters are initialized. */
    180     initializeSets();
    181 
    182     cnv->extraInfo = uprv_malloc(sizeof(UConverterDataISCII));
    183 
    184     if (cnv->extraInfo != NULL) {
    185         int32_t len=0;
    186         UConverterDataISCII *converterData=
    187                 (UConverterDataISCII *) cnv->extraInfo;
    188         converterData->contextCharToUnicode=NO_CHAR_MARKER;
    189         cnv->toUnicodeStatus = missingCharMarker;
    190         converterData->contextCharFromUnicode=0x0000;
    191         converterData->resetToDefaultToUnicode=FALSE;
    192         /* check if the version requested is supported */
    193         if ((pArgs->options & UCNV_OPTIONS_VERSION_MASK) < 9) {
    194             /* initialize state variables */
    195             converterData->currentDeltaFromUnicode
    196                     = converterData->currentDeltaToUnicode
    197                             = converterData->defDeltaToUnicode = (uint16_t)(lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].uniLang * DELTA);
    198 
    199             converterData->currentMaskFromUnicode
    200                     = converterData->currentMaskToUnicode
    201                             = converterData->defMaskToUnicode = lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].maskEnum;
    202 
    203             converterData->isFirstBuffer=TRUE;
    204             (void)uprv_strcpy(converterData->name, ISCII_CNV_PREFIX);
    205             len = (int32_t)uprv_strlen(converterData->name);
    206             converterData->name[len]= (char)((pArgs->options & UCNV_OPTIONS_VERSION_MASK) + '0');
    207             converterData->name[len+1]=0;
    208 
    209             converterData->prevToUnicodeStatus = 0x0000;
    210         } else {
    211             uprv_free(cnv->extraInfo);
    212             cnv->extraInfo = NULL;
    213             *errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    214         }
    215 
    216     } else {
    217         *errorCode =U_MEMORY_ALLOCATION_ERROR;
    218     }
    219 }
    220 
    221 static void _ISCIIClose(UConverter *cnv) {
    222     if (cnv->extraInfo!=NULL) {
    223         if (!cnv->isExtraLocal) {
    224             uprv_free(cnv->extraInfo);
    225         }
    226         cnv->extraInfo=NULL;
    227     }
    228     if (PNJ_CONSONANT_SET != NULL) {
    229         uset_close(PNJ_CONSONANT_SET);
    230         PNJ_CONSONANT_SET = NULL;
    231     }
    232     if (PNJ_BINDI_TIPPI_SET != NULL) {
    233         uset_close(PNJ_BINDI_TIPPI_SET);
    234         PNJ_BINDI_TIPPI_SET = NULL;
    235     }
    236 }
    237 
    238 static const char* _ISCIIgetName(const UConverter* cnv) {
    239     if (cnv->extraInfo) {
    240         UConverterDataISCII* myData= (UConverterDataISCII*)cnv->extraInfo;
    241         return myData->name;
    242     }
    243     return NULL;
    244 }
    245 
    246 static void _ISCIIReset(UConverter *cnv, UConverterResetChoice choice) {
    247     UConverterDataISCII* data =(UConverterDataISCII *) (cnv->extraInfo);
    248     if (choice<=UCNV_RESET_TO_UNICODE) {
    249         cnv->toUnicodeStatus = missingCharMarker;
    250         cnv->mode=0;
    251         data->currentDeltaToUnicode=data->defDeltaToUnicode;
    252         data->currentMaskToUnicode = data->defMaskToUnicode;
    253         data->contextCharToUnicode=NO_CHAR_MARKER;
    254         data->prevToUnicodeStatus = 0x0000;
    255     }
    256     if (choice!=UCNV_RESET_TO_UNICODE) {
    257         cnv->fromUChar32=0x0000;
    258         data->contextCharFromUnicode=0x00;
    259         data->currentMaskFromUnicode=data->defMaskToUnicode;
    260         data->currentDeltaFromUnicode=data->defDeltaToUnicode;
    261         data->isFirstBuffer=TRUE;
    262         data->resetToDefaultToUnicode=FALSE;
    263     }
    264 }
    265 
/**
 * The values in the validity table are indexed by the lower bits of the Unicode
 * range 0x0900 - 0x097f (the table has 128 entries). The values have a structure like:
 *       ---------------------------------------------------------------
 *      | DEV   | PNJ   | GJR   | ORI   | BNG   | TLG   | MLM   | TML   |
 *      |       |       |       |       | ASM   | KND   |       |       |
 *       ---------------------------------------------------------------
 * If a code point is valid in a particular script
 * then that bit is turned on.
 *
 * Unicode does not distinguish between Bengali and Assamese so we use 1 bit
 * to represent these languages.
 *
 * Telugu and Kannada have the same code points except for Vocallic_RR, which we
 * special case, so we combine them and use 1 bit to represent these languages.
 *
 * TODO: It is probably easier to understand and maintain to change this
 * to use uint16_t and give each of the 9 Unicode/script blocks its own bit.
 */

static const uint8_t validityTable[128] = {
/* This state table is tool generated please do not edit unless you know exactly what you are doing */
/* Note: This table was edited to mirror the Windows XP implementation */
/*
 * Each entry is the sum of the MaskEnum bits of the scripts in which the code
 * point is valid. The "Valid" hex column in the per-entry comments is not
 * always in sync with the initializer (see e.g. 0x904 and 0x92e); the
 * initializer is what the compiler sees.
 */
/*ISCII:Valid:Unicode */
/*0xa0 : 0x00: 0x900  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xa1 : 0xb8: 0x901  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0xa2 : 0xfe: 0x902  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xa3 : 0xbf: 0x903  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0x00 : 0x00: 0x904  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xa4 : 0xff: 0x905  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xa5 : 0xff: 0x906  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xa6 : 0xff: 0x907  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xa7 : 0xff: 0x908  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xa8 : 0xff: 0x909  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xa9 : 0xff: 0x90a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xaa : 0xfe: 0x90b  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0x00 : 0x00: 0x90c  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xae : 0x80: 0x90d  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xab : 0x87: 0x90e  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
/*0xac : 0xff: 0x90f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xad : 0xff: 0x910  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb2 : 0x80: 0x911  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xaf : 0x87: 0x912  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb0 : 0xff: 0x913  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb1 : 0xff: 0x914  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb3 : 0xff: 0x915  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb4 : 0xfe: 0x916  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xb5 : 0xfe: 0x917  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xb6 : 0xfe: 0x918  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xb7 : 0xff: 0x919  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb8 : 0xff: 0x91a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xb9 : 0xfe: 0x91b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xba : 0xff: 0x91c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xbb : 0xfe: 0x91d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xbc : 0xff: 0x91e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xbd : 0xff: 0x91f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xbe : 0xfe: 0x920  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xbf : 0xfe: 0x921  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xc0 : 0xfe: 0x922  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xc1 : 0xff: 0x923  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xc2 : 0xff: 0x924  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xc3 : 0xfe: 0x925  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xc4 : 0xfe: 0x926  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xc5 : 0xfe: 0x927  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xc6 : 0xff: 0x928  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xc7 : 0x81: 0x929  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + TML_MASK ,
/*0xc8 : 0xff: 0x92a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xc9 : 0xfe: 0x92b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xca : 0xfe: 0x92c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xcb : 0xfe: 0x92d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xcc : 0xfe: 0x92e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xcd : 0xff: 0x92f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xcf : 0xff: 0x930  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd0 : 0x87: 0x931  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + MLM_MASK + TML_MASK ,
/*0xd1 : 0xff: 0x932  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd2 : 0xb7: 0x933  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd3 : 0x83: 0x934  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + MLM_MASK + TML_MASK ,
/*0xd4 : 0xff: 0x935  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd5 : 0xfe: 0x936  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0xd6 : 0xbf: 0x937  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd7 : 0xff: 0x938  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xd8 : 0xff: 0x939  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0x00 : 0x00: 0x93A  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x93B  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xe9 : 0xda: 0x93c  */ DEV_MASK + PNJ_MASK + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x93d  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xda : 0xff: 0x93e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xdb : 0xff: 0x93f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xdc : 0xff: 0x940  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xdd : 0xff: 0x941  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xde : 0xff: 0x942  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xdf : 0xbe: 0x943  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0x00 : 0x00: 0x944  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + BNG_MASK + KND_MASK + ZERO     + ZERO     ,
/*0xe3 : 0x80: 0x945  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xe0 : 0x87: 0x946  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
/*0xe1 : 0xff: 0x947  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xe2 : 0xff: 0x948  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xe7 : 0x80: 0x949  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xe4 : 0x87: 0x94a  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
/*0xe5 : 0xff: 0x94b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xe6 : 0xff: 0x94c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xe8 : 0xff: 0x94d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xec : 0x00: 0x94e  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xed : 0x00: 0x94f  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x950  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x951  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x952  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x953  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x954  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x955  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x956  */ ZERO     + ZERO     + ZERO     + ORI_MASK + ZERO     + KND_MASK + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x957  */ ZERO     + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + MLM_MASK + ZERO     ,
/*0x00 : 0x00: 0x958  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x959  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x95a  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x95b  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x95c  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x95d  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x95e  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xce : 0x98: 0x95f  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x960  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0x00 : 0x00: 0x961  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
/*0x00 : 0x00: 0x962  */ DEV_MASK + ZERO     + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0x00 : 0x00: 0x963  */ DEV_MASK + ZERO     + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
/*0xea : 0xf8: 0x964  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xeaea : 0x00: 0x965*/ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*0xf1 : 0xff: 0x966  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf2 : 0xff: 0x967  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf3 : 0xff: 0x968  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf4 : 0xff: 0x969  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf5 : 0xff: 0x96a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf6 : 0xff: 0x96b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf7 : 0xff: 0x96c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf8 : 0xff: 0x96d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xf9 : 0xff: 0x96e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0xfa : 0xff: 0x96f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
/*0x00 : 0x80: 0x970  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
/*
 * The length of the array is 128 to provide values for 0x900..0x97f.
 * The last 15 entries for 0x971..0x97f of the validity table are all zero
 * because no Indic script uses such Unicode code points.
 * Only the first of them is written out below; the remaining ones are
 * zero because C zero-fills array elements that have no initializer.
 */
/*0x00 : 0x00: 0x9yz  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO
};
    410 
    411 static const uint16_t fromUnicodeTable[128]={
    412     0x00a0 ,/* 0x0900 */
    413     0x00a1 ,/* 0x0901 */
    414     0x00a2 ,/* 0x0902 */
    415     0x00a3 ,/* 0x0903 */
    416     0xa4e0 ,/* 0x0904 */
    417     0x00a4 ,/* 0x0905 */
    418     0x00a5 ,/* 0x0906 */
    419     0x00a6 ,/* 0x0907 */
    420     0x00a7 ,/* 0x0908 */
    421     0x00a8 ,/* 0x0909 */
    422     0x00a9 ,/* 0x090a */
    423     0x00aa ,/* 0x090b */
    424     0xA6E9 ,/* 0x090c */
    425     0x00ae ,/* 0x090d */
    426     0x00ab ,/* 0x090e */
    427     0x00ac ,/* 0x090f */
    428     0x00ad ,/* 0x0910 */
    429     0x00b2 ,/* 0x0911 */
    430     0x00af ,/* 0x0912 */
    431     0x00b0 ,/* 0x0913 */
    432     0x00b1 ,/* 0x0914 */
    433     0x00b3 ,/* 0x0915 */
    434     0x00b4 ,/* 0x0916 */
    435     0x00b5 ,/* 0x0917 */
    436     0x00b6 ,/* 0x0918 */
    437     0x00b7 ,/* 0x0919 */
    438     0x00b8 ,/* 0x091a */
    439     0x00b9 ,/* 0x091b */
    440     0x00ba ,/* 0x091c */
    441     0x00bb ,/* 0x091d */
    442     0x00bc ,/* 0x091e */
    443     0x00bd ,/* 0x091f */
    444     0x00be ,/* 0x0920 */
    445     0x00bf ,/* 0x0921 */
    446     0x00c0 ,/* 0x0922 */
    447     0x00c1 ,/* 0x0923 */
    448     0x00c2 ,/* 0x0924 */
    449     0x00c3 ,/* 0x0925 */
    450     0x00c4 ,/* 0x0926 */
    451     0x00c5 ,/* 0x0927 */
    452     0x00c6 ,/* 0x0928 */
    453     0x00c7 ,/* 0x0929 */
    454     0x00c8 ,/* 0x092a */
    455     0x00c9 ,/* 0x092b */
    456     0x00ca ,/* 0x092c */
    457     0x00cb ,/* 0x092d */
    458     0x00cc ,/* 0x092e */
    459     0x00cd ,/* 0x092f */
    460     0x00cf ,/* 0x0930 */
    461     0x00d0 ,/* 0x0931 */
    462     0x00d1 ,/* 0x0932 */
    463     0x00d2 ,/* 0x0933 */
    464     0x00d3 ,/* 0x0934 */
    465     0x00d4 ,/* 0x0935 */
    466     0x00d5 ,/* 0x0936 */
    467     0x00d6 ,/* 0x0937 */
    468     0x00d7 ,/* 0x0938 */
    469     0x00d8 ,/* 0x0939 */
    470     0xFFFF ,/* 0x093A */
    471     0xFFFF ,/* 0x093B */
    472     0x00e9 ,/* 0x093c */
    473     0xEAE9 ,/* 0x093d */
    474     0x00da ,/* 0x093e */
    475     0x00db ,/* 0x093f */
    476     0x00dc ,/* 0x0940 */
    477     0x00dd ,/* 0x0941 */
    478     0x00de ,/* 0x0942 */
    479     0x00df ,/* 0x0943 */
    480     0xDFE9 ,/* 0x0944 */
    481     0x00e3 ,/* 0x0945 */
    482     0x00e0 ,/* 0x0946 */
    483     0x00e1 ,/* 0x0947 */
    484     0x00e2 ,/* 0x0948 */
    485     0x00e7 ,/* 0x0949 */
    486     0x00e4 ,/* 0x094a */
    487     0x00e5 ,/* 0x094b */
    488     0x00e6 ,/* 0x094c */
    489     0x00e8 ,/* 0x094d */
    490     0x00ec ,/* 0x094e */
    491     0x00ed ,/* 0x094f */
    492     0xA1E9 ,/* 0x0950 */ /* OM Symbol */
    493     0xFFFF ,/* 0x0951 */
    494     0xF0B8 ,/* 0x0952 */
    495     0xFFFF ,/* 0x0953 */
    496     0xFFFF ,/* 0x0954 */
    497     0xFFFF ,/* 0x0955 */
    498     0xFFFF ,/* 0x0956 */
    499     0xFFFF ,/* 0x0957 */
    500     0xb3e9 ,/* 0x0958 */
    501     0xb4e9 ,/* 0x0959 */
    502     0xb5e9 ,/* 0x095a */
    503     0xbae9 ,/* 0x095b */
    504     0xbfe9 ,/* 0x095c */
    505     0xC0E9 ,/* 0x095d */
    506     0xc9e9 ,/* 0x095e */
    507     0x00ce ,/* 0x095f */
    508     0xAAe9 ,/* 0x0960 */
    509     0xA7E9 ,/* 0x0961 */
    510     0xDBE9 ,/* 0x0962 */
    511     0xDCE9 ,/* 0x0963 */
    512     0x00ea ,/* 0x0964 */
    513     0xeaea ,/* 0x0965 */
    514     0x00f1 ,/* 0x0966 */
    515     0x00f2 ,/* 0x0967 */
    516     0x00f3 ,/* 0x0968 */
    517     0x00f4 ,/* 0x0969 */
    518     0x00f5 ,/* 0x096a */
    519     0x00f6 ,/* 0x096b */
    520     0x00f7 ,/* 0x096c */
    521     0x00f8 ,/* 0x096d */
    522     0x00f9 ,/* 0x096e */
    523     0x00fa ,/* 0x096f */
    524     0xF0BF ,/* 0x0970 */
    525     0xFFFF ,/* 0x0971 */
    526     0xFFFF ,/* 0x0972 */
    527     0xFFFF ,/* 0x0973 */
    528     0xFFFF ,/* 0x0974 */
    529     0xFFFF ,/* 0x0975 */
    530     0xFFFF ,/* 0x0976 */
    531     0xFFFF ,/* 0x0977 */
    532     0xFFFF ,/* 0x0978 */
    533     0xFFFF ,/* 0x0979 */
    534     0xFFFF ,/* 0x097a */
    535     0xFFFF ,/* 0x097b */
    536     0xFFFF ,/* 0x097c */
    537     0xFFFF ,/* 0x097d */
    538     0xFFFF ,/* 0x097e */
    539     0xFFFF ,/* 0x097f */
    540 };
    /*
     * ISCII byte -> Unicode lookup, indexed by the byte value 0x00-0xFF (the
     * trailing comment on each row is the index of that row).
     *   0x00-0xA0            : mapped to the same numeric value.
     *   0xA1-0xEA            : mapped to code points in the 0x09xx range, except
     *                          0xD9 (ISCII_INV), which maps to 0x200D (ZWJ).
     *   0xF1-0xFA            : mapped to 0x0966-0x096F.
     *   0xEB-0xF0, 0xFB-0xFF : 0xFFFF (same value as INVALID_CHAR), i.e. no direct
     *                          mapping; this range contains ATR (0xEF) and
     *                          EXT (0xF0).
     * Read by the GET_MAPPING macro, which additionally filters the result
     * through validityTable for the script currently in effect.
     */
    541 static const uint16_t toUnicodeTable[256]={
    542     0x0000,/* 0x00 */
    543     0x0001,/* 0x01 */
    544     0x0002,/* 0x02 */
    545     0x0003,/* 0x03 */
    546     0x0004,/* 0x04 */
    547     0x0005,/* 0x05 */
    548     0x0006,/* 0x06 */
    549     0x0007,/* 0x07 */
    550     0x0008,/* 0x08 */
    551     0x0009,/* 0x09 */
    552     0x000a,/* 0x0a */
    553     0x000b,/* 0x0b */
    554     0x000c,/* 0x0c */
    555     0x000d,/* 0x0d */
    556     0x000e,/* 0x0e */
    557     0x000f,/* 0x0f */
    558     0x0010,/* 0x10 */
    559     0x0011,/* 0x11 */
    560     0x0012,/* 0x12 */
    561     0x0013,/* 0x13 */
    562     0x0014,/* 0x14 */
    563     0x0015,/* 0x15 */
    564     0x0016,/* 0x16 */
    565     0x0017,/* 0x17 */
    566     0x0018,/* 0x18 */
    567     0x0019,/* 0x19 */
    568     0x001a,/* 0x1a */
    569     0x001b,/* 0x1b */
    570     0x001c,/* 0x1c */
    571     0x001d,/* 0x1d */
    572     0x001e,/* 0x1e */
    573     0x001f,/* 0x1f */
    574     0x0020,/* 0x20 */
    575     0x0021,/* 0x21 */
    576     0x0022,/* 0x22 */
    577     0x0023,/* 0x23 */
    578     0x0024,/* 0x24 */
    579     0x0025,/* 0x25 */
    580     0x0026,/* 0x26 */
    581     0x0027,/* 0x27 */
    582     0x0028,/* 0x28 */
    583     0x0029,/* 0x29 */
    584     0x002a,/* 0x2a */
    585     0x002b,/* 0x2b */
    586     0x002c,/* 0x2c */
    587     0x002d,/* 0x2d */
    588     0x002e,/* 0x2e */
    589     0x002f,/* 0x2f */
    590     0x0030,/* 0x30 */
    591     0x0031,/* 0x31 */
    592     0x0032,/* 0x32 */
    593     0x0033,/* 0x33 */
    594     0x0034,/* 0x34 */
    595     0x0035,/* 0x35 */
    596     0x0036,/* 0x36 */
    597     0x0037,/* 0x37 */
    598     0x0038,/* 0x38 */
    599     0x0039,/* 0x39 */
    600     0x003A,/* 0x3A */
    601     0x003B,/* 0x3B */
    602     0x003c,/* 0x3c */
    603     0x003d,/* 0x3d */
    604     0x003e,/* 0x3e */
    605     0x003f,/* 0x3f */
    606     0x0040,/* 0x40 */
    607     0x0041,/* 0x41 */
    608     0x0042,/* 0x42 */
    609     0x0043,/* 0x43 */
    610     0x0044,/* 0x44 */
    611     0x0045,/* 0x45 */
    612     0x0046,/* 0x46 */
    613     0x0047,/* 0x47 */
    614     0x0048,/* 0x48 */
    615     0x0049,/* 0x49 */
    616     0x004a,/* 0x4a */
    617     0x004b,/* 0x4b */
    618     0x004c,/* 0x4c */
    619     0x004d,/* 0x4d */
    620     0x004e,/* 0x4e */
    621     0x004f,/* 0x4f */
    622     0x0050,/* 0x50 */
    623     0x0051,/* 0x51 */
    624     0x0052,/* 0x52 */
    625     0x0053,/* 0x53 */
    626     0x0054,/* 0x54 */
    627     0x0055,/* 0x55 */
    628     0x0056,/* 0x56 */
    629     0x0057,/* 0x57 */
    630     0x0058,/* 0x58 */
    631     0x0059,/* 0x59 */
    632     0x005a,/* 0x5a */
    633     0x005b,/* 0x5b */
    634     0x005c,/* 0x5c */
    635     0x005d,/* 0x5d */
    636     0x005e,/* 0x5e */
    637     0x005f,/* 0x5f */
    638     0x0060,/* 0x60 */
    639     0x0061,/* 0x61 */
    640     0x0062,/* 0x62 */
    641     0x0063,/* 0x63 */
    642     0x0064,/* 0x64 */
    643     0x0065,/* 0x65 */
    644     0x0066,/* 0x66 */
    645     0x0067,/* 0x67 */
    646     0x0068,/* 0x68 */
    647     0x0069,/* 0x69 */
    648     0x006a,/* 0x6a */
    649     0x006b,/* 0x6b */
    650     0x006c,/* 0x6c */
    651     0x006d,/* 0x6d */
    652     0x006e,/* 0x6e */
    653     0x006f,/* 0x6f */
    654     0x0070,/* 0x70 */
    655     0x0071,/* 0x71 */
    656     0x0072,/* 0x72 */
    657     0x0073,/* 0x73 */
    658     0x0074,/* 0x74 */
    659     0x0075,/* 0x75 */
    660     0x0076,/* 0x76 */
    661     0x0077,/* 0x77 */
    662     0x0078,/* 0x78 */
    663     0x0079,/* 0x79 */
    664     0x007a,/* 0x7a */
    665     0x007b,/* 0x7b */
    666     0x007c,/* 0x7c */
    667     0x007d,/* 0x7d */
    668     0x007e,/* 0x7e */
    669     0x007f,/* 0x7f */
    670     0x0080,/* 0x80 */
    671     0x0081,/* 0x81 */
    672     0x0082,/* 0x82 */
    673     0x0083,/* 0x83 */
    674     0x0084,/* 0x84 */
    675     0x0085,/* 0x85 */
    676     0x0086,/* 0x86 */
    677     0x0087,/* 0x87 */
    678     0x0088,/* 0x88 */
    679     0x0089,/* 0x89 */
    680     0x008a,/* 0x8a */
    681     0x008b,/* 0x8b */
    682     0x008c,/* 0x8c */
    683     0x008d,/* 0x8d */
    684     0x008e,/* 0x8e */
    685     0x008f,/* 0x8f */
    686     0x0090,/* 0x90 */
    687     0x0091,/* 0x91 */
    688     0x0092,/* 0x92 */
    689     0x0093,/* 0x93 */
    690     0x0094,/* 0x94 */
    691     0x0095,/* 0x95 */
    692     0x0096,/* 0x96 */
    693     0x0097,/* 0x97 */
    694     0x0098,/* 0x98 */
    695     0x0099,/* 0x99 */
    696     0x009a,/* 0x9a */
    697     0x009b,/* 0x9b */
    698     0x009c,/* 0x9c */
    699     0x009d,/* 0x9d */
    700     0x009e,/* 0x9e */
    701     0x009f,/* 0x9f */
    702     0x00A0,/* 0xa0 */
    703     0x0901,/* 0xa1 */
    704     0x0902,/* 0xa2 */
    705     0x0903,/* 0xa3 */
    706     0x0905,/* 0xa4 */
    707     0x0906,/* 0xa5 */
    708     0x0907,/* 0xa6 */
    709     0x0908,/* 0xa7 */
    710     0x0909,/* 0xa8 */
    711     0x090a,/* 0xa9 */
    712     0x090b,/* 0xaa */
    713     0x090e,/* 0xab */
    714     0x090f,/* 0xac */
    715     0x0910,/* 0xad */
    716     0x090d,/* 0xae */
    717     0x0912,/* 0xaf */
    718     0x0913,/* 0xb0 */
    719     0x0914,/* 0xb1 */
    720     0x0911,/* 0xb2 */
    721     0x0915,/* 0xb3 */
    722     0x0916,/* 0xb4 */
    723     0x0917,/* 0xb5 */
    724     0x0918,/* 0xb6 */
    725     0x0919,/* 0xb7 */
    726     0x091a,/* 0xb8 */
    727     0x091b,/* 0xb9 */
    728     0x091c,/* 0xba */
    729     0x091d,/* 0xbb */
    730     0x091e,/* 0xbc */
    731     0x091f,/* 0xbd */
    732     0x0920,/* 0xbe */
    733     0x0921,/* 0xbf */
    734     0x0922,/* 0xc0 */
    735     0x0923,/* 0xc1 */
    736     0x0924,/* 0xc2 */
    737     0x0925,/* 0xc3 */
    738     0x0926,/* 0xc4 */
    739     0x0927,/* 0xc5 */
    740     0x0928,/* 0xc6 */
    741     0x0929,/* 0xc7 */
    742     0x092a,/* 0xc8 */
    743     0x092b,/* 0xc9 */
    744     0x092c,/* 0xca */
    745     0x092d,/* 0xcb */
    746     0x092e,/* 0xcc */
    747     0x092f,/* 0xcd */
    748     0x095f,/* 0xce */
    749     0x0930,/* 0xcf */
    750     0x0931,/* 0xd0 */
    751     0x0932,/* 0xd1 */
    752     0x0933,/* 0xd2 */
    753     0x0934,/* 0xd3 */
    754     0x0935,/* 0xd4 */
    755     0x0936,/* 0xd5 */
    756     0x0937,/* 0xd6 */
    757     0x0938,/* 0xd7 */
    758     0x0939,/* 0xd8 */
    759     0x200D,/* 0xd9 */
    760     0x093e,/* 0xda */
    761     0x093f,/* 0xdb */
    762     0x0940,/* 0xdc */
    763     0x0941,/* 0xdd */
    764     0x0942,/* 0xde */
    765     0x0943,/* 0xdf */
    766     0x0946,/* 0xe0 */
    767     0x0947,/* 0xe1 */
    768     0x0948,/* 0xe2 */
    769     0x0945,/* 0xe3 */
    770     0x094a,/* 0xe4 */
    771     0x094b,/* 0xe5 */
    772     0x094c,/* 0xe6 */
    773     0x0949,/* 0xe7 */
    774     0x094d,/* 0xe8 */
    775     0x093c,/* 0xe9 */
    776     0x0964,/* 0xea */
    777     0xFFFF,/* 0xeb */
    778     0xFFFF,/* 0xec */
    779     0xFFFF,/* 0xed */
    780     0xFFFF,/* 0xee */
    781     0xFFFF,/* 0xef */
    782     0xFFFF,/* 0xf0 */
    783     0x0966,/* 0xf1 */
    784     0x0967,/* 0xf2 */
    785     0x0968,/* 0xf3 */
    786     0x0969,/* 0xf4 */
    787     0x096a,/* 0xf5 */
    788     0x096b,/* 0xf6 */
    789     0x096c,/* 0xf7 */
    790     0x096d,/* 0xf8 */
    791     0x096e,/* 0xf9 */
    792     0x096f,/* 0xfa */
    793     0xFFFF,/* 0xfb */
    794     0xFFFF,/* 0xfc */
    795     0xFFFF,/* 0xfd */
    796     0xFFFF,/* 0xfe */
    797     0xFFFF /* 0xff */
    798 };
    799 
    /*
     * Special-case pairs of { ISCII byte, Unicode code point }.
     * Row 0 is a header: its first element is the total number of rows in the
     * array (header included); its second element is unused (0).
     * NOTE(review): the code that consumes this table is outside this chunk;
     * presumably the listed ISCII byte followed by ISCII_VOWEL_SIGN_E (0xE0)
     * yields the paired code point - confirm against the to-Unicode converter.
     */
    800 static const uint16_t vowelSignESpecialCases[][2]={
    801 	{ 2 /*length of array*/    , 0      },
    802 	{ 0xA4 , 0x0904 },
    803 };
    804 
    /*
     * Special-case pairs of { ISCII byte, Unicode code point }.
     * Row 0 is a header: its first element is the total number of rows in the
     * array (header included, 16 here); its second element is unused (0).
     * NOTE(review): the consuming code is outside this chunk; presumably this
     * backs the "Nukta" pre-context rule described in the "Rules for ISCII to
     * Unicode converter" comment (ISCII byte followed by NUKTA -> pre-composed
     * Unicode character) - confirm against the to-Unicode converter.
     */
    805 static const uint16_t nuktaSpecialCases[][2]={
    806     { 16 /*length of array*/   , 0      },
    807     { 0xA6 , 0x090c },
    808     { 0xEA , 0x093D },
    809     { 0xDF , 0x0944 },
    810     { 0xA1 , 0x0950 },
    811     { 0xb3 , 0x0958 },
    812     { 0xb4 , 0x0959 },
    813     { 0xb5 , 0x095a },
    814     { 0xba , 0x095b },
    815     { 0xbf , 0x095c },
    816     { 0xC0 , 0x095d },
    817     { 0xc9 , 0x095e },
    818     { 0xAA , 0x0960 },
    819     { 0xA7 , 0x0961 },
    820     { 0xDB , 0x0962 },
    821     { 0xDC , 0x0963 },
    822 };
    823 
    824 
    /*
     * WRITE_TO_TARGET_FROM_U: emit targetByteUnit to the byte output as one, two
     * or three bytes, most significant byte first.
     *   value <= 0xFF            -> 1 byte
     *   0xFF < value <= 0xFFFF   -> 2 bytes
     *   value > 0xFFFF           -> 3 bytes (bits 16-23, 8-15, 0-7)
     *
     * Arguments (all evaluated more than once; target and offsets are advanced
     * in place, so they must be modifiable lvalues):
     *   args           - conversion args; args->source is the base for offsets and
     *                    args->converter->charErrorBuffer receives overflow bytes
     *   offsets        - optional (may be NULL) array receiving one source offset
     *                    per byte written to target
     *   source         - current source pointer; the recorded offset is
     *                    (source - args->source - 1), and one less than that for
     *                    every byte of a 3-byte unit
     *   target         - current output pointer
     *   targetLimit    - end of the output buffer
     *   targetByteUnit - value to write
     *   err            - set to U_BUFFER_OVERFLOW_ERROR when any byte does not fit
     *
     * Bytes that do not fit in [target, targetLimit) are appended to
     * charErrorBuffer (charErrorBufferLength is incremented); no offsets are
     * recorded for those bytes.
     * When the buffer is already full on entry, the bytes to spill are selected
     * with the masks 0xFF0000 and 0xFF00, so a zero byte in one of those
     * positions is not emitted on that path.
     * The expansion is a bare { ... } block, not do { ... } while (0).
     */
    825 #define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err){      \
    826     int32_t offset = (int32_t)(source - args->source-1);                                        \
    827       /* write the targetUniChar  to target */                                                  \
    828     if(target < targetLimit){                                                                   \
    829         if(targetByteUnit <= 0xFF){                                                             \
    830             *(target)++ = (uint8_t)(targetByteUnit);                                            \
    831             if(offsets){                                                                        \
    832                 *(offsets++) = offset;                                                          \
    833             }                                                                                   \
    834         }else{                                                                                  \
    835             if (targetByteUnit > 0xFFFF) {                                                      \
    836                 *(target)++ = (uint8_t)(targetByteUnit>>16);                                    \
    837                 if (offsets) {                                                                  \
    838                     --offset;                                                                   \
    839                     *(offsets++) = offset;                                                      \
    840                 }                                                                               \
    841             }                                                                                   \
    842             if (!(target < targetLimit)) {                                                      \
    843                 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =    \
    844                                 (uint8_t)(targetByteUnit >> 8);                                 \
    845                 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =    \
    846                                 (uint8_t)targetByteUnit;                                        \
    847                 *err = U_BUFFER_OVERFLOW_ERROR;                                                 \
    848             } else {                                                                            \
    849                 *(target)++ = (uint8_t)(targetByteUnit>>8);                                     \
    850                 if(offsets){                                                                    \
    851                     *(offsets++) = offset;                                                      \
    852                 }                                                                               \
    853                 if(target < targetLimit){                                                       \
    854                     *(target)++ = (uint8_t)  targetByteUnit;                                    \
    855                     if(offsets){                                                                \
    856                         *(offsets++) = offset                            ;                      \
    857                     }                                                                           \
    858                 }else{                                                                          \
    859                     args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =\
    860                                 (uint8_t) (targetByteUnit);                                     \
    861                     *err = U_BUFFER_OVERFLOW_ERROR;                                             \
    862                 }                                                                               \
    863             }                                                                                   \
    864         }                                                                                       \
    865     }else{                                                                                      \
    866         if (targetByteUnit & 0xFF0000) {                                                        \
    867             args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =        \
    868                         (uint8_t) (targetByteUnit >>16);                                        \
    869         }                                                                                       \
    870         if(targetByteUnit & 0xFF00){                                                            \
    871             args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =        \
    872                         (uint8_t) (targetByteUnit >>8);                                         \
    873         }                                                                                       \
    874         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =            \
    875                         (uint8_t) (targetByteUnit);                                             \
    876         *err = U_BUFFER_OVERFLOW_ERROR;                                                         \
    877     }                                                                                           \
    878 }
    879 
    880 /* Rules:
    881  *    Explicit Halant :
    882  *                      <HALANT> + <ZWNJ>
    883  *    Soft Halant :
    884  *                      <HALANT> + <ZWJ>
    885  */
    886 
    /*
     * Converts UTF-16 code units in [args->source, args->sourceLimit) to ISCII
     * bytes written at args->target (bounded by args->targetLimit).
     *
     * Per code unit:
     *   - values <= ASCII_END are written unchanged as one byte and remembered in
     *     converter->fromUnicodeStatus; when that remembered value is LF, ATR plus
     *     the current language code is written before the next code unit;
     *   - ZWNJ / ZWJ are handled against the pending context character (see the
     *     switch below);
     *   - code points in INDIC_BLOCK_BEGIN..INDIC_BLOCK_END have the script delta
     *     subtracted (DANDA and DOUBLE_DANDA excepted) and are looked up in
     *     fromUnicodeTable, then filtered through validityTable; a change of
     *     script first emits ATR plus the new language code;
     *   - anything else (including surrogates) ends the loop with an error code.
     *
     * State kept between calls (UConverterDataISCII in converter->extraInfo):
     * currentDeltaFromUnicode, currentMaskFromUnicode, contextCharFromUnicode,
     * isFirstBuffer; plus converter->fromUnicodeStatus and converter->fromUChar32.
     *
     * Error codes stored in *err:
     *   U_ILLEGAL_ARGUMENT_ERROR - NULL converter, or a limit below its start
     *   U_BUFFER_OVERFLOW_ERROR  - set by WRITE_TO_TARGET_FROM_U
     *   U_INVALID_CHAR_FOUND     - unmappable code point (BMP or supplementary)
     *   U_ILLEGAL_CHAR_FOUND     - unmatched lead or trail surrogate
     *
     * On return args->source and args->target are updated.
     * NOTE(review): the local offsets pointer is advanced by the write macro but
     * args->offsets is not written back here.
     */
    887 static void UConverter_fromUnicode_ISCII_OFFSETS_LOGIC(
    888         UConverterFromUnicodeArgs * args, UErrorCode * err) {
    889     const UChar *source = args->source;
    890     const UChar *sourceLimit = args->sourceLimit;
    891     unsigned char *target = (unsigned char *) args->target;
    892     unsigned char *targetLimit = (unsigned char *) args->targetLimit;
    893     int32_t* offsets = args->offsets;
    894     uint32_t targetByteUnit = 0x0000;
    895     UChar32 sourceChar = 0x0000;
    896     UChar32 tempContextFromUnicode = 0x0000;    /* For special handling of the Gurmukhi script. */
    897     UConverterDataISCII *converterData;
    898     uint16_t newDelta=0;
    899     uint16_t range = 0;
    900     UBool deltaChanged = FALSE;
    901 
    902     if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)) {
    903         *err = U_ILLEGAL_ARGUMENT_ERROR;
    904         return;
    905     }
    906     /* initialize data */
    907     converterData=(UConverterDataISCII*)args->converter->extraInfo;
    908     newDelta=converterData->currentDeltaFromUnicode;
            /* range is the index into lookupInitialData for the current script delta */
    909     range = (uint16_t)(newDelta/DELTA);
    910 
            /* A non-zero fromUChar32 (stored at the bottom of the loop by an earlier
             * call) is treated as a pending lead surrogate: resume at the
             * trail-surrogate lookup inside the loop. */
    911     if ((sourceChar = args->converter->fromUChar32)!=0) {
    912         goto getTrail;
    913     }
    914 
    915     /*writing the char to the output stream */
    916     while (source < sourceLimit) {
    917         /* Write the language code following LF only if LF is not the last character. */
    918         if (args->converter->fromUnicodeStatus == LF) {
    919             targetByteUnit = ATR<<8;
    920             targetByteUnit += (uint8_t) lookupInitialData[range].isciiLang;
    921             args->converter->fromUnicodeStatus = 0x0000;
    922             /* now append ATR and language code */
    923             WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
    924             if (U_FAILURE(*err)) {
    925                 break;
    926             }
    927         }
    928 
    929         sourceChar = *source++;
                /* snapshot of the context before this code unit can change it; used
                 * after the switch for the Adhak + consonant rule */
    930         tempContextFromUnicode = converterData->contextCharFromUnicode;
    931 
    932         targetByteUnit = missingCharMarker;
    933 
    934         /*check if input is in ASCII and C0 control codes range*/
    935         if (sourceChar <= ASCII_END) {
                    /* remember the value so an LF can be detected on the next iteration */
    936             args->converter->fromUnicodeStatus = sourceChar;
    937             WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,sourceChar,err);
    938             if (U_FAILURE(*err)) {
    939                 break;
    940             }
    941             continue;
    942         }
    943         switch (sourceChar) {
    944         case ZWNJ:
    945             /* contextChar has HALANT */
                    /* NOTE(review): the test is only "non-zero", so a pending PNJ_ADHAK
                     * context (set in the default case) also takes this branch. */
    946             if (converterData->contextCharFromUnicode) {
    947                 converterData->contextCharFromUnicode = 0x00;
                        /* explicit halant: a second ISCII_HALANT byte is emitted */
    948                 targetByteUnit = ISCII_HALANT;
    949             } else {
    950                 /* consume ZWNJ and continue */
    951                 converterData->contextCharFromUnicode = 0x00;
    952                 continue;
    953             }
    954             break;
    955         case ZWJ:
    956             /* contextChar has HALANT */
    957             if (converterData->contextCharFromUnicode) {
                        /* soft halant: ISCII_NUKTA is emitted after the halant */
    958                 targetByteUnit = ISCII_NUKTA;
    959             } else {
                        /* ZWJ without a pending context maps to ISCII_INV */
    960                 targetByteUnit =ISCII_INV;
    961             }
    962             converterData->contextCharFromUnicode = 0x00;
    963             break;
    964         default:
    965             /* is the sourceChar in the INDIC_RANGE? */
                    /* single unsigned comparison: values above INDIC_BLOCK_END wrap
                     * around in the uint16_t cast and fail the test */
    966             if ((uint16_t)(INDIC_BLOCK_END-sourceChar) <= INDIC_RANGE) {
    967                 /* Danda and Double Danda are valid in Northern scripts.. since Unicode
    968                  * does not include these codepoints in all Northern scripts we need to
    969                  * filter them out
    970                  */
    971                 if (sourceChar!= DANDA && sourceChar != DOUBLE_DANDA) {
    972                     /* find out to which block the sourceChar belongs*/
    973                     range =(uint16_t)((sourceChar-INDIC_BLOCK_BEGIN)/DELTA);
    974                     newDelta =(uint16_t)(range*DELTA);
    975 
    976                     /* Now are we in the same block as the previous? */
    977                     if (newDelta!= converterData->currentDeltaFromUnicode || converterData->isFirstBuffer) {
    978                         converterData->currentDeltaFromUnicode = newDelta;
    979                         converterData->currentMaskFromUnicode = lookupInitialData[range].maskEnum;
    980                         deltaChanged =TRUE;
    981                         converterData->isFirstBuffer=FALSE;
    982                     }
    983 
    984                     if (converterData->currentDeltaFromUnicode == PNJ_DELTA) {
    985                         if (sourceChar == PNJ_TIPPI) {
    986                             /* Make sure Tippi is converted to Bindi. */
    987                             sourceChar = PNJ_BINDI;
    988                         } else if (sourceChar == PNJ_ADHAK) {
    989                             /* This is for consonant cluster handling. */
    990                             converterData->contextCharFromUnicode = PNJ_ADHAK;
    991                         }
    992 
    993                     }
    994                     /* Normalize all Indic codepoints to Devanagari and map them to ISCII */
    995                     /* now subtract the new delta from sourceChar*/
    996                     sourceChar -= converterData->currentDeltaFromUnicode;
    997                 }
    998 
    999                 /* get the target byte unit */
                        /* only the low byte of the normalized code point is used as index */
   1000                 targetByteUnit=fromUnicodeTable[(uint8_t)sourceChar];
   1001 
   1002                 /* is the code point valid in current script? */
   1003                 if ((validityTable[(uint8_t)sourceChar] & converterData->currentMaskFromUnicode)==0) {
   1004                     /* Vocallic RR is assigned in ISCII Telugu and Unicode */
   1005                     if (converterData->currentDeltaFromUnicode!=(TELUGU_DELTA) || sourceChar!=VOCALLIC_RR) {
   1006                         targetByteUnit=missingCharMarker;
   1007                     }
   1008                 }
   1009 
   1010                 if (deltaChanged) {
   1011                     /* we are in a script block which is different than
   1012                      * previous sourceChar's script block write ATR and language codes
   1013                      */
   1014                     uint32_t temp=0;
   1015                     temp =(uint16_t)(ATR<<8);
   1016                     temp += (uint16_t)((uint8_t) lookupInitialData[range].isciiLang);
   1017                     /* reset */
   1018                     deltaChanged=FALSE;
   1019                     /* now append ATR and language code */
   1020                     WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,temp,err);
   1021                     if (U_FAILURE(*err)) {
                                /* NOTE(review): inside the switch this break leaves only the
                                 * switch, not the while loop; control continues with the
                                 * handling of targetByteUnit after the switch. */
   1022                         break;
   1023                     }
   1024                 }
   1025 
                        /* Adhak itself produces no output; the context set above is kept
                         * (this continue skips the reset below) for the next code unit. */
   1026                 if (converterData->currentDeltaFromUnicode == PNJ_DELTA && (sourceChar + PNJ_DELTA) == PNJ_ADHAK) {
   1027                     continue;
   1028                 }
   1029             }
   1030             /* reset context char */
   1031             converterData->contextCharFromUnicode = 0x00;
   1032             break;
   1033         }
   1034         if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && uset_contains(PNJ_CONSONANT_SET, (sourceChar + PNJ_DELTA))) {
   1035             /* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */
   1036             /* reset context char */
   1037             converterData->contextCharFromUnicode = 0x0000;
                    /* pack the three bytes into one value; the write macro emits a
                     * value above 0xFFFF as three bytes, most significant first */
   1038             targetByteUnit = targetByteUnit << 16 | ISCII_HALANT << 8 | targetByteUnit;
   1039             /* write targetByteUnit to target */
   1040             WRITE_TO_TARGET_FROM_U(args, offsets, source, target, targetLimit, targetByteUnit,err);
   1041             if (U_FAILURE(*err)) {
   1042                 break;
   1043             }
   1044         } else if (targetByteUnit != missingCharMarker) {
   1045             if (targetByteUnit==ISCII_HALANT) {
                        /* remember the halant so a following ZWNJ/ZWJ can be interpreted */
   1046                 converterData->contextCharFromUnicode = (UChar)targetByteUnit;
   1047             }
   1048             /* write targetByteUnit to target*/
   1049             WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
   1050             if (U_FAILURE(*err)) {
   1051                 break;
   1052             }
   1053         } else {
   1054             /* oops.. the code point is unassigned */
   1055             /*check if the char is a First surrogate*/
   1056             if (UTF_IS_SURROGATE(sourceChar)) {
   1057                 if (UTF_IS_SURROGATE_FIRST(sourceChar)) {
                            /* also entered by the goto at the top of the function when a
                             * code point is pending in fromUChar32 */
   1058 getTrail:
   1059                     /*look ahead to find the trail surrogate*/
   1060                     if (source < sourceLimit) {
   1061                         /* test the following code unit */
   1062                         UChar trail= (*source);
   1063                         if (UTF_IS_SECOND_SURROGATE(trail)) {
   1064                             source++;
   1065                             sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
                                    /* a well-formed pair is still reported as unmappable */
   1066                             *err =U_INVALID_CHAR_FOUND;
   1067                             /* convert this surrogate code point */
   1068                             /* exit this condition tree */
   1069                         } else {
   1070                             /* this is an unmatched lead code unit (1st surrogate) */
   1071                             /* callback(illegal) */
   1072                             *err=U_ILLEGAL_CHAR_FOUND;
   1073                         }
   1074                     } else {
   1075                         /* no more input */
                                /* the lead surrogate is stored in fromUChar32 below so the
                                 * next call can pair it */
   1076                         *err = U_ZERO_ERROR;
   1077                     }
   1078                 } else {
   1079                     /* this is an unmatched trail code unit (2nd surrogate) */
   1080                     /* callback(illegal) */
   1081                     *err=U_ILLEGAL_CHAR_FOUND;
   1082                 }
   1083             } else {
   1084                 /* callback(unassigned) for a BMP code point */
   1085                 *err = U_INVALID_CHAR_FOUND;
   1086             }
   1087 
                    /* hand the offending (or pending) code point back via the converter */
   1088             args->converter->fromUChar32=sourceChar;
   1089             break;
   1090         }
   1091     }/* end while (source < sourceLimit) */
   1092 
   1093     /*save the state and return */
   1094     args->source = source;
   1095     args->target = (char*)target;
   1096 }
   1097 
   /*
    * Script lookup used by the ISCII-to-Unicode converter after an ATR byte:
    * the row is selected with (sourceChar & 0x0F).
    *   column 0 - script index; multiplied by DELTA to obtain
    *              currentDeltaToUnicode
    *   column 1 - validity mask; cast to MaskEnum for currentMaskToUnicode
    * Rows 3 and 6 are identical (BENGALI / BNG_MASK), and row 5 (TELUGU) uses
    * KND_MASK, the same mask as the KANNADA row.
    */
   1098 static const uint16_t lookupTable[][2]={
   1099     { ZERO,       ZERO     },     /*DEFAULT*/
   1100     { ZERO,       ZERO     },     /*ROMAN*/
   1101     { DEVANAGARI, DEV_MASK },
   1102     { BENGALI,    BNG_MASK },
   1103     { TAMIL,      TML_MASK },
   1104     { TELUGU,     KND_MASK },
   1105     { BENGALI,    BNG_MASK },
   1106     { ORIYA,      ORI_MASK },
   1107     { KANNADA,    KND_MASK },
   1108     { MALAYALAM,  MLM_MASK },
   1109     { GUJARATI,   GJR_MASK },
   1110     { GURMUKHI,   PNJ_MASK }
   1111 };
   1112 
   /*
    * WRITE_TO_TARGET_TO_U: shift targetUniChar into the current script block and
    * write it to the UChar output.
    *
    *   args          - conversion args; supplies targetLimit and the converter
    *                   whose UCharErrorBuffer receives an overflowing unit
    *   source        - not referenced by the macro body
    *   target        - current output pointer (advanced on a successful write)
    *   offsets       - optional (may be NULL) offsets array (advanced on write)
    *   offset        - source offset recorded for the written unit
    *   targetUniChar - value to write; MODIFIED in place (delta is added), so it
    *                   must be a modifiable lvalue
    *   delta         - script delta to add
    *   err           - set to U_BUFFER_OVERFLOW_ERROR when target is full
    *
    * delta is not added for values <= ASCII_END, nor for ZWJ, ZWNJ, DANDA and
    * DOUBLE_DANDA.
    * The expansion is a bare { ... } block, not do { ... } while (0).
    */
   1113 #define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err){\
   1114     /* add offset to current Indic Block */                                              \
   1115     if(targetUniChar>ASCII_END &&                                                        \
   1116            targetUniChar != ZWJ &&                                                       \
   1117            targetUniChar != ZWNJ &&                                                      \
   1118            targetUniChar != DANDA &&                                                     \
   1119            targetUniChar != DOUBLE_DANDA){                                               \
   1120                                                                                          \
   1121            targetUniChar+=(uint16_t)(delta);                                             \
   1122     }                                                                                    \
   1123     /* now write the targetUniChar */                                                    \
   1124     if(target<args->targetLimit){                                                        \
   1125         *(target)++ = (UChar)targetUniChar;                                              \
   1126         if(offsets){                                                                     \
   1127             *(offsets)++ = (int32_t)(offset);                                            \
   1128         }                                                                                \
   1129     }else{                                                                               \
   1130         args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++] =   \
   1131             (UChar)targetUniChar;                                                        \
   1132         *err = U_BUFFER_OVERFLOW_ERROR;                                                  \
   1133     }                                                                                    \
   1134 }
   1135 
   /*
    * GET_MAPPING: look up the Unicode mapping of one ISCII byte.
    *
    *   sourceChar    - ISCII byte, used directly as index into toUnicodeTable
    *   targetUniChar - receives the result (assigned, so must be an lvalue)
    *   data          - pointer to the converter state; currentMaskToUnicode and
    *                   currentDeltaToUnicode are read
    *
    * For bytes above ASCII_END the result is replaced by missingCharMarker when
    * validityTable (indexed by the low byte of the looked-up value) shares no
    * bit with the current script mask - unless the current script delta is
    * TELUGU_DELTA and the value is VOCALLIC_RR.
    */
   1136 #define GET_MAPPING(sourceChar,targetUniChar,data){                                      \
   1137     targetUniChar = toUnicodeTable[(sourceChar)] ;                                       \
   1138     /* is the code point valid in current script? */                                     \
   1139     if(sourceChar> ASCII_END &&                                                          \
   1140             (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode)==0){    \
   1141         /* Vocallic RR is assigned in ISCII Telugu and Unicode */                        \
   1142         if(data->currentDeltaToUnicode!=(TELUGU_DELTA) ||                                \
   1143                     targetUniChar!=VOCALLIC_RR){                                         \
   1144             targetUniChar=missingCharMarker;                                             \
   1145         }                                                                                \
   1146     }                                                                                    \
   1147 }
   1148 
   1149 /***********
   1150  *  Rules for ISCII to Unicode converter
   1151  *  ISCII is a stateful encoding. To convert ISCII bytes to Unicode,
   1152  *  which has both precomposed and decomposed forms of characters,
   1153  *  pre-context and post-context need to be considered.
   1154  *
   1155  *  Post context
   1156  *  i)  ATR : Attribute code is used to declare the font and script switching.
   1157  *      Currently we only switch scripts; font codes are consumed without generating an error
   1158  *  ii) EXT : Extension code is used to declare switching to Sanskrit and for obscure,
   1159  *      obsolete characters
   1160  *  Pre context
   1161  *  i)  Halant: if preceded by a halant then it is an explicit halant
   1162  *  ii) Nukta :
   1163  *       a) if preceded by a halant then it is a soft halant
   1164  *       b) if preceded by specific consonants and the ligatures have pre-composed
   1165  *          characters in Unicode then convert to pre-composed characters
   1166  *  iii) Danda: If Danda is preceded by a Danda then convert to Double Danda
   1167  *
   1168  */
   1169 
   1170 static void UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UErrorCode* err) {
   1171     const char *source = ( char *) args->source;
   1172     UChar *target = args->target;
   1173     const char *sourceLimit = args->sourceLimit;
   1174     const UChar* targetLimit = args->targetLimit;
   1175     uint32_t targetUniChar = 0x0000;
   1176     uint8_t sourceChar = 0x0000;
   1177     UConverterDataISCII* data;
   1178     UChar32* toUnicodeStatus=NULL;
   1179     UChar32 tempTargetUniChar = 0x0000;
   1180     UChar* contextCharToUnicode= NULL;
   1181     UBool found;
   1182     int i;
   1183     int offset = 0;
   1184 
   1185     if ((args->converter == NULL) || (target < args->target) || (source < args->source)) {
   1186         *err = U_ILLEGAL_ARGUMENT_ERROR;
   1187         return;
   1188     }
   1189 
   1190     data = (UConverterDataISCII*)(args->converter->extraInfo);
   1191     contextCharToUnicode = &data->contextCharToUnicode; /* contains previous ISCII codepoint visited */
   1192     toUnicodeStatus = (UChar32*)&args->converter->toUnicodeStatus;/* contains the mapping to Unicode of the above codepoint*/
   1193 
   1194     while (U_SUCCESS(*err) && source<sourceLimit) {
   1195 
   1196         targetUniChar = missingCharMarker;
   1197 
   1198         if (target < targetLimit) {
   1199             sourceChar = (unsigned char)*(source)++;
   1200 
   1201             /* look at the post-context preform special processing */
   1202             if (*contextCharToUnicode==ATR) {
   1203 
   1204                 /* If we have ATR in *contextCharToUnicode then we need to change our
   1205                  * state to the Indic Script specified by sourceChar
   1206                  */
   1207 
   1208                 /* check if the sourceChar is supported script range*/
   1209                 if ((uint8_t)(PNJ-sourceChar)<=PNJ-DEV) {
   1210                     data->currentDeltaToUnicode = (uint16_t)(lookupTable[sourceChar & 0x0F][0] * DELTA);
   1211                     data->currentMaskToUnicode = (MaskEnum)lookupTable[sourceChar & 0x0F][1];
   1212                 } else if (sourceChar==DEF) {
   1213                     /* switch back to default */
   1214                     data->currentDeltaToUnicode = data->defDeltaToUnicode;
   1215                     data->currentMaskToUnicode = data->defMaskToUnicode;
   1216                 } else {
   1217                     if ((sourceChar >= 0x21 && sourceChar <= 0x3F)) {
   1218                         /* these are display codes consume and continue */
   1219                     } else {
   1220                         *err =U_ILLEGAL_CHAR_FOUND;
   1221                         /* reset */
   1222                         *contextCharToUnicode=NO_CHAR_MARKER;
   1223                         goto CALLBACK;
   1224                     }
   1225                 }
   1226 
   1227                 /* reset */
   1228                 *contextCharToUnicode=NO_CHAR_MARKER;
   1229 
   1230                 continue;
   1231 
   1232             } else if (*contextCharToUnicode==EXT) {
   1233                 /* check if sourceChar is in 0xA1-0xEE range */
   1234                 if ((uint8_t) (EXT_RANGE_END - sourceChar) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)) {
   1235                     /* We currently support only Anudatta and Devanagari abbreviation sign */
   1236                     if (sourceChar==0xBF || sourceChar == 0xB8) {
   1237                         targetUniChar = (sourceChar==0xBF) ? DEV_ABBR_SIGN : DEV_ANUDATTA;
   1238 
   1239                         /* find out if the mapping is valid in this state */
   1240                         if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
   1241                             *contextCharToUnicode= NO_CHAR_MARKER;
   1242 
   1243                             /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
   1244                             if (data->prevToUnicodeStatus) {
   1245                                 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
   1246                                 data->prevToUnicodeStatus = 0x0000;
   1247                             }
   1248                             /* write to target */
   1249                             WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
   1250 
   1251                             continue;
   1252                         }
   1253                     }
   1254                     /* byte unit is unassigned */
   1255                     targetUniChar = missingCharMarker;
   1256                     *err= U_INVALID_CHAR_FOUND;
   1257                 } else {
   1258                     /* only 0xA1 - 0xEE are legal after EXT char */
   1259                     *contextCharToUnicode= NO_CHAR_MARKER;
   1260                     *err = U_ILLEGAL_CHAR_FOUND;
   1261                 }
   1262                 goto CALLBACK;
   1263             } else if (*contextCharToUnicode==ISCII_INV) {
   1264                 if (sourceChar==ISCII_HALANT) {
   1265                     targetUniChar = 0x0020; /* replace with space accoding to Indic FAQ */
   1266                 } else {
   1267                     targetUniChar = ZWJ;
   1268                 }
   1269 
   1270                 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
   1271                 if (data->prevToUnicodeStatus) {
   1272                     WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
   1273                     data->prevToUnicodeStatus = 0x0000;
   1274                 }
   1275                 /* write to target */
   1276                 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
   1277                 /* reset */
   1278                 *contextCharToUnicode=NO_CHAR_MARKER;
   1279             }
   1280 
   1281             /* look at the pre-context and perform special processing */
   1282             switch (sourceChar) {
   1283             case ISCII_INV:
   1284             case EXT: /*falls through*/
   1285             case ATR:
   1286                 *contextCharToUnicode = (UChar)sourceChar;
   1287 
   1288                 if (*toUnicodeStatus != missingCharMarker) {
   1289                     /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
   1290                     if (data->prevToUnicodeStatus) {
   1291                         WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
   1292                         data->prevToUnicodeStatus = 0x0000;
   1293                     }
   1294                     WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err);
   1295                     *toUnicodeStatus = missingCharMarker;
   1296                 }
   1297                 continue;
   1298             case ISCII_DANDA:
   1299                 /* handle double danda*/
   1300                 if (*contextCharToUnicode== ISCII_DANDA) {
   1301                     targetUniChar = DOUBLE_DANDA;
   1302                     /* clear the context */
   1303                     *contextCharToUnicode = NO_CHAR_MARKER;
   1304                     *toUnicodeStatus = missingCharMarker;
   1305                 } else {
   1306                     GET_MAPPING(sourceChar,targetUniChar,data);
   1307                     *contextCharToUnicode = sourceChar;
   1308                 }
   1309                 break;
   1310             case ISCII_HALANT:
   1311                 /* handle explicit halant */
   1312                 if (*contextCharToUnicode == ISCII_HALANT) {
   1313                     targetUniChar = ZWNJ;
   1314                     /* clear the context */
   1315                     *contextCharToUnicode = NO_CHAR_MARKER;
   1316                 } else {
   1317                     GET_MAPPING(sourceChar,targetUniChar,data);
   1318                     *contextCharToUnicode = sourceChar;
   1319                 }
   1320                 break;
   1321             case 0x0A:
   1322                 /* fall through */
   1323             case 0x0D:
   1324                 data->resetToDefaultToUnicode = TRUE;
   1325                 GET_MAPPING(sourceChar,targetUniChar,data)
   1326                 ;
   1327                 *contextCharToUnicode = sourceChar;
   1328                 break;
   1329 
   1330             case ISCII_VOWEL_SIGN_E:
   1331                 i=1;
   1332                 found=FALSE;
   1333                 for (; i<vowelSignESpecialCases[0][0]; i++) {
   1334                     if (vowelSignESpecialCases[i][0]==(uint8_t)*contextCharToUnicode) {
   1335                         targetUniChar=vowelSignESpecialCases[i][1];
   1336                         found=TRUE;
   1337                         break;
   1338                     }
   1339                 }
   1340                 if (found) {
   1341                     /* find out if the mapping is valid in this state */
   1342                     if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
   1343                         /*targetUniChar += data->currentDeltaToUnicode ;*/
   1344                         *contextCharToUnicode= NO_CHAR_MARKER;
   1345                         *toUnicodeStatus = missingCharMarker;
   1346                         break;
   1347                     }
   1348                 }
   1349                 GET_MAPPING(sourceChar,targetUniChar,data);
   1350                 *contextCharToUnicode = sourceChar;
   1351                 break;
   1352 
   1353             case ISCII_NUKTA:
   1354                 /* handle soft halant */
   1355                 if (*contextCharToUnicode == ISCII_HALANT) {
   1356                     targetUniChar = ZWJ;
   1357                     /* clear the context */
   1358                     *contextCharToUnicode = NO_CHAR_MARKER;
   1359                     break;
   1360                 } else if (data->currentDeltaToUnicode == PNJ_DELTA && data->contextCharToUnicode == 0xc0) {
   1361                     /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
   1362                     if (data->prevToUnicodeStatus) {
   1363                         WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
   1364                         data->prevToUnicodeStatus = 0x0000;
   1365                     }
   1366                     /* We got here because ISCII_NUKTA was preceded by 0xc0 and we are converting Gurmukhi.
   1367                      * In that case we must convert (0xc0 0xe9) to (\u0a5c\u0a4d\u0a39).
   1368                      */
   1369                     targetUniChar = PNJ_RRA;
   1370                     WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
   1371                     if (U_SUCCESS(*err)) {
   1372                         targetUniChar = PNJ_SIGN_VIRAMA;
   1373                         WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
   1374                         if (U_SUCCESS(*err)) {
   1375                             targetUniChar = PNJ_HA;
   1376                             WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
   1377                         } else {
   1378                             args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA;
   1379                         }
   1380                     } else {
   1381                         args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_SIGN_VIRAMA;
   1382                         args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA;
   1383                     }
   1384                     *toUnicodeStatus = missingCharMarker;
   1385                     data->contextCharToUnicode = NO_CHAR_MARKER;
   1386                     continue;
   1387                 } else {
   1388                     /* try to handle <CHAR> + ISCII_NUKTA special mappings */
   1389                     i=1;
   1390                     found =FALSE;
   1391                     for (; i<nuktaSpecialCases[0][0]; i++) {
   1392                         if (nuktaSpecialCases[i][0]==(uint8_t)
   1393                                 *contextCharToUnicode) {
   1394                             targetUniChar=nuktaSpecialCases[i][1];
   1395                             found =TRUE;
   1396                             break;
   1397                         }
   1398                     }
   1399                     if (found) {
   1400                         /* find out if the mapping is valid in this state */
   1401                         if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
   1402                             /*targetUniChar += data->currentDeltaToUnicode ;*/
   1403                             *contextCharToUnicode= NO_CHAR_MARKER;
   1404                             *toUnicodeStatus = missingCharMarker;
   1405                             if (data->currentDeltaToUnicode == PNJ_DELTA) {
   1406                                 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
   1407                                 if (data->prevToUnicodeStatus) {
   1408                                     WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
   1409                                     data->prevToUnicodeStatus = 0x0000;
   1410                                 }
   1411                                 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
   1412                                 continue;
   1413                             }
   1414                             break;
   1415                         }
   1416                         /* else fall through to default */
   1417                     }
   1418                     /* else fall through to default */
   1419                 }
   1420             default:GET_MAPPING(sourceChar,targetUniChar,data)
   1421                 ;
   1422                 *contextCharToUnicode = sourceChar;
   1423                 break;
   1424             }
   1425 
   1426             if (*toUnicodeStatus != missingCharMarker) {
   1427                 /* Check to make sure that consonant clusters are handled correct for Gurmukhi script. */
   1428                 if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && uset_contains(PNJ_CONSONANT_SET, data->prevToUnicodeStatus) &&
   1429                         (*toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && (targetUniChar + PNJ_DELTA) == data->prevToUnicodeStatus) {
   1430                     /* Consonant clusters C + HALANT + C should be encoded as ADHAK + C */
   1431                     offset = (int)(source-args->source - 3);
   1432                     tempTargetUniChar = PNJ_ADHAK; /* This is necessary to avoid some compiler warnings. */
   1433                     WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,tempTargetUniChar,0,err);
   1434                     WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,data->prevToUnicodeStatus,0,err);
   1435                     data->prevToUnicodeStatus = 0x0000; /* reset the previous unicode code point */
   1436                     *toUnicodeStatus = missingCharMarker;
   1437                     continue;
   1438                 } else {
   1439                     /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
   1440                     if (data->prevToUnicodeStatus) {
   1441                         WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
   1442                         data->prevToUnicodeStatus = 0x0000;
   1443                     }
   1444                     /* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script.
   1445                      * If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi.
   1446                      */
   1447                     if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && uset_contains(PNJ_BINDI_TIPPI_SET, (*toUnicodeStatus + PNJ_DELTA))) {
   1448                         targetUniChar = PNJ_TIPPI - PNJ_DELTA;
   1449                         WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,PNJ_DELTA,err);
   1450                     } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && uset_contains(PNJ_CONSONANT_SET, (*toUnicodeStatus + PNJ_DELTA))) {
   1451                         /* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. */
   1452                         data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA;
   1453                     } else {
   1454                         /* write the previously mapped codepoint */
   1455                         WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err);
   1456                     }
   1457                 }
   1458                 *toUnicodeStatus = missingCharMarker;
   1459             }
   1460 
   1461             if (targetUniChar != missingCharMarker) {
   1462                 /* now save the targetUniChar for delayed write */
   1463                 *toUnicodeStatus = (UChar) targetUniChar;
   1464                 if (data->resetToDefaultToUnicode==TRUE) {
   1465                     data->currentDeltaToUnicode = data->defDeltaToUnicode;
   1466                     data->currentMaskToUnicode = data->defMaskToUnicode;
   1467                     data->resetToDefaultToUnicode=FALSE;
   1468                 }
   1469             } else {
   1470 
   1471                 /* we reach here only if targetUniChar == missingCharMarker
   1472                  * so assign codes to reason and err
   1473                  */
   1474                 *err = U_INVALID_CHAR_FOUND;
   1475 CALLBACK:
   1476                 args->converter->toUBytes[0] = (uint8_t) sourceChar;
   1477                 args->converter->toULength = 1;
   1478                 break;
   1479             }
   1480 
   1481         } else {
   1482             *err =U_BUFFER_OVERFLOW_ERROR;
   1483             break;
   1484         }
   1485     }
   1486 
   1487     if (U_SUCCESS(*err) && args->flush && source == sourceLimit) {
   1488         /* end of the input stream */
   1489         UConverter *cnv = args->converter;
   1490 
   1491         if (*contextCharToUnicode==ATR || *contextCharToUnicode==EXT || *contextCharToUnicode==ISCII_INV) {
   1492             /* set toUBytes[] */
   1493             cnv->toUBytes[0] = (uint8_t)*contextCharToUnicode;
   1494             cnv->toULength = 1;
   1495 
   1496             /* avoid looping on truncated sequences */
   1497             *contextCharToUnicode = NO_CHAR_MARKER;
   1498         } else {
   1499             cnv->toULength = 0;
   1500         }
   1501 
   1502         if (*toUnicodeStatus != missingCharMarker) {
   1503             /* output a remaining target character */
   1504             WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),*toUnicodeStatus,data->currentDeltaToUnicode,err);
   1505             *toUnicodeStatus = missingCharMarker;
   1506         }
   1507     }
   1508 
   1509     args->target = target;
   1510     args->source = source;
   1511 }
   1512 
   1513 /*
         * Layout that _ISCII_SafeClone() overlays on the caller-supplied clone buffer.
         * sizeof(struct cloneISCIIStruct) is the size it reports for a preflighting request.
         */
   1514 struct cloneISCIIStruct {
   1515     UConverter cnv;             /* the converter object, at the start of the buffer */
   1516     UConverterDataISCII mydata; /* ISCII state copied from the source converter's extraInfo */
   1517 };
   1518 /*
         * Completes a safe clone of an ISCII converter inside a caller-supplied buffer.
         *
         * cnv          source converter; sizeof(UConverterDataISCII) bytes are copied
         *              from its extraInfo.
         * stackBuffer  destination buffer, accessed as a struct cloneISCIIStruct.
         * pBufferSize  in: 0 requests preflighting; out (preflighting only): receives
         *              sizeof(struct cloneISCIIStruct).
         * status       in: if it already indicates failure, nothing is done.
         *
         * Returns a pointer to the converter embedded in stackBuffer, or 0 when status
         * was already a failure or when the call was a preflighting request.
         *
         * NOTE(review): a non-zero *pBufferSize is not compared against the needed
         * size here; presumably the caller (ucnv_safeClone) guarantees a large enough,
         * suitably aligned buffer -- confirm.
         */
   1519 static UConverter *
   1520 _ISCII_SafeClone(const UConverter *cnv,
   1521               void *stackBuffer,
   1522               int32_t *pBufferSize,
   1523               UErrorCode *status)
   1524 {
   1525     struct cloneISCIIStruct * localClone;
   1526     int32_t bufferSizeNeeded = sizeof(struct cloneISCIIStruct); /* converter object plus its ISCII state */
   1527 
   1528     if (U_FAILURE(*status)) { /* an earlier error is pending: do not touch the buffer */
   1529         return 0;
   1530     }
   1531 
   1532     if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */
   1533         *pBufferSize = bufferSizeNeeded;
   1534         return 0;
   1535     }
   1536 
   1537     localClone = (struct cloneISCIIStruct *)stackBuffer;
   1538     /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
   1539 
   1540     uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataISCII)); /* duplicate the ISCII-specific state */
   1541     localClone->cnv.extraInfo = &localClone->mydata; /* the clone uses its own copy, not the source's state */
   1542     localClone->cnv.isExtraLocal = TRUE; /* presumably marks extraInfo as living inside this buffer so it is not freed separately -- confirm */
   1543 
   1544     return &localClone->cnv;
   1545 }
   1546 /*
         * Reports, through the callbacks in sa, the code points this converter handles:
         *   - the range 0..ASCII_END,
         *   - for every script from DEVANAGARI through MALAYALAM, each offset in
         *     [0, DELTA) whose validityTable entry shares a bit with that script's mask
         *     (plus offset 0x31 for TELUGU), mapped to
         *     offset + script * DELTA + INDIC_BLOCK_BEGIN,
         *   - DANDA, DOUBLE_DANDA, ZWNJ and ZWJ.
         *
         * cnv, which and pErrorCode are not used by this function.
         */
   1547 static void
   1548 _ISCIIGetUnicodeSet(const UConverter *cnv,
   1549                     const USetAdder *sa,
   1550                     UConverterUnicodeSet which,
   1551                     UErrorCode *pErrorCode)
   1552 {
   1553     int32_t idx, script;
   1554     uint8_t mask;
   1555 
   1556     /* Since all ISCII versions allow switching to other ISCII
   1557     scripts, we add all roundtrippable characters to this set. */
   1558     sa->addRange(sa->set, 0, ASCII_END); /* the range starting at 0 is added unconditionally */
   1559     for (script = DEVANAGARI; script <= MALAYALAM; script++) { /* one pass per script */
   1560         mask = (uint8_t)(lookupInitialData[script].maskEnum); /* validity bit(s) of this script, narrowed to 8 bits */
   1561         for (idx = 0; idx < DELTA; idx++) { /* every offset inside one script's block */
   1562             /* keep offsets valid for this script; offset 0x31 is additionally forced in for TELUGU */
   1563             if ((validityTable[idx] & mask) || (script==TELUGU && idx==0x31)) {
   1564                 sa->add(sa->set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN); /* offset -> code point in this script's block */
   1565             }
   1566         }
   1567     }
   1568     sa->add(sa->set, DANDA); /* the remaining four code points are added explicitly */
   1569     sa->add(sa->set, DOUBLE_DANDA);
   1570     sa->add(sa->set, ZWNJ); /* zero width non joiner */
   1571     sa->add(sa->set, ZWJ); /* zero width joiner */
   1572 }
   1573 /*
         * Table of the ISCII converter's identifier and callbacks, initialized
         * positionally; _ISCIIData points at it. The slot meanings given below are
         * presumed from the names of the functions installed -- TODO confirm against
         * the UConverterImpl declaration, which is not visible in this file section.
         */
   1574 static const UConverterImpl _ISCIIImpl={
   1575 
   1576     UCNV_ISCII, /* converter type identifier */
   1577 
   1578     NULL, /* two unused slots (presumably load/unload hooks -- confirm) */
   1579     NULL,
   1580 
   1581     _ISCIIOpen, /* open / close / reset handlers */
   1582     _ISCIIClose,
   1583     _ISCIIReset,
   1584 
   1585     UConverter_toUnicode_ISCII_OFFSETS_LOGIC, /* same to-Unicode routine in two consecutive slots (presumably without and with offsets -- confirm) */
   1586     UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
   1587     UConverter_fromUnicode_ISCII_OFFSETS_LOGIC, /* same from-Unicode routine in two consecutive slots (presumably without and with offsets -- confirm) */
   1588     UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
   1589     NULL, /* unused slot (presumably a get-next-character hook -- confirm) */
   1590 
   1591     NULL, /* unused slot */
   1592     _ISCIIgetName, /* name getter */
   1593     NULL, /* unused slot */
   1594     _ISCII_SafeClone, /* clone support; see _ISCII_SafeClone above */
   1595     _ISCIIGetUnicodeSet /* reports the supported code points */
   1596 };
   1597 /*
         * Constant description of the ISCII converter, initialized positionally;
         * _ISCIIData points at it. Field meanings noted below are presumptions --
         * TODO confirm against the UConverterStaticData declaration, which is not
         * visible in this file section.
         */
   1598 static const UConverterStaticData _ISCIIStaticData={
   1599     sizeof(UConverterStaticData), /* size of this structure */
   1600         "ISCII", /* converter name */
   1601          0, /* presumably a codepage number -- confirm */
   1602          UCNV_IBM, /* platform identifier */
   1603          UCNV_ISCII, /* converter type identifier, same value as in _ISCIIImpl */
   1604          1, /* presumably minimum bytes per character -- confirm */
   1605          4, /* presumably maximum bytes per character -- confirm */
   1606         { 0x1a, 0, 0, 0 }, /* presumably the substitution byte sequence -- confirm */
   1607         0x1, /* presumably the length of that sequence -- confirm */
   1608         FALSE, /* two boolean flags, both off (meaning not visible here) */
   1609         FALSE,
   1610         0x0,
   1611         0x0,
   1612         { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */
   1613 
   1614 };
   1615 /*
         * Shared-data record for the ISCII converter. It is not declared static, so it
         * is visible to other translation units; it ties together _ISCIIStaticData and
         * _ISCIIImpl. Initialized positionally; field meanings noted below are
         * presumptions -- TODO confirm against the UConverterSharedData declaration,
         * which is not visible in this file section.
         */
   1616 const UConverterSharedData _ISCIIData={
   1617     sizeof(UConverterSharedData), /* size of this structure */
   1618         ~((uint32_t) 0), /* all bits set; presumably a reference count that marks the record as never released -- confirm */
   1619         NULL, /* two unused pointer slots */
   1620         NULL,
   1621         &_ISCIIStaticData, /* constant converter description defined above */
   1622         FALSE,
   1623         &_ISCIIImpl, /* callback table defined above */
   1624         0
   1625 };
   1626 
   1627 #endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION */
   1628