Home | History | Annotate | Download | only in common
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 2000-2009, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 *   file name:  ucnv2022.c
      7 *   encoding:   US-ASCII
      8 *   tab size:   8 (not used)
      9 *   indentation:4
     10 *
     11 *   created on: 2000feb03
     12 *   created by: Markus W. Scherer
     13 *
     14 *   Change history:
     15 *
     16 *   06/29/2000  helena  Major rewrite of the callback APIs.
     17 *   08/08/2000  Ram     Included support for ISO-2022-JP-2
     18 *                       Changed implementation of toUnicode
     19 *                       function
     20 *   08/21/2000  Ram     Added support for ISO-2022-KR
*   08/29/2000  Ram     Separated implementation of EBCDIC to
     22 *                       ucnvebdc.c
     23 *   09/20/2000  Ram     Added support for ISO-2022-CN
     24 *                       Added implementations for getNextUChar()
     25 *                       for specific 2022 country variants.
     26 *   10/31/2000  Ram     Implemented offsets logic functions
     27 */
     28 
     29 #include "unicode/utypes.h"
     30 
     31 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
     32 
     33 #include "unicode/ucnv.h"
     34 #include "unicode/uset.h"
     35 #include "unicode/ucnv_err.h"
     36 #include "unicode/ucnv_cb.h"
     37 #include "ucnv_imp.h"
     38 #include "ucnv_bld.h"
     39 #include "ucnv_cnv.h"
     40 #include "ucnvmbcs.h"
     41 #include "cstring.h"
     42 #include "cmemory.h"
     43 
     44 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     45 
     46 #ifdef U_ENABLE_GENERIC_ISO_2022
     47 /*
     48  * I am disabling the generic ISO-2022 converter after proposing to do so on
     49  * the icu mailing list two days ago.
     50  *
     51  * Reasons:
     52  * 1. It does not fully support the ISO-2022/ECMA-35 specification with all of
     53  *    its designation sequences, single shifts with return to the previous state,
     54  *    switch-with-no-return to UTF-16BE or similar, etc.
     55  *    This is unlike the language-specific variants like ISO-2022-JP which
     56  *    require a much smaller repertoire of ISO-2022 features.
     57  *    These variants continue to be supported.
     58  * 2. I believe that no one is really using the generic ISO-2022 converter
     59  *    but rather always one of the language-specific variants.
     60  *    Note that ICU's generic ISO-2022 converter has always output one escape
     61  *    sequence followed by UTF-8 for the whole stream.
     62  * 3. Switching between subcharsets is extremely slow, because each time
     63  *    the previous converter is closed and a new one opened,
     64  *    without any kind of caching, least-recently-used list, etc.
     65  * 4. The code is currently buggy, and given the above it does not seem
     66  *    reasonable to spend the time on maintenance.
     67  * 5. ISO-2022 subcharsets should normally be used with 7-bit byte encodings.
     68  *    This means, for example, that when ISO-8859-7 is designated, the following
     69  *    ISO-2022 bytes 00..7f should be interpreted as ISO-8859-7 bytes 80..ff.
     70  *    The ICU ISO-2022 converter does not handle this - and has no information
     71  *    about which subconverter would have to be shifted vs. which is designed
     72  *    for 7-bit ISO-2022.
     73  *
     74  * Markus Scherer 2003-dec-03
     75  */
     76 #endif
     77 
/* One-byte strings holding the SI (0x0F) and SO (0x0E) control codes. */
static const char SHIFT_IN_STR[]  = "\x0F";
static const char SHIFT_OUT_STR[] = "\x0E";

/* Code values of the ASCII whitespace/control characters named by the macros. */
#define CR      0x0D
#define LF      0x0A
#define H_TAB   0x09
#define V_TAB   0x0B
#define SPACE   0x20

/*
 * First and last value of a contiguous range of code points.
 * NOTE(review): judging by the names this is the Unicode halfwidth Katakana
 * range U+FF61..U+FF9F - confirm against the code that uses it.
 */
enum {
    HWKANA_START=0xff61,
    HWKANA_END=0xff9f
};
     91 
     92 /*
     93  * 94-character sets with native byte values A1..FE are encoded in ISO 2022
     94  * as bytes 21..7E. (Subtract 0x80.)
     95  * 96-character sets with native byte values A0..FF are encoded in ISO 2022
     96  * as bytes 20..7F. (Subtract 0x80.)
     97  * Do not encode C1 control codes with native bytes 80..9F
     98  * as bytes 00..1F (C0 control codes).
     99  */
/* Inclusive native (GR) byte ranges of 94- and 96-character sets. */
enum {
    GR94_START=0xa1,    /* first native byte of a 94-character set */
    GR94_END=0xfe,      /* last native byte of a 94-character set */
    GR96_START=0xa0,    /* first native byte of a 96-character set */
    GR96_END=0xff       /* last native byte of a 96-character set */
};
    106 
    107 /*
    108  * ISO 2022 control codes must not be converted from Unicode
    109  * because they would mess up the byte stream.
    110  * The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b
    111  * corresponding to SO, SI, and ESC.
    112  */
    113 #define IS_2022_CONTROL(c) (((c)<0x20) && (((uint32_t)1<<(c))&0x0800c000)!=0)
    114 
    115 /* for ISO-2022-JP and -CN implementations */
    116 typedef enum  {
    117         /* shared values */
    118         INVALID_STATE=-1,
    119         ASCII = 0,
    120 
    121         SS2_STATE=0x10,
    122         SS3_STATE,
    123 
    124         /* JP */
    125         ISO8859_1 = 1 ,
    126         ISO8859_7 = 2 ,
    127         JISX201  = 3,
    128         JISX208 = 4,
    129         JISX212 = 5,
    130         GB2312  =6,
    131         KSC5601 =7,
    132         HWKANA_7BIT=8,    /* Halfwidth Katakana 7 bit */
    133 
    134         /* CN */
    135         /* the first few enum constants must keep their values because they correspond to myConverterArray[] */
    136         GB2312_1=1,
    137         ISO_IR_165=2,
    138         CNS_11643=3,
    139 
    140         /*
    141          * these are used in StateEnum and ISO2022State variables,
    142          * but CNS_11643 must be used to index into myConverterArray[]
    143          */
    144         CNS_11643_0=0x20,
    145         CNS_11643_1,
    146         CNS_11643_2,
    147         CNS_11643_3,
    148         CNS_11643_4,
    149         CNS_11643_5,
    150         CNS_11643_6,
    151         CNS_11643_7
    152 } StateEnum;
    153 
/*
 * is the StateEnum charset value for a DBCS charset?
 * True for the contiguous range JISX208..KSC5601, i.e. JISX208, JISX212,
 * GB2312 and KSC5601. The argument is evaluated twice.
 */
#define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)

/* Charset mask: a value with only bit number cs set, where cs is a StateEnum charset value. */
#define CSM(cs) ((uint16_t)1<<(cs))
    158 
    159 /*
    160  * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence
    161  * to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x
    162  *
    163  * Note: The converter uses some leniency:
    164  * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in
    165  *   all versions, not just JIS7 and JIS8.
    166  * - ICU does not distinguish between different versions of JIS X 0208.
    167  */
    168 enum { MAX_JA_VERSION=4 };
    169 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
    170     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
    171     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
    172     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
    173     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
    174     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)
    175 };
    176 
    177 typedef enum {
    178         ASCII1=0,
    179         LATIN1,
    180         SBCS,
    181         DBCS,
    182         MBCS,
    183         HWKANA
    184 }Cnv2022Type;
    185 
/*
 * ISO 2022 designation/shift state: which charset is assigned to each of
 * G0..G3 and which of the four is currently selected.
 * UConverterDataISO2022 keeps one instance for each conversion direction.
 */
typedef struct ISO2022State {
    int8_t cs[4];       /* charset number for SI (G0)/SO (G1)/SS2 (G2)/SS3 (G3) */
    int8_t g;           /* 0..3 for G0..G3 (SI/SO/SS2/SS3) */
    int8_t prevG;       /* g before single shift (SS2 or SS3) */
} ISO2022State;
    191 
/* Mask applied to the converter options to extract the variant version number. */
#define UCNV_OPTIONS_VERSION_MASK 0xf
/* Capacity of UConverterDataISO2022.myConverterArray[]. */
#define UCNV_2022_MAX_CONVERTERS 10

/* Per-converter private data of an ISO-2022 converter (stored in UConverter.extraInfo). */
typedef struct{
    /* shared data of the sub-charset converters, indexed by StateEnum charset value */
    UConverterSharedData *myConverterArray[UCNV_2022_MAX_CONVERTERS];
    /* sub-converter opened with ucnv_open(); set for the ko locale */
    UConverter *currentConverter;
    /* initialized to ASCII1 when the converter is opened */
    Cnv2022Type currentType;
    /* separate ISO 2022 states for the toUnicode and fromUnicode directions */
    ISO2022State toU2022State, fromU2022State;
    /* NOTE(review): presumably the accumulated escape-sequence key - confirm against its users */
    uint32_t key;
    /* variant version taken from the open options (options & UCNV_OPTIONS_VERSION_MASK) */
    uint32_t version;
#ifdef U_ENABLE_GENERIC_ISO_2022
    /* set to TRUE when the generic converter is opened */
    UBool isFirstBuffer;
#endif
    /* NOTE(review): usage not visible in this part of the file */
    UBool isEmptySegment;
    /* converter name, e.g. "ISO_2022,locale=ja,version=0" */
    char name[30];
    /* two-letter locale tag ("ja", "ko" or "cn") plus terminating NUL */
    char locale[3];
}UConverterDataISO2022;
    209 
    210 /* Protos */
    211 /* ISO-2022 ----------------------------------------------------------------- */
    212 
/*Forward declaration */
/*
 * UTF-8 fromUnicode functions, in a plain variant and one with offsets logic.
 * They are declared here directly instead of through a header.
 */
U_CFUNC void
ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs * args,
                      UErrorCode * err);
U_CFUNC void
ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs * args,
                                    UErrorCode * err);

/* Byte value of the ESC control code that starts an ISO 2022 escape sequence. */
#define ESC_2022 0x1B /*ESC*/
    222 
    223 typedef enum
    224 {
    225         INVALID_2022 = -1, /*Doesn't correspond to a valid iso 2022 escape sequence*/
    226         VALID_NON_TERMINAL_2022 = 0, /*so far corresponds to a valid iso 2022 escape sequence*/
    227         VALID_TERMINAL_2022 = 1, /*corresponds to a valid iso 2022 escape sequence*/
    228         VALID_MAYBE_TERMINAL_2022 = 2 /*so far matches one iso 2022 escape sequence, but by adding more characters might match another escape sequence*/
    229 } UCNV_TableStates_2022;
    230 
    231 /*
    232 * The way these state transition arrays work is:
    233 * ex : ESC$B is the sequence for JISX208
    234 *      a) First Iteration: char is ESC
    235 *          i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index
    236 *             int x = normalize_esq_chars_2022[27] which is equal to 1
    237 *         ii) Search for this value in escSeqStateTable_Key_2022[]
    238 *             value of x is stored at escSeqStateTable_Key_2022[0]
    239 *        iii) Save this index as offset
    240 *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
    241 *             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
    242 *     b) Switch on this state and continue to next char
    243 *          i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index
    244 *             which is normalize_esq_chars_2022[36] == 4
    245 *         ii) x is currently 1(from above)
    246 *               x<<=5 -- x is now 32
    247 *               x+=normalize_esq_chars_2022[36]
    248 *               now x is 36
    249 *        iii) Search for this value in escSeqStateTable_Key_2022[]
    250 *             value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2
    251 *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
    252 *             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
    253 *     c) Switch on this state and continue to next char
    254 *        i)  Get the value of B from normalize_esq_chars_2022[] with int value of B as index
    255 *        ii) x is currently 36 (from above)
    256 *            x<<=5 -- x is now 1152
    257 *            x+=normalize_esq_chars_2022[66]
    258 *            now x is 1161
    259 *       iii) Search for this value in escSeqStateTable_Key_2022[]
*            value of x is stored at escSeqStateTable_Key_2022[24], so offset is 24
*        iv) Get state of this sequence from escSeqStateTable_Value_2022[24]
*            escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022
*         v) Get the converter name from escSeqStateTable_Result_2022[24] which is JISX-208
    264 */
    265 
    266 
    267 /*Below are the 3 arrays depicting a state transition table*/
    268 static const int8_t normalize_esq_chars_2022[256] = {
    269 /*       0      1       2       3       4      5       6        7       8       9           */
    270 
    271          0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    272         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    273         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,1      ,0      ,0
    274         ,0     ,0      ,0      ,0      ,0      ,0      ,4      ,7      ,29      ,0
    275         ,2     ,24     ,26     ,27     ,0      ,3      ,23     ,6      ,0      ,0
    276         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    277         ,0     ,0      ,0      ,0      ,5      ,8      ,9      ,10     ,11     ,12
    278         ,13    ,14     ,15     ,16     ,17     ,18     ,19     ,20     ,25     ,28
    279         ,0     ,0      ,21     ,0      ,0      ,0      ,0      ,0      ,0      ,0
    280         ,22    ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    281         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    282         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    283         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    284         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    285         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    286         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    287         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    288         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    289         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    290         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    291         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    292         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    293         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    294         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    295         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    296         ,0     ,0      ,0      ,0      ,0      ,0
    297 };
    298 
    299 #ifdef U_ENABLE_GENERIC_ISO_2022
    300 /*
    301  * When the generic ISO-2022 converter is completely removed, not just disabled
    302  * per #ifdef, then the following state table and the associated tables that are
    303  * dimensioned with MAX_STATES_2022 should be trimmed.
    304  *
    305  * Especially, VALID_MAYBE_TERMINAL_2022 will not be used any more, and all of
    306  * the associated escape sequences starting with ESC ( B should be removed.
    307  * This includes the ones with key values 1097 and all of the ones above 1000000.
    308  *
    309  * For the latter, the tables can simply be truncated.
    310  * For the former, since the tables must be kept parallel, it is probably best
    311  * to simply duplicate an adjacent table cell, parallel in all tables.
    312  *
    313  * It may make sense to restructure the tables, especially by using small search
    314  * tables for the variants instead of indexing them parallel to the table here.
    315  */
    316 #endif
    317 
    318 #define MAX_STATES_2022 74
    319 static const int32_t escSeqStateTable_Key_2022[MAX_STATES_2022] = {
    320 /*   0           1           2           3           4           5           6           7           8           9           */
    321 
    322      1          ,34         ,36         ,39         ,55         ,57         ,60         ,61         ,1093       ,1096
    323     ,1097       ,1098       ,1099       ,1100       ,1101       ,1102       ,1103       ,1104       ,1105       ,1106
    324     ,1109       ,1154       ,1157       ,1160       ,1161       ,1176       ,1178       ,1179       ,1254       ,1257
    325     ,1768       ,1773       ,1957       ,35105      ,36933      ,36936      ,36937      ,36938      ,36939      ,36940
    326     ,36942      ,36943      ,36944      ,36945      ,36946      ,36947      ,36948      ,37640      ,37642      ,37644
    327     ,37646      ,37711      ,37744      ,37745      ,37746      ,37747      ,37748      ,40133      ,40136      ,40138
    328     ,40139      ,40140      ,40141      ,1123363    ,35947624   ,35947625   ,35947626   ,35947627   ,35947629   ,35947630
    329     ,35947631   ,35947635   ,35947636   ,35947638
    330 };
    331 
#ifdef U_ENABLE_GENERIC_ISO_2022

/*
 * Parallel to escSeqStateTable_Key_2022[]: entry i is the converter name for
 * the escape sequence whose key is escSeqStateTable_Key_2022[i], or NULL where
 * no converter name is associated with that key.
 * Only compiled when the generic ISO-2022 converter is enabled.
 */
static const char* const escSeqStateTable_Result_2022[MAX_STATES_2022] = {
 /*  0                      1                        2                      3                   4                   5                        6                      7                       8                       9    */

     NULL                   ,NULL                   ,NULL                   ,NULL               ,NULL               ,NULL                   ,NULL                   ,NULL                   ,"latin1"               ,"latin1"
    ,"latin1"               ,"ibm-865"              ,"ibm-865"              ,"ibm-865"          ,"ibm-865"          ,"ibm-865"              ,"ibm-865"              ,"JISX0201"             ,"JISX0201"             ,"latin1"
    ,"latin1"               ,NULL                   ,"JISX-208"             ,"ibm-5478"         ,"JISX-208"         ,NULL                   ,NULL                   ,NULL                   ,NULL                   ,"UTF8"
    ,"ISO-8859-1"           ,"ISO-8859-7"           ,"JIS-X-208"            ,NULL               ,"ibm-955"          ,"ibm-367"              ,"ibm-952"              ,"ibm-949"              ,"JISX-212"             ,"ibm-1383"
    ,"ibm-952"              ,"ibm-964"              ,"ibm-964"              ,"ibm-964"          ,"ibm-964"          ,"ibm-964"              ,"ibm-964"              ,"ibm-5478"         ,"ibm-949"              ,"ISO-IR-165"
    ,"CNS-11643-1992,1"     ,"CNS-11643-1992,2"     ,"CNS-11643-1992,3"     ,"CNS-11643-1992,4" ,"CNS-11643-1992,5" ,"CNS-11643-1992,6"     ,"CNS-11643-1992,7"     ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian"
    ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,NULL               ,"latin1"           ,"ibm-912"              ,"ibm-913"              ,"ibm-914"              ,"ibm-813"              ,"ibm-1089"
    ,"ibm-920"              ,"ibm-915"              ,"ibm-915"              ,"latin1"
};

#endif
    348 
/*
 * Parallel to escSeqStateTable_Key_2022[]: entry i is the
 * UCNV_TableStates_2022 classification of the escape sequence (or prefix)
 * whose key is escSeqStateTable_Key_2022[i].
 */
static const int8_t escSeqStateTable_Value_2022[MAX_STATES_2022] = {
/*          0                           1                         2                             3                           4                           5                               6                        7                          8                           9       */
     VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022     ,VALID_NON_TERMINAL_2022   ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    ,VALID_MAYBE_TERMINAL_2022  ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022
    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
};
    360 
    361 
/* Type def for refactoring changeState_2022 code*/
/*
 * Identifies the ISO-2022 variant. The generic variant (value 0) exists only
 * when U_ENABLE_GENERIC_ISO_2022 is defined; the language-specific values
 * are the same either way.
 */
typedef enum{
#ifdef U_ENABLE_GENERIC_ISO_2022
    ISO_2022=0,
#endif
    ISO_2022_JP=1,
    ISO_2022_KR=2,
    ISO_2022_CN=3
} Variant2022;
    371 
/*********** ISO 2022 Converter Protos ***********/
/* Opens an ISO-2022 converter: allocates cnv->extraInfo and sets it up for the requested locale variant. */
static void
_ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode);

/* Counterpart of _ISO2022Open; _ISO2022Open calls it to clean up when opening fails. */
static void
 _ISO2022Close(UConverter *converter);

static void
_ISO2022Reset(UConverter *converter, UConverterResetChoice choice);

static const char*
_ISO2022getName(const UConverter* cnv);

static void
_ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err);

static UConverter *
_ISO_2022_SafeClone(const UConverter *cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status);

#ifdef U_ENABLE_GENERIC_ISO_2022
static void
T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, UErrorCode* err);
#endif

/*
 * Shared-data objects of the language-specific variants, declared ahead of
 * their definitions so that _ISO2022Open can refer to them.
 */
/*const UConverterSharedData _ISO2022Data;*/
static const UConverterSharedData _ISO2022JPData;
static const UConverterSharedData _ISO2022KRData;
static const UConverterSharedData _ISO2022CNData;
    400 
    401 /*************** Converter implementations ******************/
    402 
    403 /* The purpose of this function is to get around gcc compiler warnings. */
    404 static U_INLINE void
    405 fromUWriteUInt8(UConverter *cnv,
    406                  const char *bytes, int32_t length,
    407                  uint8_t **target, const char *targetLimit,
    408                  int32_t **offsets,
    409                  int32_t sourceIndex,
    410                  UErrorCode *pErrorCode)
    411 {
    412     char *targetChars = (char *)*target;
    413     ucnv_fromUWriteBytes(cnv, bytes, length, &targetChars, targetLimit,
    414                          offsets, sourceIndex, pErrorCode);
    415     *target = (uint8_t*)targetChars;
    416 
    417 }
    418 
    419 static U_INLINE void
    420 setInitialStateToUnicodeKR(UConverter* converter, UConverterDataISO2022 *myConverterData){
    421     if(myConverterData->version == 1) {
    422         UConverter *cnv = myConverterData->currentConverter;
    423 
    424         cnv->toUnicodeStatus=0;     /* offset */
    425         cnv->mode=0;                /* state */
    426         cnv->toULength=0;           /* byteIndex */
    427     }
    428 }
    429 
    430 static U_INLINE void
    431 setInitialStateFromUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConverterData){
    432    /* in ISO-2022-KR the designator sequence appears only once
    433     * in a file so we append it only once
    434     */
    435     if( converter->charErrorBufferLength==0){
    436 
    437         converter->charErrorBufferLength = 4;
    438         converter->charErrorBuffer[0] = 0x1b;
    439         converter->charErrorBuffer[1] = 0x24;
    440         converter->charErrorBuffer[2] = 0x29;
    441         converter->charErrorBuffer[3] = 0x43;
    442     }
    443     if(myConverterData->version == 1) {
    444         UConverter *cnv = myConverterData->currentConverter;
    445 
    446         cnv->fromUChar32=0;
    447         cnv->fromUnicodeStatus=1;   /* prevLength */
    448     }
    449 }
    450 
    451 static void
    452 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
    453 
    454     char myLocale[6]={' ',' ',' ',' ',' ',' '};
    455 
    456     cnv->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022));
    457     if(cnv->extraInfo != NULL) {
    458         UConverterNamePieces stackPieces;
    459         UConverterLoadArgs stackArgs={ (int32_t)sizeof(UConverterLoadArgs) };
    460         UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo;
    461         uint32_t version;
    462 
    463         stackArgs.onlyTestIsLoadable = pArgs->onlyTestIsLoadable;
    464 
    465         uprv_memset(myConverterData, 0, sizeof(UConverterDataISO2022));
    466         myConverterData->currentType = ASCII1;
    467         cnv->fromUnicodeStatus =FALSE;
    468         if(pArgs->locale){
    469             uprv_strncpy(myLocale, pArgs->locale, sizeof(myLocale));
    470         }
    471         version = pArgs->options & UCNV_OPTIONS_VERSION_MASK;
    472         myConverterData->version = version;
    473 
    474         /* BEGIN android-changed */
    475         /* The "jk" locale ID was made up for KDDI ISO-2022-JP. */
    476         /* The "js" locale ID was made up for SoftBank ISO-2022-JP. */
    477         if((myLocale[0]=='j' &&
    478             (myLocale[1]=='a'|| myLocale[1]=='p' || myLocale[1]=='k' ||
    479              myLocale[1]=='s') &&
    480             (myLocale[2]=='_' || myLocale[2]=='\0')))
    481         {
    482             size_t len=0;
    483             /* open the required converters and cache them */
    484             if(version>MAX_JA_VERSION) {
    485                 /* prevent indexing beyond jpCharsetMasks[] */
    486                 myConverterData->version = version = 0;
    487             }
    488             if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
    489                 myConverterData->myConverterArray[ISO8859_7] =
    490                     ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode);
    491             }
    492             if (myLocale[1]=='k') {  /* Use KDDI's version. */
    493                 myConverterData->myConverterArray[JISX208] =
    494                     ucnv_loadSharedData("kddi-jisx-208-2007", &stackPieces, &stackArgs, errorCode);
    495             } else if (myLocale[1]=='s') {  /* Use SoftBank's version. */
    496                 myConverterData->myConverterArray[JISX208] =
    497                     ucnv_loadSharedData("softbank-jisx-208-2007", &stackPieces, &stackArgs, errorCode);
    498             } else {
    499                 myConverterData->myConverterArray[JISX208] =
    500                     ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, errorCode);
    501             }
    502             /* END android-changed */
    503 
    504             if(jpCharsetMasks[version]&CSM(JISX212)) {
    505                 myConverterData->myConverterArray[JISX212] =
    506                     ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode);
    507             }
    508             if(jpCharsetMasks[version]&CSM(GB2312)) {
    509                 myConverterData->myConverterArray[GB2312] =
    510                     /* BEGIN android-changed */
    511                     ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode);   /* gb_2312_80-1 */
    512                     /* END android-changed */
    513             }
    514             if(jpCharsetMasks[version]&CSM(KSC5601)) {
    515                 myConverterData->myConverterArray[KSC5601] =
    516                     ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode);
    517             }
    518 
    519             /* set the function pointers to appropriate funtions */
    520             cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);
    521             uprv_strcpy(myConverterData->locale,"ja");
    522 
    523             (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=");
    524             len = uprv_strlen(myConverterData->name);
    525             myConverterData->name[len]=(char)(myConverterData->version+(int)'0');
    526             myConverterData->name[len+1]='\0';
    527         }
    528         else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
    529             (myLocale[2]=='_' || myLocale[2]=='\0'))
    530         {
    531             const char *cnvName;
    532             if(version==1) {
    533                 cnvName="icu-internal-25546";
    534             } else {
    535                 /* BEGIN android-changed */
    536                 cnvName="ksc_5601";
    537                 /* END android-changed */
    538                 myConverterData->version=version=0;
    539             }
    540             if(pArgs->onlyTestIsLoadable) {
    541                 ucnv_canCreateConverter(cnvName, errorCode);  /* errorCode carries result */
    542                 uprv_free(cnv->extraInfo);
    543                 cnv->extraInfo=NULL;
    544                 return;
    545             } else {
    546                 myConverterData->currentConverter=ucnv_open(cnvName, errorCode);
    547                 if (U_FAILURE(*errorCode)) {
    548                     _ISO2022Close(cnv);
    549                     return;
    550                 }
    551 
    552                 if(version==1) {
    553                     (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=1");
    554                     uprv_memcpy(cnv->subChars, myConverterData->currentConverter->subChars, 4);
    555                     cnv->subCharLen = myConverterData->currentConverter->subCharLen;
    556                 }else{
    557                     (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=0");
    558                 }
    559 
    560                 /* initialize the state variables */
    561                 setInitialStateToUnicodeKR(cnv, myConverterData);
    562                 setInitialStateFromUnicodeKR(cnv, myConverterData);
    563 
    564                 /* set the function pointers to appropriate funtions */
    565                 cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData;
    566                 uprv_strcpy(myConverterData->locale,"ko");
    567             }
    568         }
    569         else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& myLocale[1]=='n'))&&
    570             (myLocale[2]=='_' || myLocale[2]=='\0'))
    571         {
    572 
    573             /* open the required converters and cache them */
    574             /* BEGIN android-changed */
    575             myConverterData->myConverterArray[GB2312_1] =
    576                 ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode);
    577             if(version==1) {
    578                 myConverterData->myConverterArray[ISO_IR_165] =
    579                     ucnv_loadSharedData("noop-iso-ir-165", &stackPieces, &stackArgs, errorCode);
    580             }
    581             myConverterData->myConverterArray[CNS_11643] =
    582                 ucnv_loadSharedData("noop-cns-11643", &stackPieces, &stackArgs, errorCode);
    583             /* END android-changed */
    584 
    585 
    586             /* set the function pointers to appropriate functions */
    587             cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData;
    588             uprv_strcpy(myConverterData->locale,"cn");
    589 
    590             if (version==1){
    591                 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=1");
    592             }else{
    593                 myConverterData->version = 0;
    594                 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=0");
    595             }
    596         }
    597         else{
    598 #ifdef U_ENABLE_GENERIC_ISO_2022
    599             myConverterData->isFirstBuffer = TRUE;
    600 
    601             /* append the UTF-8 escape sequence */
    602             cnv->charErrorBufferLength = 3;
    603             cnv->charErrorBuffer[0] = 0x1b;
    604             cnv->charErrorBuffer[1] = 0x25;
    605             cnv->charErrorBuffer[2] = 0x42;
    606 
    607             cnv->sharedData=(UConverterSharedData*)&_ISO2022Data;
    608             /* initialize the state variables */
    609             uprv_strcpy(myConverterData->name,"ISO_2022");
    610 #else
    611             *errorCode = U_UNSUPPORTED_ERROR;
    612             return;
    613 #endif
    614         }
    615 
    616         cnv->maxBytesPerUChar=cnv->sharedData->staticData->maxBytesPerChar;
    617 
    618         if(U_FAILURE(*errorCode) || pArgs->onlyTestIsLoadable) {
    619             _ISO2022Close(cnv);
    620         }
    621     } else {
    622         *errorCode = U_MEMORY_ALLOCATION_ERROR;
    623     }
    624 }
    625 
    626 
    627 static void
    628 _ISO2022Close(UConverter *converter) {
    629     UConverterDataISO2022* myData =(UConverterDataISO2022 *) (converter->extraInfo);
    630     UConverterSharedData **array = myData->myConverterArray;
    631     int32_t i;
    632 
    633     if (converter->extraInfo != NULL) {
    634         /*close the array of converter pointers and free the memory*/
    635         for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
    636             if(array[i]!=NULL) {
    637                 ucnv_unloadSharedDataIfReady(array[i]);
    638             }
    639         }
    640 
    641         ucnv_close(myData->currentConverter);
    642 
    643         if(!converter->isExtraLocal){
    644             uprv_free (converter->extraInfo);
    645             converter->extraInfo = NULL;
    646         }
    647     }
    648 }
    649 
    650 static void
    651 _ISO2022Reset(UConverter *converter, UConverterResetChoice choice) {
    652     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) (converter->extraInfo);
    653     if(choice<=UCNV_RESET_TO_UNICODE) {
    654         uprv_memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State));
    655         myConverterData->key = 0;
    656         myConverterData->isEmptySegment = FALSE;
    657     }
    658     if(choice!=UCNV_RESET_TO_UNICODE) {
    659         uprv_memset(&myConverterData->fromU2022State, 0, sizeof(ISO2022State));
    660     }
    661 #ifdef U_ENABLE_GENERIC_ISO_2022
    662     if(myConverterData->locale[0] == 0){
    663         if(choice<=UCNV_RESET_TO_UNICODE) {
    664             myConverterData->isFirstBuffer = TRUE;
    665             myConverterData->key = 0;
    666             if (converter->mode == UCNV_SO){
    667                 ucnv_close (myConverterData->currentConverter);
    668                 myConverterData->currentConverter=NULL;
    669             }
    670             converter->mode = UCNV_SI;
    671         }
    672         if(choice!=UCNV_RESET_TO_UNICODE) {
    673             /* re-append UTF-8 escape sequence */
    674             converter->charErrorBufferLength = 3;
    675             converter->charErrorBuffer[0] = 0x1b;
    676             converter->charErrorBuffer[1] = 0x28;
    677             converter->charErrorBuffer[2] = 0x42;
    678         }
    679     }
    680     else
    681 #endif
    682     {
    683         /* reset the state variables */
    684         if(myConverterData->locale[0] == 'k'){
    685             if(choice<=UCNV_RESET_TO_UNICODE) {
    686                 setInitialStateToUnicodeKR(converter, myConverterData);
    687             }
    688             if(choice!=UCNV_RESET_TO_UNICODE) {
    689                 setInitialStateFromUnicodeKR(converter, myConverterData);
    690             }
    691         }
    692     }
    693 }
    694 
    695 static const char*
    696 _ISO2022getName(const UConverter* cnv){
    697     if(cnv->extraInfo){
    698         UConverterDataISO2022* myData= (UConverterDataISO2022*)cnv->extraInfo;
    699         return myData->name;
    700     }
    701     return NULL;
    702 }
    703 
    704 
    705 /*************** to unicode *******************/
    706 /****************************************************************************
    707  * Recognized escape sequences are
    708  * <ESC>(B  ASCII
    709  * <ESC>.A  ISO-8859-1
    710  * <ESC>.F  ISO-8859-7
    711  * <ESC>(J  JISX-201
    712  * <ESC>(I  JISX-201
    713  * <ESC>$B  JISX-208
    714  * <ESC>$@  JISX-208
    715  * <ESC>$(D JISX-212
    716  * <ESC>$A  GB2312
    717  * <ESC>$(C KSC5601
    718  */
    719 static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= {
    720 /*      0                1               2               3               4               5               6               7               8               9    */
    721     INVALID_STATE   ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,SS2_STATE      ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    722     ,ASCII          ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,JISX201        ,HWKANA_7BIT    ,JISX201        ,INVALID_STATE
    723     ,INVALID_STATE  ,INVALID_STATE  ,JISX208        ,GB2312         ,JISX208        ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    724     ,ISO8859_1      ,ISO8859_7      ,JISX208        ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,KSC5601        ,JISX212        ,INVALID_STATE
    725     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    726     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    727     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    728     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    729 };
    730 
    731 /*************** to unicode *******************/
    732 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
    733 /*      0                1               2               3               4               5               6               7               8               9    */
    734      INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,SS2_STATE      ,SS3_STATE      ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    735     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    736     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    737     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    738     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,GB2312_1       ,INVALID_STATE  ,ISO_IR_165
    739     ,CNS_11643_1    ,CNS_11643_2    ,CNS_11643_3    ,CNS_11643_4    ,CNS_11643_5    ,CNS_11643_6    ,CNS_11643_7    ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    740     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    741     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    742 };
    743 
    744 
    745 static UCNV_TableStates_2022
    746 getKey_2022(char c,int32_t* key,int32_t* offset){
    747     int32_t togo;
    748     int32_t low = 0;
    749     int32_t hi = MAX_STATES_2022;
    750     int32_t oldmid=0;
    751 
    752     togo = normalize_esq_chars_2022[(uint8_t)c];
    753     if(togo == 0) {
    754         /* not a valid character anywhere in an escape sequence */
    755         *key = 0;
    756         *offset = 0;
    757         return INVALID_2022;
    758     }
    759     togo = (*key << 5) + togo;
    760 
    761     while (hi != low)  /*binary search*/{
    762 
    763         register int32_t mid = (hi+low) >> 1; /*Finds median*/
    764 
    765         if (mid == oldmid)
    766             break;
    767 
    768         if (escSeqStateTable_Key_2022[mid] > togo){
    769             hi = mid;
    770         }
    771         else if (escSeqStateTable_Key_2022[mid] < togo){
    772             low = mid;
    773         }
    774         else /*we found it*/{
    775             *key = togo;
    776             *offset = mid;
    777             return (UCNV_TableStates_2022)escSeqStateTable_Value_2022[mid];
    778         }
    779         oldmid = mid;
    780 
    781     }
    782 
    783     *key = 0;
    784     *offset = 0;
    785     return INVALID_2022;
    786 }
    787 
    788 /*runs through a state machine to determine the escape sequence - codepage correspondance
    789  */
    790 static void
    791 changeState_2022(UConverter* _this,
    792                 const char** source,
    793                 const char* sourceLimit,
    794                 Variant2022 var,
    795                 UErrorCode* err){
    796     UCNV_TableStates_2022 value;
    797     UConverterDataISO2022* myData2022 = ((UConverterDataISO2022*)_this->extraInfo);
    798     uint32_t key = myData2022->key;
    799     int32_t offset = 0;
    800     int8_t initialToULength = _this->toULength;
    801     char c;
    802 
    803     value = VALID_NON_TERMINAL_2022;
    804     while (*source < sourceLimit) {
    805         c = *(*source)++;
    806         _this->toUBytes[_this->toULength++]=(uint8_t)c;
    807         value = getKey_2022(c,(int32_t *) &key, &offset);
    808 
    809         switch (value){
    810 
    811         case VALID_NON_TERMINAL_2022 :
    812             /* continue with the loop */
    813             break;
    814 
    815         case VALID_TERMINAL_2022:
    816             key = 0;
    817             goto DONE;
    818 
    819         case INVALID_2022:
    820             goto DONE;
    821 
    822         case VALID_MAYBE_TERMINAL_2022:
    823 #ifdef U_ENABLE_GENERIC_ISO_2022
    824             /* ESC ( B is ambiguous only for ISO_2022 itself */
    825             if(var == ISO_2022) {
    826                 /* discard toUBytes[] for ESC ( B because this sequence is correct and complete */
    827                 _this->toULength = 0;
    828 
    829                 /* TODO need to indicate that ESC ( B was seen; if failure, then need to replay from source or from MBCS-style replay */
    830 
    831                 /* continue with the loop */
    832                 value = VALID_NON_TERMINAL_2022;
    833                 break;
    834             } else
    835 #endif
    836             {
    837                 /* not ISO_2022 itself, finish here */
    838                 value = VALID_TERMINAL_2022;
    839                 key = 0;
    840                 goto DONE;
    841             }
    842         }
    843     }
    844 
    845 DONE:
    846     myData2022->key = key;
    847 
    848     if (value == VALID_NON_TERMINAL_2022) {
    849         /* indicate that the escape sequence is incomplete: key!=0 */
    850         return;
    851     } else if (value == INVALID_2022 ) {
    852         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    853     } else /* value == VALID_TERMINAL_2022 */ {
    854         switch(var){
    855 #ifdef U_ENABLE_GENERIC_ISO_2022
    856         case ISO_2022:
    857         {
    858             const char *chosenConverterName = escSeqStateTable_Result_2022[offset];
    859             if(chosenConverterName == NULL) {
    860                 /* SS2 or SS3 */
    861                 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    862                 _this->toUCallbackReason = UCNV_UNASSIGNED;
    863                 return;
    864             }
    865 
    866             _this->mode = UCNV_SI;
    867             ucnv_close(myData2022->currentConverter);
    868             myData2022->currentConverter = myUConverter = ucnv_open(chosenConverterName, err);
    869             if(U_SUCCESS(*err)) {
    870                 myUConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
    871                 _this->mode = UCNV_SO;
    872             }
    873             break;
    874         }
    875 #endif
    876         case ISO_2022_JP:
    877             {
    878                 StateEnum tempState=(StateEnum)nextStateToUnicodeJP[offset];
    879                 switch(tempState) {
    880                 case INVALID_STATE:
    881                     *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    882                     break;
    883                 case SS2_STATE:
    884                     if(myData2022->toU2022State.cs[2]!=0) {
    885                         if(myData2022->toU2022State.g<2) {
    886                             myData2022->toU2022State.prevG=myData2022->toU2022State.g;
    887                         }
    888                         myData2022->toU2022State.g=2;
    889                     } else {
    890                         /* illegal to have SS2 before a matching designator */
    891                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    892                     }
    893                     break;
    894                 /* case SS3_STATE: not used in ISO-2022-JP-x */
    895                 case ISO8859_1:
    896                 case ISO8859_7:
    897                     if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
    898                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    899                     } else {
    900                         /* G2 charset for SS2 */
    901                         myData2022->toU2022State.cs[2]=(int8_t)tempState;
    902                     }
    903                     break;
    904                 default:
    905                     if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
    906                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    907                     } else {
    908                         /* G0 charset */
    909                         myData2022->toU2022State.cs[0]=(int8_t)tempState;
    910                     }
    911                     break;
    912                 }
    913             }
    914             break;
    915         case ISO_2022_CN:
    916             {
    917                 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
    918                 switch(tempState) {
    919                 case INVALID_STATE:
    920                     *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    921                     break;
    922                 case SS2_STATE:
    923                     if(myData2022->toU2022State.cs[2]!=0) {
    924                         if(myData2022->toU2022State.g<2) {
    925                             myData2022->toU2022State.prevG=myData2022->toU2022State.g;
    926                         }
    927                         myData2022->toU2022State.g=2;
    928                     } else {
    929                         /* illegal to have SS2 before a matching designator */
    930                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    931                     }
    932                     break;
    933                 case SS3_STATE:
    934                     if(myData2022->toU2022State.cs[3]!=0) {
    935                         if(myData2022->toU2022State.g<2) {
    936                             myData2022->toU2022State.prevG=myData2022->toU2022State.g;
    937                         }
    938                         myData2022->toU2022State.g=3;
    939                     } else {
    940                         /* illegal to have SS3 before a matching designator */
    941                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    942                     }
    943                     break;
    944                 case ISO_IR_165:
    945                     if(myData2022->version==0) {
    946                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    947                         break;
    948                     }
    949                     /*fall through*/
    950                 case GB2312_1:
    951                     /*fall through*/
    952                 case CNS_11643_1:
    953                     myData2022->toU2022State.cs[1]=(int8_t)tempState;
    954                     break;
    955                 case CNS_11643_2:
    956                     myData2022->toU2022State.cs[2]=(int8_t)tempState;
    957                     break;
    958                 default:
    959                     /* other CNS 11643 planes */
    960                     if(myData2022->version==0) {
    961                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    962                     } else {
    963                        myData2022->toU2022State.cs[3]=(int8_t)tempState;
    964                     }
    965                     break;
    966                 }
    967             }
    968             break;
    969         case ISO_2022_KR:
    970             if(offset==0x30){
    971                 /* nothing to be done, just accept this one escape sequence */
    972             } else {
    973                 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    974             }
    975             break;
    976 
    977         default:
    978             *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    979             break;
    980         }
    981     }
    982     if(U_SUCCESS(*err)) {
    983         _this->toULength = 0;
    984     } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) {
    985         if(_this->toULength>1) {
    986             /*
    987              * Ticket 5691: consistent illegal sequences:
    988              * - We include at least the first byte (ESC) in the illegal sequence.
    989              * - If any of the non-initial bytes could be the start of a character,
    990              *   we stop the illegal sequence before the first one of those.
    991              *   In escape sequences, all following bytes are "printable", that is,
    992              *   unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS),
    993              *   they are valid single/lead bytes.
    994              *   For simplicity, we always only report the initial ESC byte as the
    995              *   illegal sequence and back out all other bytes we looked at.
    996              */
    997             /* Back out some bytes. */
    998             int8_t backOutDistance=_this->toULength-1;
    999             int8_t bytesFromThisBuffer=_this->toULength-initialToULength;
   1000             if(backOutDistance<=bytesFromThisBuffer) {
   1001                 /* same as initialToULength<=1 */
   1002                 *source-=backOutDistance;
   1003             } else {
   1004                 /* Back out bytes from the previous buffer: Need to replay them. */
   1005                 _this->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance);
   1006                 /* same as -(initialToULength-1) */
   1007                 /* preToULength is negative! */
   1008                 uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULength);
   1009                 *source-=bytesFromThisBuffer;
   1010             }
   1011             _this->toULength=1;
   1012         }
   1013     } else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) {
   1014         _this->toUCallbackReason = UCNV_UNASSIGNED;
   1015     }
   1016 }
   1017 
   1018 /*Checks the characters of the buffer against valid 2022 escape sequences
   1019 *if the match we return a pointer to the initial start of the sequence otherwise
   1020 *we return sourceLimit
   1021 */
   1022 /*for 2022 looks ahead in the stream
   1023  *to determine the longest possible convertible
   1024  *data stream
   1025  */
   1026 static U_INLINE const char*
   1027 getEndOfBuffer_2022(const char** source,
   1028                    const char* sourceLimit,
   1029                    UBool flush){
   1030 
   1031     const char* mySource = *source;
   1032 
   1033 #ifdef U_ENABLE_GENERIC_ISO_2022
   1034     if (*source >= sourceLimit)
   1035         return sourceLimit;
   1036 
   1037     do{
   1038 
   1039         if (*mySource == ESC_2022){
   1040             int8_t i;
   1041             int32_t key = 0;
   1042             int32_t offset;
   1043             UCNV_TableStates_2022 value = VALID_NON_TERMINAL_2022;
   1044 
   1045             /* Kludge: I could not
   1046             * figure out the reason for validating an escape sequence
   1047             * twice - once here and once in changeState_2022().
   1048             * is it possible to have an ESC character in a ISO2022
   1049             * byte stream which is valid in a code page? Is it legal?
   1050             */
   1051             for (i=0;
   1052             (mySource+i < sourceLimit)&&(value == VALID_NON_TERMINAL_2022);
   1053             i++) {
   1054                 value =  getKey_2022(*(mySource+i), &key, &offset);
   1055             }
   1056             if (value > 0 || *mySource==ESC_2022)
   1057                 return mySource;
   1058 
   1059             if ((value == VALID_NON_TERMINAL_2022)&&(!flush) )
   1060                 return sourceLimit;
   1061         }
   1062     }while (++mySource < sourceLimit);
   1063 
   1064     return sourceLimit;
   1065 #else
   1066     while(mySource < sourceLimit && *mySource != ESC_2022) {
   1067         ++mySource;
   1068     }
   1069     return mySource;
   1070 #endif
   1071 }
   1072 
   1073 
   1074 /* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c
   1075  * any future change in _MBCSFromUChar32() function should be reflected here.
   1076  * @return number of bytes in *value; negative number if fallback; 0 if no mapping
   1077  */
   1078 static U_INLINE int32_t
   1079 MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData,
   1080                                          UChar32 c,
   1081                                          uint32_t* value,
   1082                                          UBool useFallback,
   1083                                          int outputType)
   1084 {
   1085     const int32_t *cx;
   1086     const uint16_t *table;
   1087     uint32_t stage2Entry;
   1088     uint32_t myValue;
   1089     int32_t length;
   1090     const uint8_t *p;
   1091     /*
   1092      * TODO(markus): Use and require new, faster MBCS conversion table structures.
   1093      * Use internal version of ucnv_open() that verifies that the new structures are available,
   1094      * else U_INTERNAL_PROGRAM_ERROR.
   1095      */
   1096     /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
   1097     if(c<0x10000 || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
   1098         table=sharedData->mbcs.fromUnicodeTable;
   1099         stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
   1100         /* get the bytes and the length for the output */
   1101         if(outputType==MBCS_OUTPUT_2){
   1102             myValue=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
   1103             if(myValue<=0xff) {
   1104                 length=1;
   1105             } else {
   1106                 length=2;
   1107             }
   1108         } else /* outputType==MBCS_OUTPUT_3 */ {
   1109             p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
   1110             myValue=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
   1111             if(myValue<=0xff) {
   1112                 length=1;
   1113             } else if(myValue<=0xffff) {
   1114                 length=2;
   1115             } else {
   1116                 length=3;
   1117             }
   1118         }
   1119         /* is this code point assigned, or do we use fallbacks? */
   1120         if((stage2Entry&(1<<(16+(c&0xf))))!=0) {
   1121             /* assigned */
   1122             *value=myValue;
   1123             return length;
   1124         } else if(FROM_U_USE_FALLBACK(useFallback, c) && myValue!=0) {
   1125             /*
   1126              * We allow a 0 byte output if the "assigned" bit is set for this entry.
   1127              * There is no way with this data structure for fallback output
   1128              * to be a zero byte.
   1129              */
   1130             *value=myValue;
   1131             return -length;
   1132         }
   1133     }
   1134 
   1135     cx=sharedData->mbcs.extIndexes;
   1136     if(cx!=NULL) {
   1137         return ucnv_extSimpleMatchFromU(cx, c, value, useFallback);
   1138     }
   1139 
   1140     /* unassigned */
   1141     return 0;
   1142 }
   1143 
   1144 /* This inline function replicates code in _MBCSSingleFromUChar32() function in ucnvmbcs.c
   1145  * any future change in _MBCSSingleFromUChar32() function should be reflected here.
   1146  * @param retval pointer to output byte
   1147  * @return 1 roundtrip byte  0 no mapping  -1 fallback byte
   1148  */
   1149 static U_INLINE int32_t
   1150 MBCS_SINGLE_FROM_UCHAR32(UConverterSharedData* sharedData,
   1151                                        UChar32 c,
   1152                                        uint32_t* retval,
   1153                                        UBool useFallback)
   1154 {
   1155     const uint16_t *table;
   1156     int32_t value;
   1157     /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
   1158     if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
   1159         return 0;
   1160     }
   1161     /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
   1162     table=sharedData->mbcs.fromUnicodeTable;
   1163     /* get the byte for the output */
   1164     value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
   1165     /* is this code point assigned, or do we use fallbacks? */
   1166     *retval=(uint32_t)(value&0xff);
   1167     if(value>=0xf00) {
   1168         return 1;  /* roundtrip */
   1169     } else if(useFallback ? value>=0x800 : value>=0xc00) {
   1170         return -1;  /* fallback taken */
   1171     } else {
   1172         return 0;  /* no mapping */
   1173     }
   1174 }
   1175 
   /*
    * Accepts only a 2-byte value whose bytes are both in the range A1..FE
    * (strict EUC DBCS) and moves it to the ISO 2022 range 21..7E by
    * subtracting 0x80 from each byte.
    * Return 0 if out of range.
    */
   static U_INLINE uint32_t
   _2022FromGR94DBCS(uint32_t value) {
       /* distance of the byte pair from A1A1, and of the trail byte from A1 */
       const uint16_t pairDelta = (uint16_t)(value - 0xa1a1);
       const uint8_t trailDelta = (uint8_t)(value - 0xa1);

       if(pairDelta > (0xfefe - 0xa1a1) || trailDelta > (0xfe - 0xa1)) {
           return 0;  /* not valid for ISO 2022 */
       }
       return value - 0x8080;  /* shift down to 21..7e byte range */
   }
   1192 
   #if 0 /* 5691: Call sites now check for validity. They can just += 0x8080 after that. */
   /*
    * Reverse of _2022FromGR94DBCS(): adds 0x80 to each byte of the 2022 code
    * point and returns the result if both bytes end up in A1..FE; otherwise
    * the 2022 code point is returned unchanged.
    */
   static U_INLINE uint32_t
   _2022ToGR94DBCS(uint32_t value) {
       const uint32_t shifted = value + 0x8080;

       if( (uint16_t)(shifted - 0xa1a1) > (0xfefe - 0xa1a1) ||
           (uint8_t)(shifted - 0xa1) > (0xfe - 0xa1)) {
           return value;
       }
       return shifted;
   }
   #endif
   1210 
   1211 #ifdef U_ENABLE_GENERIC_ISO_2022
   1212 
   1213 /**********************************************************************************
   1214 *  ISO-2022 Converter
   1215 *
   1216 *
   1217 */
   1218 
   1219 static void
   1220 T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args,
   1221                                                            UErrorCode* err){
   1222     const char* mySourceLimit, *realSourceLimit;
   1223     const char* sourceStart;
   1224     const UChar* myTargetStart;
   1225     UConverter* saveThis;
   1226     UConverterDataISO2022* myData;
   1227     int8_t length;
   1228 
   1229     saveThis = args->converter;
   1230     myData=((UConverterDataISO2022*)(saveThis->extraInfo));
   1231 
   1232     realSourceLimit = args->sourceLimit;
   1233     while (args->source < realSourceLimit) {
   1234         if(myData->key == 0) { /* are we in the middle of an escape sequence? */
   1235             /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
   1236             mySourceLimit = getEndOfBuffer_2022(&(args->source), realSourceLimit, args->flush);
   1237 
   1238             if(args->source < mySourceLimit) {
   1239                 if(myData->currentConverter==NULL) {
   1240                     myData->currentConverter = ucnv_open("ASCII",err);
   1241                     if(U_FAILURE(*err)){
   1242                         return;
   1243                     }
   1244 
   1245                     myData->currentConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
   1246                     saveThis->mode = UCNV_SO;
   1247                 }
   1248 
   1249                 /* convert to before the ESC or until the end of the buffer */
   1250                 myData->isFirstBuffer=FALSE;
   1251                 sourceStart = args->source;
   1252                 myTargetStart = args->target;
   1253                 args->converter = myData->currentConverter;
   1254                 ucnv_toUnicode(args->converter,
   1255                     &args->target,
   1256                     args->targetLimit,
   1257                     &args->source,
   1258                     mySourceLimit,
   1259                     args->offsets,
   1260                     (UBool)(args->flush && mySourceLimit == realSourceLimit),
   1261                     err);
   1262                 args->converter = saveThis;
   1263 
   1264                 if (*err == U_BUFFER_OVERFLOW_ERROR) {
   1265                     /* move the overflow buffer */
   1266                     length = saveThis->UCharErrorBufferLength = myData->currentConverter->UCharErrorBufferLength;
   1267                     myData->currentConverter->UCharErrorBufferLength = 0;
   1268                     if(length > 0) {
   1269                         uprv_memcpy(saveThis->UCharErrorBuffer,
   1270                                     myData->currentConverter->UCharErrorBuffer,
   1271                                     length*U_SIZEOF_UCHAR);
   1272                     }
   1273                     return;
   1274                 }
   1275 
   1276                 /*
   1277                  * At least one of:
   1278                  * -Error while converting
   1279                  * -Done with entire buffer
   1280                  * -Need to write offsets or update the current offset
   1281                  *  (leave that up to the code in ucnv.c)
   1282                  *
   1283                  * or else we just stopped at an ESC byte and continue with changeState_2022()
   1284                  */
   1285                 if (U_FAILURE(*err) ||
   1286                     (args->source == realSourceLimit) ||
   1287                     (args->offsets != NULL && (args->target != myTargetStart || args->source != sourceStart) ||
   1288                     (mySourceLimit < realSourceLimit && myData->currentConverter->toULength > 0))
   1289                 ) {
   1290                     /* copy partial or error input for truncated detection and error handling */
   1291                     if(U_FAILURE(*err)) {
   1292                         length = saveThis->invalidCharLength = myData->currentConverter->invalidCharLength;
   1293                         if(length > 0) {
   1294                             uprv_memcpy(saveThis->invalidCharBuffer, myData->currentConverter->invalidCharBuffer, length);
   1295                         }
   1296                     } else {
   1297                         length = saveThis->toULength = myData->currentConverter->toULength;
   1298                         if(length > 0) {
   1299                             uprv_memcpy(saveThis->toUBytes, myData->currentConverter->toUBytes, length);
   1300                             if(args->source < mySourceLimit) {
   1301                                 *err = U_TRUNCATED_CHAR_FOUND; /* truncated input before ESC */
   1302                             }
   1303                         }
   1304                     }
   1305                     return;
   1306                 }
   1307             }
   1308         }
   1309 
   1310         sourceStart = args->source;
   1311         changeState_2022(args->converter,
   1312                &(args->source),
   1313                realSourceLimit,
   1314                ISO_2022,
   1315                err);
   1316         if (U_FAILURE(*err) || (args->source != sourceStart && args->offsets != NULL)) {
   1317             /* let the ucnv.c code update its current offset */
   1318             return;
   1319         }
   1320     }
   1321 }
   1322 
   1323 #endif
   1324 
   1325 /*
   1326  * To Unicode Callback helper function
   1327  */
   1328 static void
   1329 toUnicodeCallback(UConverter *cnv,
   1330                   const uint32_t sourceChar, const uint32_t targetUniChar,
   1331                   UErrorCode* err){
   1332     if(sourceChar>0xff){
   1333         cnv->toUBytes[0] = (uint8_t)(sourceChar>>8);
   1334         cnv->toUBytes[1] = (uint8_t)sourceChar;
   1335         cnv->toULength = 2;
   1336     }
   1337     else{
   1338         cnv->toUBytes[0] =(char) sourceChar;
   1339         cnv->toULength = 1;
   1340     }
   1341 
   1342     if(targetUniChar == (missingCharMarker-1/*0xfffe*/)){
   1343         *err = U_INVALID_CHAR_FOUND;
   1344     }
   1345     else{
   1346         *err = U_ILLEGAL_CHAR_FOUND;
   1347     }
   1348 }
   1349 
   1350 /**************************************ISO-2022-JP*************************************************/
   1351 
   1352 /************************************** IMPORTANT **************************************************
   1353 * The UConverter_fromUnicode_ISO2022_JP converter does not use ucnv_fromUnicode() functions for SBCS,DBCS and
   1354 * MBCS; instead, the values are obtained directly by calling _MBCSFromUChar32().
   1355 * The converter iterates over each Unicode codepoint
   1356 * to obtain the equivalent codepoints from the codepages supported. Since the source buffer is
   1357 * processed one char at a time it would make sense to reduce the extra processing a canned converter
   1358 * would do as far as possible.
   1359 *
   1360 * If the implementation of these macros or structure of sharedData struct change in the future, make
   1361 * sure that ISO-2022 is also changed.
   1362 ***************************************************************************************************
   1363 */
   1364 
   1365 /***************************************************************************************************
   1366 * Rules for ISO-2022-jp encoding
   1367 * (i)   Escape sequences must be fully contained within a line they should not
   1368 *       span new lines or CRs
   1369 * (ii)  If the last character on a line is represented by two bytes then an ASCII or
   1370 *       JIS-Roman character escape sequence should follow before the line terminates
   1371 * (iii) If the first character on the line is represented by two bytes then a two
   1372 *       byte character escape sequence should precede it
   1373 * (iv)  If no escape sequence is encountered then the characters are ASCII
   1374 * (v)   Latin(ISO-8859-1) and Greek(ISO-8859-7) characters must be designated to G2,
   1375 *       and invoked with SS2 (ESC N).
   1376 * (vi)  If there is any G0 designation in text, there must be a switch to
   1377 *       ASCII or to JIS X 0201-Roman before a space character (but not
   1378 *       necessarily before "ESC 4/14 2/0" or "ESC N ' '") or control
   1379 *       characters such as tab or CRLF.
   1380 * (vii) Supported encodings:
   1381 *          ASCII, JISX201, JISX208, JISX212, GB2312, KSC5601, ISO-8859-1,ISO-8859-7
   1382 *
   1383 *  source : RFC-1554
   1384 *
   1385 *          JISX201, JISX208,JISX212 : new .cnv data files created
   1386 *          KSC5601 : alias to ibm-949 mapping table
   1387 *          GB2312 : alias to ibm-1386 mapping table
   1388 *          ISO-8859-1 : Algorithmic implemented as LATIN1 case
   1389 *          ISO-8859-7 : alias to ibm-9409 mapping table
   1390 */
   1391 
   1392 /* preference order of JP charsets */
   1393 static const StateEnum jpCharsetPref[]={
   1394     ASCII,
   1395     JISX201,
   1396     ISO8859_1,
   1397     ISO8859_7,
   1398     JISX208,
   1399     JISX212,
   1400     GB2312,
   1401     KSC5601,
   1402     HWKANA_7BIT
   1403 };
   1404 
   /*
    * Designation escape sequences for the JP charsets.
    * Entries are indexed by the charset enum constants (for example JISX201 = 3),
    * which is a different order than jpCharsetPref[]!
    */
   static const char escSeqChars[][6] ={
       /* ASCII        <ESC>(B  */ "\x1B\x28\x42",
       /* ISO-8859-1   <ESC>.A  */ "\x1B\x2E\x41",
       /* ISO-8859-7   <ESC>.F  */ "\x1B\x2E\x46",
       /* JISX-201     <ESC>(J  */ "\x1B\x28\x4A",
       /* JISX-208     <ESC>$B  */ "\x1B\x24\x42",
       /* JISX-212     <ESC>$(D */ "\x1B\x24\x28\x44",
       /* GB2312       <ESC>$A  */ "\x1B\x24\x41",
       /* KSC5601      <ESC>$(C */ "\x1B\x24\x28\x43",
       /* HWKANA_7BIT  <ESC>(I  */ "\x1B\x28\x49"
   };
   /*
    * Number of bytes in each escSeqChars[] entry, listed in the same order.
    */
   static  const int8_t escSeqCharsLen[] ={
       /* ASCII  ISO-8859-1  ISO-8859-7  JISX-201  JISX-208 */
          3,     3,          3,          3,        3,
       /* JISX-212  GB2312  KSC5601  HWKANA_7BIT */
          4,        3,      4,       3
   };
   1432 
   1433 /*
   1434 * The iteration over various code pages works this way:
   1435 * i)   Get the currentState from myConverterData->currentState
   1436 * ii)  Check if the character is mapped to a valid character in the currentState
   1437 *      Yes ->  a) set the initIterState to currentState
   1438 *       b) remain in this state until an invalid character is found
   1439 *      No  ->  a) go to the next code page and find the character
   1440 * iii) Before changing the state, increment the current state and check whether it
   1441 *      is equal to the initIterState
   1442 *      Yes ->  The character cannot be represented in any of the supported encodings;
   1443 *       break and return a U_INVALID_CHAR_FOUND error
   1444 *      No  ->  Continue and find the character in next code page
   1445 *
   1446 *
   1447 * TODO: Implement a priority technique where the users are allowed to set the priority of code pages
   1448 */
   1449 
   /*
    * Map a JIS X 0201 byte 00..7F to Unicode.
    * Only two positions differ from the identity mapping:
    * 5C maps to U+00A5 and 7E maps to U+203E.
    */
   static U_INLINE uint32_t
   jisx201ToU(uint32_t value) {
       switch(value) {
       case 0x5c:
           return 0xa5;
       case 0x7e:
           return 0x203e;
       default:
           /* every other value is passed through unchanged */
           return value;
       }
   }
   1463 
   /*
    * Map a Unicode code point to a JIS X 0201 byte 00..7F.
    * Returns 0xfffe (U+FFFE) when the code point is unmappable.
    * U+005C and U+007E are unmappable because their byte positions are
    * taken by U+00A5 and U+203E.
    */
   static U_INLINE uint32_t
   jisx201FromU(uint32_t value) {
       switch(value) {
       case 0xa5:
           return 0x5c;
       case 0x203e:
           return 0x7e;
       case 0x5c:
       case 0x7e:
           return 0xfffe;
       default:
           /* the remaining 00..7F values map to themselves */
           return value <= 0x7f ? value : 0xfffe;
       }
   }
   1478 
   /*
    * Convert a valid Shift-JIS byte pair (lead byte in bits 15..8, trail byte in
    * bits 7..0) to the corresponding pair of JIS X 0208 21..7E bytes.
    * Returns 0 if the pair lies beyond the JIS X 0208 range (above 0xEFFC).
    */
   static U_INLINE uint32_t
   _2022FromSJIS(uint32_t value) {
       uint32_t lead;
       uint8_t trail;

       if(value > 0xEFFC) {
           return 0;  /* beyond JIS X 0208 */
       }

       lead = value & 0xff00;
       trail = (uint8_t)value;

       /* leads up to 9F and leads E0..EF use different offsets */
       lead -= (lead <= 0x9f00) ? 0x7000 : 0xb000;
       lead <<= 1;

       if(trail > 0x9e) {
           /* trail <= 0xfc */
           return lead | (trail - 0x7e);
       }

       lead -= 0x100;
       /* trail bytes 40..7E and 80..9E use different offsets */
       return lead | (trail - ((trail <= 0x7e) ? 0x1f : 0x20));
   }
   1514 
   /*
    * Convert a pair of JIS X 0208 21..7E bytes (c1, c2) to Shift-JIS and store
    * the result in bytes[0] (lead) and bytes[1] (trail).
    * If either byte is outside 21..7E the result is made invalid for Shift-JIS
    * so that the converter catches it. Some invalid inputs already turn into
    * equally invalid Shift-JIS byte values and are not tested explicitly.
    */
   static U_INLINE void
   _2022ToSJIS(uint8_t c1, uint8_t c2, char bytes[2]) {
       uint8_t lead, trail;

       if((c1 & 1) != 0) {
           /* odd c1 */
           lead = (uint8_t)(c1 + 1);
           if(c2 > 0x7e) {
               trail = 0;  /* invalid */
           } else if(c2 > 0x5f) {
               trail = (uint8_t)(c2 + 0x20);
           } else {
               trail = (uint8_t)(c2 + 0x1f);
           }
       } else {
           /* even c1 */
           lead = c1;
           if(0x21 <= c2 && c2 <= 0x7e) {
               trail = (uint8_t)(c2 + 0x7e);
           } else {
               trail = 0;  /* invalid */
           }
       }

       lead >>= 1;
       if(lead > 0x3f) {
           lead = 0;  /* invalid */
       } else if(lead > 0x2f) {
           lead += 0xb0;
       } else {
           lead += 0x70;
       }

       bytes[0] = (char)lead;
       bytes[1] = (char)trail;
   }
   1551 
   1552 /*
   1553  * JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS)
   1554  * Katakana.
   1555  * Now that we use a Shift-JIS table for JIS X 0208 we need to hardcode these fallbacks
   1556  * because Shift-JIS roundtrips half-width Katakana to single bytes.
   1557  * These were the only fallbacks in ICU's jisx-208.ucm file.
   1558  */
   1559 static const uint16_t hwkana_fb[HWKANA_END - HWKANA_START + 1] = {
   1560     0x2123,  /* U+FF61 */
   1561     0x2156,
   1562     0x2157,
   1563     0x2122,
   1564     0x2126,
   1565     0x2572,
   1566     0x2521,
   1567     0x2523,
   1568     0x2525,
   1569     0x2527,
   1570     0x2529,
   1571     0x2563,
   1572     0x2565,
   1573     0x2567,
   1574     0x2543,
   1575     0x213C,  /* U+FF70 */
   1576     0x2522,
   1577     0x2524,
   1578     0x2526,
   1579     0x2528,
   1580     0x252A,
   1581     0x252B,
   1582     0x252D,
   1583     0x252F,
   1584     0x2531,
   1585     0x2533,
   1586     0x2535,
   1587     0x2537,
   1588     0x2539,
   1589     0x253B,
   1590     0x253D,
   1591     0x253F,  /* U+FF80 */
   1592     0x2541,
   1593     0x2544,
   1594     0x2546,
   1595     0x2548,
   1596     0x254A,
   1597     0x254B,
   1598     0x254C,
   1599     0x254D,
   1600     0x254E,
   1601     0x254F,
   1602     0x2552,
   1603     0x2555,
   1604     0x2558,
   1605     0x255B,
   1606     0x255E,
   1607     0x255F,  /* U+FF90 */
   1608     0x2560,
   1609     0x2561,
   1610     0x2562,
   1611     0x2564,
   1612     0x2566,
   1613     0x2568,
   1614     0x2569,
   1615     0x256A,
   1616     0x256B,
   1617     0x256C,
   1618     0x256D,
   1619     0x256F,
   1620     0x2573,
   1621     0x212B,
   1622     0x212C   /* U+FF9F */
   1623 };
   1624 
   1625 static void
   1626 UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err) {
            /*
             * Converts UTF-16 text in [args->source, args->sourceLimit) to ISO-2022-JP
             * bytes in [args->target, args->targetLimit). When args->offsets is not
             * NULL, a source index is recorded for the output bytes as well.
             *
             * For each code point the charsets in choices[] are tried in order; the
             * first roundtrip mapping wins, otherwise the first fallback found is used.
             * SI, a designation escape sequence and SO or ESC N are written before the
             * character bytes whenever the state stored in fromU2022State requires it.
             *
             * Error codes set in *err by this function itself:
             *   U_ILLEGAL_CHAR_FOUND     unpaired surrogate, or IS_2022_CONTROL(sourceChar)
             *   U_INVALID_CHAR_FOUND     no charset in choices[] maps the code point
             *   U_BUFFER_OVERFLOW_ERROR  input remains but target has reached targetLimit
             * In the first two cases the code point is left in cnv->fromUChar32.
             * fromUWriteUInt8() (defined elsewhere) also receives err.
             *
             * On return args->source and args->target hold the updated positions.
             */
   1627     UConverter *cnv = args->converter;
   1628     UConverterDataISO2022 *converterData;
   1629     ISO2022State *pFromU2022State;
   1630     uint8_t *target = (uint8_t *) args->target;
   1631     const uint8_t *targetLimit = (const uint8_t *) args->targetLimit;
   1632     const UChar* source = args->source;
   1633     const UChar* sourceLimit = args->sourceLimit;
   1634     int32_t* offsets = args->offsets;
   1635     UChar32 sourceChar;
   1636     char buffer[8]; /* output staged for one character: SI/escape/shift bytes plus the character bytes */
   1637     int32_t len, outLen;
   1638     int8_t choices[10]; /* charsets to try, in trial order */
   1639     int32_t choiceCount; /* number of entries in choices[]; 0 forces a rebuild */
   1640     uint32_t targetValue = 0;
   1641     UBool useFallback;
   1642
   1643     int32_t i;
   1644     int8_t cs, g;
   1645
   1646     /* set up the state */
   1647     converterData     = (UConverterDataISO2022*)cnv->extraInfo;
   1648     pFromU2022State   = &converterData->fromU2022State;
   1649
   1650     choiceCount = 0;
   1651
   1652     /* check if the last codepoint of previous buffer was a lead surrogate*/
            /* if so, and there is room in the target, jump into the loop body at the trail lookup */
   1653     if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
   1654         goto getTrail;
   1655     }
   1656
   1657     while(source < sourceLimit) {
   1658         if(target < targetLimit) {
   1659
   1660             sourceChar  = *(source++);
   1661             /*check if the char is a First surrogate*/
   1662             if(UTF_IS_SURROGATE(sourceChar)) {
   1663                 if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
   1664 getTrail:
   1665                     /*look ahead to find the trail surrogate*/
   1666                     if(source < sourceLimit) {
   1667                         /* test the following code unit */
   1668                         UChar trail=(UChar) *source;
   1669                         if(UTF_IS_SECOND_SURROGATE(trail)) {
   1670                             source++;
   1671                             sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
   1672                             cnv->fromUChar32=0x00;
   1673                             /* convert this supplementary code point */
   1674                             /* exit this condition tree */
   1675                         } else {
   1676                             /* this is an unmatched lead code unit (1st surrogate) */
   1677                             /* callback(illegal) */
   1678                             *err=U_ILLEGAL_CHAR_FOUND;
   1679                             cnv->fromUChar32=sourceChar;
   1680                             break;
   1681                         }
   1682                     } else {
   1683                         /* no more input */
                                /* keep the lead surrogate in cnv->fromUChar32; no error is set here */
   1684                         cnv->fromUChar32=sourceChar;
   1685                         break;
   1686                     }
   1687                 } else {
   1688                     /* this is an unmatched trail code unit (2nd surrogate) */
   1689                     /* callback(illegal) */
   1690                     *err=U_ILLEGAL_CHAR_FOUND;
   1691                     cnv->fromUChar32=sourceChar;
   1692                     break;
   1693                 }
   1694             }
   1695
   1696             /* do not convert SO/SI/ESC */
   1697             if(IS_2022_CONTROL(sourceChar)) {
   1698                 /* callback(illegal) */
   1699                 *err=U_ILLEGAL_CHAR_FOUND;
   1700                 cnv->fromUChar32=sourceChar;
   1701                 break;
   1702             }
   1703
   1704             /* do the conversion */
   1705
                    /* (re)build choices[] only after it was invalidated by a state change */
   1706             if(choiceCount == 0) {
   1707                 uint16_t csm;
   1708
   1709                 /*
   1710                  * The csm variable keeps track of which charsets are allowed
   1711                  * and not used yet while building the choices[].
   1712                  */
   1713                 csm = jpCharsetMasks[converterData->version];
   1714                 choiceCount = 0;
   1715
   1716                 /* JIS7/8: try single-byte half-width Katakana before JISX208 */
   1717                 if(converterData->version == 3 || converterData->version == 4) {
   1718                     choices[choiceCount++] = (int8_t)HWKANA_7BIT;
   1719                 }
   1720                 /* Do not try single-byte half-width Katakana for other versions. */
   1721                 csm &= ~CSM(HWKANA_7BIT);
   1722
   1723                 /* try the current G0 charset */
   1724                 choices[choiceCount++] = cs = pFromU2022State->cs[0];
   1725                 csm &= ~CSM(cs);
   1726
   1727                 /* try the current G2 charset */
   1728                 if((cs = pFromU2022State->cs[2]) != 0) {
   1729                     choices[choiceCount++] = cs;
   1730                     csm &= ~CSM(cs);
   1731                 }
   1732
   1733                 /* try all the other possible charsets */
   1734                 for(i = 0; i < LENGTHOF(jpCharsetPref); ++i) {
   1735                     cs = (int8_t)jpCharsetPref[i];
   1736                     if(CSM(cs) & csm) {
   1737                         choices[choiceCount++] = cs;
   1738                         csm &= ~CSM(cs);
   1739                     }
   1740                 }
   1741             }
   1742
   1743             cs = g = 0;
   1744             /*
   1745              * len==0: no mapping found yet
   1746              * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
   1747              * len>0: found a roundtrip result, done
   1748              */
   1749             len = 0;
   1750             /*
   1751              * We will turn off useFallback after finding a fallback,
   1752              * but we still get fallbacks from PUA code points as usual.
   1753              * Therefore, we will also need to check that we don't overwrite
   1754              * an early fallback with a later one.
   1755              */
   1756             useFallback = cnv->useFallback;
   1757
                    /* walk choices[] until a roundtrip (len > 0) is found or the list is exhausted */
   1758             for(i = 0; i < choiceCount && len <= 0; ++i) {
   1759                 uint32_t value;
   1760                 int32_t len2;
   1761                 int8_t cs0 = choices[i];
   1762                 switch(cs0) {
   1763                 case ASCII:
   1764                     if(sourceChar <= 0x7f) {
   1765                         targetValue = (uint32_t)sourceChar;
   1766                         len = 1;
   1767                         cs = cs0;
   1768                         g = 0;
   1769                     }
   1770                     break;
   1771                 case ISO8859_1:
                            /* output the code point minus 0x80 through G2 */
   1772                     if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
   1773                         targetValue = (uint32_t)sourceChar - 0x80;
   1774                         len = 1;
   1775                         cs = cs0;
   1776                         g = 2;
   1777                     }
   1778                     break;
   1779                 case HWKANA_7BIT:
   1780                     if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
   1781                         if(converterData->version==3) {
   1782                             /* JIS7: use G1 (SO) */
   1783                             /* Shift U+FF61..U+FF9F to bytes 21..5F. */
   1784                             targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21));
   1785                             len = 1;
   1786                             pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */
   1787                             g = 1;
   1788                         } else if(converterData->version==4) {
   1789                             /* JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below */
   1790                             /* Shift U+FF61..U+FF9F to bytes A1..DF. */
   1791                             targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0xa1));
   1792                             len = 1;
   1793
   1794                             cs = pFromU2022State->cs[0];
   1795                             if(IS_JP_DBCS(cs)) {
   1796                                 /* switch from a DBCS charset to JISX201 */
   1797                                 cs = (int8_t)JISX201;
   1798                             }
   1799                             /* else stay in the current G0 charset */
   1800                             g = 0;
   1801                         }
   1802                         /* else do not use HWKANA_7BIT with other versions */
   1803                     }
   1804                     break;
   1805                 case JISX201:
   1806                     /* G0 SBCS */
   1807                     value = jisx201FromU(sourceChar);
   1808                     if(value <= 0x7f) {
   1809                         targetValue = value;
   1810                         len = 1;
   1811                         cs = cs0;
   1812                         g = 0;
   1813                         useFallback = FALSE;
   1814                     }
   1815                     break;
   1816                 case JISX208:
   1817                     /* G0 DBCS from Shift-JIS table */
   1818                     len2 = MBCS_FROM_UCHAR32_ISO2022(
   1819                                 converterData->myConverterArray[cs0],
   1820                                 sourceChar, &value,
   1821                                 useFallback, MBCS_OUTPUT_2);
                            /* a fallback (-2) is only taken while no earlier result has been recorded */
   1822                     if(len2 == 2 || (len2 == -2 && len == 0)) {  /* only accept DBCS: abs(len)==2 */
   1823                         value = _2022FromSJIS(value);
   1824                         if(value != 0) {
   1825                             targetValue = value;
   1826                             len = len2;
   1827                             cs = cs0;
   1828                             g = 0;
   1829                             useFallback = FALSE;
   1830                         }
   1831                     } else if(len == 0 && useFallback &&
   1832                               (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
                                /* hardcoded half-width Katakana fallback, recorded as a fallback (len = -2) */
   1833                         targetValue = hwkana_fb[sourceChar - HWKANA_START];
   1834                         len = -2;
   1835                         cs = cs0;
   1836                         g = 0;
   1837                         useFallback = FALSE;
   1838                     }
   1839                     break;
   1840                 case ISO8859_7:
   1841                     /* G0 SBCS forced to 7-bit output */
   1842                     len2 = MBCS_SINGLE_FROM_UCHAR32(
   1843                                 converterData->myConverterArray[cs0],
   1844                                 sourceChar, &value,
   1845                                 useFallback);
                            /* accept a mapping in GR96_START..GR96_END, but no second fallback; output goes through G2 */
   1846                     if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= value && value <= GR96_END) {
   1847                         targetValue = value - 0x80;
   1848                         len = len2;
   1849                         cs = cs0;
   1850                         g = 2;
   1851                         useFallback = FALSE;
   1852                     }
   1853                     break;
   1854                 default:
   1855                     /* G0 DBCS */
   1856                     len2 = MBCS_FROM_UCHAR32_ISO2022(
   1857                                 converterData->myConverterArray[cs0],
   1858                                 sourceChar, &value,
   1859                                 useFallback, MBCS_OUTPUT_2);
   1860                     if(len2 == 2 || (len2 == -2 && len == 0)) {  /* only accept DBCS: abs(len)==2 */
   1861                         if(cs0 == KSC5601) {
   1862                             /*
   1863                              * Check for valid bytes for the encoding scheme.
   1864                              * This is necessary because the sub-converter (windows-949)
   1865                              * has a broader encoding scheme than is valid for 2022.
   1866                              */
   1867                             value = _2022FromGR94DBCS(value);
   1868                             if(value == 0) {
   1869                                 break;
   1870                             }
   1871                         }
   1872                         targetValue = value;
   1873                         len = len2;
   1874                         cs = cs0;
   1875                         g = 0;
   1876                         useFallback = FALSE;
   1877                     }
   1878                     break;
   1879                 }
   1880             }
   1881
   1882             if(len != 0) {
   1883                 if(len < 0) {
   1884                     len = -len;  /* fallback */
   1885                 }
   1886                 outLen = 0; /* count output bytes */
   1887
   1888                 /* write SI if necessary (only for JIS7) */
   1889                 if(pFromU2022State->g == 1 && g == 0) {
   1890                     buffer[outLen++] = UCNV_SI;
   1891                     pFromU2022State->g = 0;
   1892                 }
   1893
   1894                 /* write the designation sequence if necessary */
   1895                 if(cs != pFromU2022State->cs[g]) {
   1896                     int32_t escLen = escSeqCharsLen[cs];
   1897                     uprv_memcpy(buffer + outLen, escSeqChars[cs], escLen);
   1898                     outLen += escLen;
   1899                     pFromU2022State->cs[g] = cs;
   1900
   1901                     /* invalidate the choices[] */
   1902                     choiceCount = 0;
   1903                 }
   1904
   1905                 /* write the shift sequence if necessary */
   1906                 if(g != pFromU2022State->g) {
   1907                     switch(g) {
   1908                     /* case 0 handled before writing escapes */
   1909                     case 1:
   1910                         buffer[outLen++] = UCNV_SO;
   1911                         pFromU2022State->g = 1;
   1912                         break;
   1913                     default: /* case 2 */
                                /* ESC N; pFromU2022State->g is not changed for this shift */
   1914                         buffer[outLen++] = 0x1b;
   1915                         buffer[outLen++] = 0x4e;
   1916                         break;
   1917                     /* no case 3: no SS3 in ISO-2022-JP-x */
   1918                     }
   1919                 }
   1920
   1921                 /* write the output bytes */
   1922                 if(len == 1) {
   1923                     buffer[outLen++] = (char)targetValue;
   1924                 } else /* len == 2 */ {
   1925                     buffer[outLen++] = (char)(targetValue >> 8);
   1926                     buffer[outLen++] = (char)targetValue;
   1927                 }
   1928             } else {
   1929                 /*
   1930                  * if we cannot find the character after checking all codepages
   1931                  * then this is an error
   1932                  */
   1933                 *err = U_INVALID_CHAR_FOUND;
   1934                 cnv->fromUChar32=sourceChar;
   1935                 break;
   1936             }
   1937
   1938             if(sourceChar == CR || sourceChar == LF) {
   1939                 /* reset the G2 state at the end of a line (conversion got us into ASCII or JISX201 already) */
   1940                 pFromU2022State->cs[2] = 0;
   1941                 choiceCount = 0;
   1942             }
   1943
   1944             /* output outLen>0 bytes in buffer[] */
                    /* one byte always fits here because target < targetLimit was checked at the top of the loop body */
   1945             if(outLen == 1) {
   1946                 *target++ = buffer[0];
   1947                 if(offsets) {
   1948                     *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */
   1949                 }
   1950             } else if(outLen == 2 && (target + 2) <= targetLimit) {
   1951                 *target++ = buffer[0];
   1952                 *target++ = buffer[1];
   1953                 if(offsets) {
   1954                     int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));
   1955                     *offsets++ = sourceIndex;
   1956                     *offsets++ = sourceIndex;
   1957                 }
   1958             } else {
                        /* all other cases go through fromUWriteUInt8() (defined elsewhere), which also receives err */
   1959                 fromUWriteUInt8(
   1960                     cnv,
   1961                     buffer, outLen,
   1962                     &target, (const char *)targetLimit,
   1963                     &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
   1964                     err);
   1965                 if(U_FAILURE(*err)) {
   1966                     break;
   1967                 }
   1968             }
   1969         } /* end if(target < targetLimit) */
   1970         else{
   1971             *err =U_BUFFER_OVERFLOW_ERROR;
   1972             break;
   1973         }
   1974
   1975     }/* end while(source < sourceLimit) */
   1976
   1977     /*
   1978      * the end of the input stream and detection of truncated input
   1979      * are handled by the framework, but for ISO-2022-JP conversion
   1980      * we need to be in ASCII mode at the very end
   1981      *
   1982      * conditions:
   1983      *   successful
   1984      *   in SO mode or not in ASCII mode
   1985      *   end of input and no truncated input
   1986      */
   1987     if( U_SUCCESS(*err) &&
   1988         (pFromU2022State->g!=0 || pFromU2022State->cs[0]!=ASCII) &&
   1989         args->flush && source>=sourceLimit && cnv->fromUChar32==0
   1990     ) {
   1991         int32_t sourceIndex;
   1992
   1993         outLen = 0;
   1994
                /* leave SO mode first */
   1995         if(pFromU2022State->g != 0) {
   1996             buffer[outLen++] = UCNV_SI;
   1997             pFromU2022State->g = 0;
   1998         }
   1999
                /* then designate ASCII to G0 */
   2000         if(pFromU2022State->cs[0] != ASCII) {
   2001             int32_t escLen = escSeqCharsLen[ASCII];
   2002             uprv_memcpy(buffer + outLen, escSeqChars[ASCII], escLen);
   2003             outLen += escLen;
   2004             pFromU2022State->cs[0] = (int8_t)ASCII;
   2005         }
   2006
   2007         /* get the source index of the last input character */
   2008         /*
   2009          * TODO this would be simpler and more reliable if we used a pair
   2010          * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
   2011          * so that we could simply use the prevSourceIndex here;
   2012          * this code gives an incorrect result for the rare case of an unmatched
   2013          * trail surrogate that is alone in the last buffer of the text stream
   2014          */
   2015         sourceIndex=(int32_t)(source-args->source);
   2016         if(sourceIndex>0) {
   2017             --sourceIndex;
                    /* step back one more unit if the last unit is a trail surrogate that may belong to a pair */
   2018             if( U16_IS_TRAIL(args->source[sourceIndex]) &&
   2019                 (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
   2020             ) {
   2021                 --sourceIndex;
   2022             }
   2023         } else {
                    /* nothing was consumed in this call: use -1 as the source index */
   2024             sourceIndex=-1;
   2025         }
   2026
   2027         fromUWriteUInt8(
   2028             cnv,
   2029             buffer, outLen,
   2030             &target, (const char *)targetLimit,
   2031             &offsets, sourceIndex,
   2032             err);
   2033     }
   2034
   2035     /*save the state and return */
   2036     args->source = source;
   2037     args->target = (char*)target;
   2038 }
   2039 
   2040 /*************** to unicode *******************/
   2041 
   /*
    * Convert ISO-2022-JP family input to UTF-16, recording source offsets
    * when args->offsets is non-NULL.
    *
    * Reads bytes from args->source up to args->sourceLimit and writes UChars
    * at args->target up to args->targetLimit.  On entry it first resumes a
    * partial escape sequence (myData->key != 0) or a partial double-byte
    * character (one lead byte saved in converter->toUBytes).
    *
    * Per input byte:
    *   - SI/SO switch between G0 and G1 only when myData->version==3; for any
    *     other version they stay unconverted and reach toUnicodeCallback().
    *   - ESC is handed to changeState_2022(); on failure the current positions
    *     are stored in args and the function returns immediately.
    *   - CR/LF reset the shift state and are then converted like other bytes.
    *   - everything else is converted according to the charset selected by
    *     pToU2022State->cs[pToU2022State->g].
    *
    * On exit args->source and args->target are advanced past what was
    * consumed/written.  *err is set to U_BUFFER_OVERFLOW_ERROR when input
    * remains but the target is full, and to U_ILLEGAL_ESCAPE_SEQUENCE when,
    * for version 0, an escape sequence completes while isEmptySegment is set.
    * Bytes that yield no code point are passed, with err, to
    * toUnicodeCallback() and the loop is left.
    */
   2042 static void
   2043 UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
   2044                                                UErrorCode* err){
   2045     char tempBuf[2];
   2046     const char *mySource = (char *) args->source;
   2047     UChar *myTarget = args->target;
   2048     const char *mySourceLimit = args->sourceLimit;
   2049     uint32_t targetUniChar = 0x0000;
   2050     uint32_t mySourceChar = 0x0000;
   2051     uint32_t tmpSourceChar = 0x0000;
   2052     UConverterDataISO2022* myData;
   2053     ISO2022State *pToU2022State;
   2054     StateEnum cs;
   2055 
   2056     myData=(UConverterDataISO2022*)(args->converter->extraInfo);
   2057     pToU2022State = &myData->toU2022State;
   2058 
   2059     if(myData->key != 0) {
   2060         /* continue with a partial escape sequence */
   2061         goto escape;
   2062     } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
   2063         /* continue with a partial double-byte character */
   2064         mySourceChar = args->converter->toUBytes[0];
   2065         args->converter->toULength = 0;
   2066         cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
   2067         targetUniChar = missingCharMarker;
   2068         goto getTrailByte;
   2069     }
   2070 
   2071     while(mySource < mySourceLimit){
   2072 
   2073         targetUniChar =missingCharMarker;
   2074 
   2075         if(myTarget < args->targetLimit){
   2076 
   2077             mySourceChar= (unsigned char) *mySource++;
   2078 
   2079             switch(mySourceChar) {
   2080             case UCNV_SI:
   2081                 if(myData->version==3) {
   2082                     pToU2022State->g=0;
   2083                     continue;
   2084                 } else {
   2085                     /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
   2086                     myData->isEmptySegment = FALSE;	/* reset this, we have a different error */
   2087                     break;
   2088                 }
   2089 
   2090             case UCNV_SO:
   2091                 if(myData->version==3) {
   2092                     /* JIS7: switch to G1 half-width Katakana */
   2093                     pToU2022State->cs[1] = (int8_t)HWKANA_7BIT;
   2094                     pToU2022State->g=1;
   2095                     continue;
   2096                 } else {
   2097                     /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
   2098                     myData->isEmptySegment = FALSE;	/* reset this, we have a different error */
   2099                     break;
   2100                 }
   2101 
   2102             case ESC_2022:
   2103                 mySource--;
   2104 escape:
   2105                 {
                            /* remember where this escape sequence started so that its whole length can be reported below */
   2106                     const char * mySourceBefore = mySource;
   2107                     int8_t toULengthBefore = args->converter->toULength;
   2108 
   2109                     changeState_2022(args->converter,&(mySource),
   2110                         mySourceLimit, ISO_2022_JP,err);
   2111 
   2112                     /* If in ISO-2022-JP only and we successfully completed an escape sequence, but previous segment was empty, create an error */
   2113                     if(myData->version==0 && myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
   2114                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   2115                         args->converter->toUCallbackReason = UCNV_IRREGULAR;
   2116                         args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore));
   2117                     }
   2118                 }
   2119 
   2120                 /* invalid or illegal escape sequence */
   2121                 if(U_FAILURE(*err)){
   2122                     args->target = myTarget;
   2123                     args->source = mySource;
   2124                     myData->isEmptySegment = FALSE;	/* Reset to avoid future spurious errors */
   2125                     return;
   2126                 }
   2127                 /* If we successfully completed an escape sequence, we begin a new segment, empty so far */
   2128                 if(myData->key==0) {
   2129                     myData->isEmptySegment = TRUE;
   2130                 }
   2131                 continue;
   2132 
   2133             /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */
   2134 
   2135             case CR:
   2136                 /*falls through*/
   2137             case LF:
   2138                 /* automatically reset to single-byte mode */
   2139                 if((StateEnum)pToU2022State->cs[0] != ASCII && (StateEnum)pToU2022State->cs[0] != JISX201) {
   2140                     pToU2022State->cs[0] = (int8_t)ASCII;
   2141                 }
   2142                 pToU2022State->cs[2] = 0;
   2143                 pToU2022State->g = 0;
   2144                 /* falls through */
   2145             default:
   2146                 /* convert one or two bytes */
   2147                 myData->isEmptySegment = FALSE;
   2148                 cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
   2149                 if( (uint8_t)(mySourceChar - 0xa1) <= (0xdf - 0xa1) && myData->version==4 &&
   2150                     !IS_JP_DBCS(cs)
   2151                 ) {
   2152                     /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */
   2153                     targetUniChar = mySourceChar + (HWKANA_START - 0xa1);
   2154 
   2155                     /* return from a single-shift state to the previous one */
   2156                     if(pToU2022State->g >= 2) {
   2157                         pToU2022State->g=pToU2022State->prevG;
   2158                     }
   2159                 } else switch(cs) {
   2160                 case ASCII:
   2161                     if(mySourceChar <= 0x7f) {
   2162                         targetUniChar = mySourceChar;
   2163                     }
   2164                     break;
   2165                 case ISO8859_1:
   2166                     if(mySourceChar <= 0x7f) {
   2167                         targetUniChar = mySourceChar + 0x80;
   2168                     }
   2169                     /* return from a single-shift state to the previous one */
   2170                     pToU2022State->g=pToU2022State->prevG;
   2171                     break;
   2172                 case ISO8859_7:
   2173                     if(mySourceChar <= 0x7f) {
   2174                         /* convert mySourceChar+0x80 to use a normal 8-bit table */
   2175                         targetUniChar =
   2176                             _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(
   2177                                 myData->myConverterArray[cs],
   2178                                 mySourceChar + 0x80);
   2179                     }
   2180                     /* return from a single-shift state to the previous one */
   2181                     pToU2022State->g=pToU2022State->prevG;
   2182                     break;
   2183                 case JISX201:
   2184                     if(mySourceChar <= 0x7f) {
   2185                         targetUniChar = jisx201ToU(mySourceChar);
   2186                     }
   2187                     break;
   2188                 case HWKANA_7BIT:
   2189                     if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) {
   2190                         /* 7-bit halfwidth Katakana */
   2191                         targetUniChar = mySourceChar + (HWKANA_START - 0x21);
   2192                     }
   2193                     break;
   2194                 default:
   2195                     /* G0 DBCS */
   2196                     if(mySource < mySourceLimit) {
   2197                         int leadIsOk, trailIsOk;
   2198                         uint8_t trailByte;
   2199 getTrailByte:
   2200                         trailByte = (uint8_t)*mySource;
   2201                         /*
   2202                          * Ticket 5691: consistent illegal sequences:
   2203                          * - We include at least the first byte in the illegal sequence.
   2204                          * - If any of the non-initial bytes could be the start of a character,
   2205                          *   we stop the illegal sequence before the first one of those.
   2206                          *
   2207                          * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
   2208                          * an ESC/SO/SI, we report only the first byte as the illegal sequence.
   2209                          * Otherwise we convert or report the pair of bytes.
   2210                          */
   2211                         leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
   2212                         trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
   2213                         if (leadIsOk && trailIsOk) {
   2214                             ++mySource;
   2215                             tmpSourceChar = (mySourceChar << 8) | trailByte;
   2216                             if(cs == JISX208) {
   2217                                 _2022ToSJIS((uint8_t)mySourceChar, trailByte, tempBuf);
   2218                                 mySourceChar = tmpSourceChar;
   2219                             } else {
   2220                                 /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
   2221                                 mySourceChar = tmpSourceChar;
   2222                                 if (cs == KSC5601) {
   2223                                     tmpSourceChar += 0x8080;  /* = _2022ToGR94DBCS(tmpSourceChar) */
   2224                                 }
   2225                                 tempBuf[0] = (char)(tmpSourceChar >> 8);
   2226                                 tempBuf[1] = (char)(tmpSourceChar);
   2227                             }
   2228                             targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE);
   2229                         } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
   2230                             /* report a pair of illegal bytes if the second byte is not a DBCS starter */
   2231                             ++mySource;
   2232                             /* add another bit so that the code below writes 2 bytes in case of error */
   2233                             mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
   2234                         }
   2235                     } else {
                                /* input ends after the lead byte: save it so that the next call resumes at getTrailByte */
   2236                         args->converter->toUBytes[0] = (uint8_t)mySourceChar;
   2237                         args->converter->toULength = 1;
   2238                         goto endloop;
   2239                     }
   2240                 }  /* End of inner switch */
   2241                 break;
   2242             }  /* End of outer switch */
   2243             if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){
                        /* offset of the first source byte: 1 back for a single byte, 2 back for a byte pair */
   2244                 if(args->offsets){
   2245                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2246                 }
   2247                 *(myTarget++)=(UChar)targetUniChar;
   2248             }
   2249             else if(targetUniChar > missingCharMarker){
   2250                 /* disassemble the surrogate pair and write to output*/
   2251                 targetUniChar-=0x0010000;
   2252                 *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));
   2253                 if(args->offsets){
   2254                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2255                 }
   2256                 ++myTarget;
   2257                 if(myTarget< args->targetLimit){
   2258                     *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
   2259                     if(args->offsets){
   2260                         args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2261                     }
   2262                     ++myTarget;
   2263                 }else{
                            /* target is full: hold the trail surrogate in the converter's UCharErrorBuffer */
   2264                     args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
   2265                                     (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
   2266                 }
   2267 
   2268             }
   2269             else{
   2270                 /* Call the callback function*/
   2271                 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
   2272                 break;
   2273             }
   2274         }
   2275         else{    /* goes with "if(myTarget < args->targetLimit)"  way up near top of function */
   2276             *err =U_BUFFER_OVERFLOW_ERROR;
   2277             break;
   2278         }
   2279     }
   2280 endloop:
   2281     args->target = myTarget;
   2282     args->source = mySource;
   2283 }
   2284 
   2285 
   2286 /***************************************************************
   2287 *   Rules for ISO-2022-KR encoding
   2288 *   i) The KSC5601 designator sequence should appear only once in a file,
   2289 *      at the beginning of a line before any KSC5601 characters. This usually
   2290 *      means that it appears by itself on the first line of the file
   2291 *  ii) There are only 2 shifting sequences SO to shift into double byte mode
   2292 *      and SI to shift into single byte mode
   2293 */
   /*
    * fromUnicode for ISO-2022-KR when the converter data's version is 1:
    * forwards the whole call to ucnv_MBCSFromUnicodeWithOffsets(), using
    * myConverterData->currentConverter in place of args->converter.
    *
    * converter->fromUChar32 is copied into the sub-converter before the call
    * and back afterwards.  If the call ends with U_BUFFER_OVERFLOW_ERROR, the
    * sub-converter's charErrorBuffer contents and length are moved to the
    * original converter.  args->converter is restored before returning.
    */
   2294 static void
   2295 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){
   2296 
   2297     UConverter* saveConv = args->converter;
   2298     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022*)saveConv->extraInfo;
            /* run the MBCS implementation with the sub-converter substituted into args */
   2299     args->converter=myConverterData->currentConverter;
   2300 
            /* carry the pending code point into the sub-converter and back out again */
   2301     myConverterData->currentConverter->fromUChar32 = saveConv->fromUChar32;
   2302     ucnv_MBCSFromUnicodeWithOffsets(args,err);
   2303     saveConv->fromUChar32 = myConverterData->currentConverter->fromUChar32;
   2304 
            /* move overflow bytes from the sub-converter to the original converter */
   2305     if(*err == U_BUFFER_OVERFLOW_ERROR) {
   2306         if(myConverterData->currentConverter->charErrorBufferLength > 0) {
   2307             uprv_memcpy(
   2308                 saveConv->charErrorBuffer,
   2309                 myConverterData->currentConverter->charErrorBuffer,
   2310                 myConverterData->currentConverter->charErrorBufferLength);
   2311         }
   2312         saveConv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;
   2313         myConverterData->currentConverter->charErrorBufferLength = 0;
   2314     }
   2315     args->converter=saveConv;
   2316 }
   2317 
   /*
    * Convert UTF-16 input to ISO-2022-KR bytes, recording source offsets when
    * args->offsets is non-NULL.
    *
    * When converterData->version==1 the work is delegated to
    * UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM().  Otherwise each
    * code unit is looked up with MBCS_FROM_UCHAR32_ISO2022(); only single-byte
    * results <=0x7f and double-byte results with both bytes in 0xa1..0xfe are
    * accepted.  An SO or SI byte is written whenever the output switches
    * between double-byte and single-byte, and double-byte results are written
    * with 0x80 subtracted from each byte.
    *
    * The single/double-byte mode is carried across calls in
    * converter->fromUnicodeStatus; a pending code point is carried in
    * converter->fromUChar32.  Bytes that do not fit in the target go to
    * converter->charErrorBuffer with *err set to U_BUFFER_OVERFLOW_ERROR.
    * SO/SI/ESC in the input and unmatched surrogates set
    * U_ILLEGAL_CHAR_FOUND; other unconvertible code points, including matched
    * surrogate pairs, set U_INVALID_CHAR_FOUND.
    */
   2318 static void
   2319 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
   2320 
   2321     const UChar *source = args->source;
   2322     const UChar *sourceLimit = args->sourceLimit;
   2323     unsigned char *target = (unsigned char *) args->target;
   2324     unsigned char *targetLimit = (unsigned char *) args->targetLimit;
   2325     int32_t* offsets = args->offsets;
   2326     uint32_t targetByteUnit = 0x0000;
   2327     UChar32 sourceChar = 0x0000;
   2328     UBool isTargetByteDBCS;
   2329     UBool oldIsTargetByteDBCS;
   2330     UConverterDataISO2022 *converterData;
   2331     UConverterSharedData* sharedData;
   2332     UBool useFallback;
   2333     int32_t length =0;
   2334 
   2335     converterData=(UConverterDataISO2022*)args->converter->extraInfo;
   2336     /* if the version is 1 then the user is requesting
   2337      * conversion with ibm-25546 pass the arguments to
   2338      * MBCS converter and return
   2339      */
   2340     if(converterData->version==1){
   2341         UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
   2342         return;
   2343     }
   2344 
   2345     /* initialize data */
   2346     sharedData = converterData->currentConverter->sharedData;
   2347     useFallback = args->converter->useFallback;
   2348     isTargetByteDBCS=(UBool)args->converter->fromUnicodeStatus;
   2349     oldIsTargetByteDBCS = isTargetByteDBCS;
   2350 
            /* NOTE(review): repeats the assignment made a few lines above */
   2351     isTargetByteDBCS   = (UBool) args->converter->fromUnicodeStatus;
            /* a code point is pending from the previous call: continue by looking for its trail surrogate */
   2352     if((sourceChar = args->converter->fromUChar32)!=0 && target <targetLimit) {
   2353         goto getTrail;
   2354     }
   2355     while(source < sourceLimit){
   2356 
   2357         targetByteUnit = missingCharMarker;
   2358 
   2359         if(target < (unsigned char*) args->targetLimit){
   2360             sourceChar = *source++;
   2361 
   2362             /* do not convert SO/SI/ESC */
   2363             if(IS_2022_CONTROL(sourceChar)) {
   2364                 /* callback(illegal) */
   2365                 *err=U_ILLEGAL_CHAR_FOUND;
   2366                 args->converter->fromUChar32=sourceChar;
   2367                 break;
   2368             }
   2369 
   2370             length = MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,&targetByteUnit,useFallback,MBCS_OUTPUT_2);
   2371             if(length < 0) {
   2372                 length = -length;  /* fallback */
   2373             }
   2374             /* only DBCS or SBCS characters are expected*/
   2375             /* DB characters with high bit set to 1 are expected */
   2376             if( length > 2 || length==0 ||
   2377                 (length == 1 && targetByteUnit > 0x7f) ||
   2378                 (length == 2 &&
   2379                     ((uint16_t)(targetByteUnit - 0xa1a1) > (0xfefe - 0xa1a1) ||
   2380                     (uint8_t)(targetByteUnit - 0xa1) > (0xfe - 0xa1)))
   2381             ) {
   2382                 targetByteUnit=missingCharMarker;
   2383             }
   2384             if (targetByteUnit != missingCharMarker){
   2385 
   2386                 oldIsTargetByteDBCS = isTargetByteDBCS;
   2387                 isTargetByteDBCS = (UBool)(targetByteUnit>0x00FF);
   2388                   /* append the shift sequence */
   2389                 if (oldIsTargetByteDBCS != isTargetByteDBCS ){
   2390 
                            /* room for this one byte is guaranteed by the target check at the top of the loop body */
   2391                     if (isTargetByteDBCS)
   2392                         *target++ = UCNV_SO;
   2393                     else
   2394                         *target++ = UCNV_SI;
   2395                     if(offsets)
   2396                         *(offsets++) = (int32_t)(source - args->source-1);
   2397                 }
   2398                 /* write the targetUniChar  to target */
   2399                 if(targetByteUnit <= 0x00FF){
   2400                     if( target < targetLimit){
   2401                         *(target++) = (unsigned char) targetByteUnit;
   2402                         if(offsets){
   2403                             *(offsets++) = (int32_t)(source - args->source-1);
   2404                         }
   2405 
   2406                     }else{
   2407                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit);
   2408                         *err = U_BUFFER_OVERFLOW_ERROR;
   2409                     }
   2410                 }else{
                            /* both bytes were checked to be 0xa1..0xfe above; subtracting 0x80 yields 0x21..0x7e */
   2411                     if(target < targetLimit){
   2412                         *(target++) =(unsigned char) ((targetByteUnit>>8) -0x80);
   2413                         if(offsets){
   2414                             *(offsets++) = (int32_t)(source - args->source-1);
   2415                         }
   2416                         if(target < targetLimit){
   2417                             *(target++) =(unsigned char) (targetByteUnit -0x80);
   2418                             if(offsets){
   2419                                 *(offsets++) = (int32_t)(source - args->source-1);
   2420                             }
   2421                         }else{
   2422                             args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit -0x80);
   2423                             *err = U_BUFFER_OVERFLOW_ERROR;
   2424                         }
   2425                     }else{
   2426                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) ((targetByteUnit>>8) -0x80);
   2427                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit-0x80);
   2428                         *err = U_BUFFER_OVERFLOW_ERROR;
   2429                     }
   2430                 }
   2431 
   2432             }
   2433             else{
   2434                 /* oops.. the code point is unassigned
   2435                  * set the error and reason
   2436                  */
   2437 
   2438                 /*check if the char is a First surrogate*/
   2439                 if(UTF_IS_SURROGATE(sourceChar)) {
   2440                     if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
   2441 getTrail:
   2442                         /*look ahead to find the trail surrogate*/
   2443                         if(source <  sourceLimit) {
   2444                             /* test the following code unit */
   2445                             UChar trail=(UChar) *source;
   2446                             if(UTF_IS_SECOND_SURROGATE(trail)) {
   2447                                 source++;
   2448                                 sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
   2449                                 *err = U_INVALID_CHAR_FOUND;
   2450                                 /* convert this surrogate code point */
   2451                                 /* exit this condition tree */
   2452                             } else {
   2453                                 /* this is an unmatched lead code unit (1st surrogate) */
   2454                                 /* callback(illegal) */
   2455                                 *err=U_ILLEGAL_CHAR_FOUND;
   2456                             }
   2457                         } else {
   2458                             /* no more input */
   2459                             *err = U_ZERO_ERROR;
   2460                         }
   2461                     } else {
   2462                         /* this is an unmatched trail code unit (2nd surrogate) */
   2463                         /* callback(illegal) */
   2464                         *err=U_ILLEGAL_CHAR_FOUND;
   2465                     }
   2466                 } else {
   2467                     /* callback(unassigned) for a BMP code point */
   2468                     *err = U_INVALID_CHAR_FOUND;
   2469                 }
   2470 
                        /* leave the code point (or lone lead surrogate) in the converter and stop */
   2471                 args->converter->fromUChar32=sourceChar;
   2472                 break;
   2473             }
   2474         } /* end if(myTargetIndex<myTargetLength) */
   2475         else{
   2476             *err =U_BUFFER_OVERFLOW_ERROR;
   2477             break;
   2478         }
   2479 
   2480     }/* end while(mySourceIndex<mySourceLength) */
   2481 
   2482     /*
   2483      * the end of the input stream and detection of truncated input
   2484      * are handled by the framework, but for ISO-2022-KR conversion
   2485      * we need to be in ASCII mode at the very end
   2486      *
   2487      * conditions:
   2488      *   successful
   2489      *   not in ASCII mode
   2490      *   end of input and no truncated input
   2491      */
   2492     if( U_SUCCESS(*err) &&
   2493         isTargetByteDBCS &&
   2494         args->flush && source>=sourceLimit && args->converter->fromUChar32==0
   2495     ) {
   2496         int32_t sourceIndex;
   2497 
   2498         /* we are switching to ASCII */
   2499         isTargetByteDBCS=FALSE;
   2500 
   2501         /* get the source index of the last input character */
   2502         /*
   2503          * TODO this would be simpler and more reliable if we used a pair
   2504          * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
   2505          * so that we could simply use the prevSourceIndex here;
   2506          * this code gives an incorrect result for the rare case of an unmatched
   2507          * trail surrogate that is alone in the last buffer of the text stream
   2508          */
   2509         sourceIndex=(int32_t)(source-args->source);
   2510         if(sourceIndex>0) {
   2511             --sourceIndex;
   2512             if( U16_IS_TRAIL(args->source[sourceIndex]) &&
   2513                 (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
   2514             ) {
   2515                 --sourceIndex;
   2516             }
   2517         } else {
   2518             sourceIndex=-1;
   2519         }
   2520 
   2521         fromUWriteUInt8(
   2522             args->converter,
   2523             SHIFT_IN_STR, 1,
   2524             &target, (const char *)targetLimit,
   2525             &offsets, sourceIndex,
   2526             err);
   2527     }
   2528 
   2529     /*save the state and return */
   2530     args->source = source;
   2531     args->target = (char*)target;
   2532     args->converter->fromUnicodeStatus = (uint32_t)isTargetByteDBCS;
   2533 }
   2534 
   2535 /************************ To Unicode ***************************************/
   2536 
   /*
    * toUnicode for ISO-2022-KR when myData->version==1 (see the dispatch in
    * UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC): converts the input in
    * segments by calling ucnv_MBCSToUnicodeWithOffsets() on
    * myData->currentConverter, and hands what lies between segments to
    * changeState_2022().
    *
    * For each segment the partial-input bytes (toUBytes/toULength) are copied
    * to the sub-converter before the call and back afterwards, the
    * non-negative offsets it wrote are shifted by (args->source - sourceStart)
    * when that is non-zero, and on U_BUFFER_OVERFLOW_ERROR its
    * UCharErrorBuffer is moved to args->converter.  The function returns on
    * any failure or once args->source reaches args->sourceLimit.
    */
   2537 static void
   2538 UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterToUnicodeArgs *args,
   2539                                                             UErrorCode* err){
   2540     char const* sourceStart;
   2541     UConverterDataISO2022* myData=(UConverterDataISO2022*)(args->converter->extraInfo);
   2542 
   2543     UConverterToUnicodeArgs subArgs;
   2544     int32_t minArgsSize;
   2545 
   2546     /* set up the subconverter arguments */
            /* copy no more than the smaller of args->size and our own struct size */
   2547     if(args->size<sizeof(UConverterToUnicodeArgs)) {
   2548         minArgsSize = args->size;
   2549     } else {
   2550         minArgsSize = (int32_t)sizeof(UConverterToUnicodeArgs);
   2551     }
   2552 
   2553     uprv_memcpy(&subArgs, args, minArgsSize);
   2554     subArgs.size = (uint16_t)minArgsSize;
   2555     subArgs.converter = myData->currentConverter;
   2556 
   2557     /* remember the original start of the input for offsets */
   2558     sourceStart = args->source;
   2559 
   2560     if(myData->key != 0) {
   2561         /* continue with a partial escape sequence */
   2562         goto escape;
   2563     }
   2564 
   2565     while(U_SUCCESS(*err) && args->source < args->sourceLimit) {
   2566         /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
   2567         subArgs.source = args->source;
   2568         subArgs.sourceLimit = getEndOfBuffer_2022(&(args->source), args->sourceLimit, args->flush);
   2569         if(subArgs.source != subArgs.sourceLimit) {
   2570             /*
   2571              * get the current partial byte sequence
   2572              *
   2573              * it needs to be moved between the public and the subconverter
   2574              * so that the conversion framework, which only sees the public
   2575              * converter, can handle truncated and illegal input etc.
   2576              */
   2577             if(args->converter->toULength > 0) {
   2578                 uprv_memcpy(subArgs.converter->toUBytes, args->converter->toUBytes, args->converter->toULength);
   2579             }
   2580             subArgs.converter->toULength = args->converter->toULength;
   2581 
   2582             /*
   2583              * Convert up to the end of the input, or to before the next escape character.
   2584              * Does not handle conversion extensions because the preToU[] state etc.
   2585              * is not copied.
   2586              */
   2587             ucnv_MBCSToUnicodeWithOffsets(&subArgs, err);
   2588 
   2589             if(args->offsets != NULL && sourceStart != args->source) {
   2590                 /* update offsets to base them on the actual start of the input */
   2591                 int32_t *offsets = args->offsets;
   2592                 UChar *target = args->target;
   2593                 int32_t delta = (int32_t)(args->source - sourceStart);
                        /* only the entries written by this sub-conversion are visited; negative offsets are left as they are */
   2594                 while(target < subArgs.target) {
   2595                     if(*offsets >= 0) {
   2596                         *offsets += delta;
   2597                     }
   2598                     ++offsets;
   2599                     ++target;
   2600                 }
   2601             }
   2602             args->source = subArgs.source;
   2603             args->target = subArgs.target;
   2604             args->offsets = subArgs.offsets;
   2605 
   2606             /* copy input/error/overflow buffers */
   2607             if(subArgs.converter->toULength > 0) {
   2608                 uprv_memcpy(args->converter->toUBytes, subArgs.converter->toUBytes, subArgs.converter->toULength);
   2609             }
   2610             args->converter->toULength = subArgs.converter->toULength;
   2611 
   2612             if(*err == U_BUFFER_OVERFLOW_ERROR) {
   2613                 if(subArgs.converter->UCharErrorBufferLength > 0) {
   2614                     uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer,
   2615                                 subArgs.converter->UCharErrorBufferLength);
   2616                 }
   2617                 args->converter->UCharErrorBufferLength=subArgs.converter->UCharErrorBufferLength;
   2618                 subArgs.converter->UCharErrorBufferLength = 0;
   2619             }
   2620         }
   2621 
                /* stop on error or when all input is consumed; otherwise go on to the escape handling below */
   2622         if (U_FAILURE(*err) || (args->source == args->sourceLimit)) {
   2623             return;
   2624         }
   2625 
   2626 escape:
   2627         changeState_2022(args->converter,
   2628                &(args->source),
   2629                args->sourceLimit,
   2630                ISO_2022_KR,
   2631                err);
   2632     }
   2633 }
   2634 
   2635 static void
   2636 UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
   2637                                                             UErrorCode* err){
            /*
             * ISO-2022-KR to Unicode conversion, with optional offsets.
             *
             * Reads bytes from args->source up to args->sourceLimit and writes UChars to
             * args->target up to args->targetLimit. When args->offsets is not NULL, the
             * source index of each output UChar is recorded as well.
             *
             * - version==1 converters are handed to the _IBM variant.
             * - SI selects G0 (single-byte), SO selects G1 (double-byte), ESC is passed
             *   to changeState_2022().
             * - In G1 a pair of bytes that are both in 0x21..0x7e is looked up after
             *   setting the high bit of each byte.
             * - A lead byte that is the last byte of the input is saved in
             *   converter->toUBytes and picked up again on the next call.
             *
             * On return args->source and args->target are advanced past what was
             * consumed/written. *err is set to U_BUFFER_OVERFLOW_ERROR when the target
             * is full, or to an error from the escape handling / toUnicodeCallback().
             */
   2638     char tempBuf[2];
   2639     const char *mySource = ( char *) args->source;
   2640     UChar *myTarget = args->target;
   2641     const char *mySourceLimit = args->sourceLimit;
   2642     UChar32 targetUniChar = 0x0000;
            /*
             * Holds one byte, a lead/trail pair ((lead << 8) | trail), or an illegal
             * pair tagged with the extra 0x10000 bit (see the illegal-pair branch below).
             * It must be wider than 16 bits: as a 16-bit UChar the 0x10000 marker was
             * truncated away, so an illegal pair whose lead byte is 0x00 reached
             * toUnicodeCallback() as a value <= 0xff, indistinguishable from one byte.
             * uint32_t matches the declaration used by the ISO-2022-CN toUnicode function.
             */
   2643     uint32_t mySourceChar = 0x0000;
   2644     UConverterDataISO2022* myData;
   2645     UConverterSharedData* sharedData ;
   2646     UBool useFallback;
   2647 
   2648     myData=(UConverterDataISO2022*)(args->converter->extraInfo);
   2649     if(myData->version==1){
   2650         UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
   2651         return;
   2652     }
   2653 
   2654     /* initialize state */
   2655     sharedData = myData->currentConverter->sharedData;
   2656     useFallback = args->converter->useFallback;
   2657 
   2658     if(myData->key != 0) {
   2659         /* continue with a partial escape sequence */
   2660         goto escape;
   2661     } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
   2662         /* continue with a partial double-byte character */
   2663         mySourceChar = args->converter->toUBytes[0];
   2664         args->converter->toULength = 0;
   2665         goto getTrailByte;
   2666     }
   2667 
   2668     while(mySource< mySourceLimit){
   2669 
   2670         if(myTarget < args->targetLimit){
   2671 
   2672             mySourceChar= (unsigned char) *mySource++;
   2673 
   2674             if(mySourceChar==UCNV_SI){
   2675                 myData->toU2022State.g = 0;
                        /* SI directly after SO: the SO segment contained no characters */
   2676                 if (myData->isEmptySegment) {
   2677                     myData->isEmptySegment = FALSE;	/* we are handling it, reset to avoid future spurious errors */
   2678                     *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   2679                     args->converter->toUCallbackReason = UCNV_IRREGULAR;
   2680                     args->converter->toUBytes[0] = (uint8_t)mySourceChar;
   2681                     args->converter->toULength = 1;
   2682                     args->target = myTarget;
   2683                     args->source = mySource;
   2684                     return;
   2685                 }
   2686                 /*consume the source */
   2687                 continue;
   2688             }else if(mySourceChar==UCNV_SO){
   2689                 myData->toU2022State.g = 1;
   2690                 myData->isEmptySegment = TRUE;	/* Begin a new segment, empty so far */
   2691                 /*consume the source */
   2692                 continue;
   2693             }else if(mySourceChar==ESC_2022){
                        /* back up so that changeState_2022() sees the ESC byte itself */
   2694                 mySource--;
   2695 escape:
   2696                 myData->isEmptySegment = FALSE;	/* Any invalid ESC sequences will be detected separately, so just reset this */
   2697                 changeState_2022(args->converter,&(mySource),
   2698                                 mySourceLimit, ISO_2022_KR, err);
   2699                 if(U_FAILURE(*err)){
   2700                     args->target = myTarget;
   2701                     args->source = mySource;
   2702                     return;
   2703                 }
   2704                 continue;
   2705             }
   2706 
   2707             myData->isEmptySegment = FALSE;	/* Any invalid char errors will be detected separately, so just reset this */
   2708             if(myData->toU2022State.g == 1) {
   2709                 if(mySource < mySourceLimit) {
   2710                     int leadIsOk, trailIsOk;
   2711                     uint8_t trailByte;
   2712 getTrailByte:
   2713                     targetUniChar = missingCharMarker;
   2714                     trailByte = (uint8_t)*mySource;
   2715                     /*
   2716                      * Ticket 5691: consistent illegal sequences:
   2717                      * - We include at least the first byte in the illegal sequence.
   2718                      * - If any of the non-initial bytes could be the start of a character,
   2719                      *   we stop the illegal sequence before the first one of those.
   2720                      *
   2721                      * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
   2722                      * an ESC/SO/SI, we report only the first byte as the illegal sequence.
   2723                      * Otherwise we convert or report the pair of bytes.
   2724                      */
   2725                     leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
   2726                     trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
   2727                     if (leadIsOk && trailIsOk) {
   2728                         ++mySource;
                                /* the lookup table is keyed by the bytes with their high bits set */
   2729                         tempBuf[0] = (char)(mySourceChar + 0x80);
   2730                         tempBuf[1] = (char)(trailByte + 0x80);
   2731                         targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback);
   2732                         mySourceChar = (mySourceChar << 8) | trailByte;
   2733                     } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
   2734                         /* report a pair of illegal bytes if the second byte is not a DBCS starter */
   2735                         ++mySource;
   2736                         /* add another bit so that the code below writes 2 bytes in case of error */
   2737                         mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
   2738                     }
   2739                 } else {
                            /* lead byte at the very end of the input: keep it for the next call */
   2740                     args->converter->toUBytes[0] = (uint8_t)mySourceChar;
   2741                     args->converter->toULength = 1;
   2742                     break;
   2743                 }
   2744             }
   2745             else if(mySourceChar <= 0x7f) {
   2746                 targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, mySource - 1, 1, useFallback);
   2747             } else {
   2748                 targetUniChar = 0xffff;
   2749             }
   2750             if(targetUniChar < 0xfffe){
   2751                 if(args->offsets) {
                            /* offset of the first byte of the character: 1 or 2 bytes back from mySource */
   2752                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2753                 }
   2754                 *(myTarget++)=(UChar)targetUniChar;
   2755             }
   2756             else {
   2757                 /* Call the callback function*/
   2758                 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
   2759                 break;
   2760             }
   2761         }
   2762         else{
   2763             *err =U_BUFFER_OVERFLOW_ERROR;
   2764             break;
   2765         }
   2766     }
            /* save the positions reached so the caller can continue from here */
   2767     args->target = myTarget;
   2768     args->source = mySource;
   2769 }
   2770 
   2771 /*************************** END ISO2022-KR *********************************/
   2772 
   2773 /*************************** ISO-2022-CN *********************************
   2774 *
   2775 * Rules for ISO-2022-CN Encoding:
   2776 * i)   The designator sequence must appear once on a line before any instance
   2777 *      of character set it designates.
   2778 * ii)  If two lines contain characters from the same character set, both lines
   2779 *      must include the designator sequence.
   2780 * iii) Once the designator sequence is known, a shifting sequence has to be found
   2781 *      to invoke the designated character set
   2782 * iv)  All lines start in ASCII and end in ASCII.
   2783 * v)   Four shifting sequences are employed for this purpose:
   2784 *
   2785 *      Sequence    ASCII Eq    Charsets
   2786 *      ----------  -------    ---------
   2787 *      SI           <SI>        US-ASCII
   2788 *      SO           <SO>        CNS-11643-1992 Plane 1, GB2312, ISO-IR-165
   2789 *      SS2          <ESC>N      CNS-11643-1992 Plane 2
   2790 *      SS3          <ESC>O      CNS-11643-1992 Planes 3-7
   2791 *
   2792 * vi)
   2793 *      SOdesignator  : ESC "$" ")" finalchar_for_SO
   2794 *      SS2designator : ESC "$" "*" finalchar_for_SS2
   2795 *      SS3designator : ESC "$" "+" finalchar_for_SS3
   2796 *
   2797 *      ESC $ ) A       Indicates the bytes following SO are Chinese
   2798 *       characters as defined in GB 2312-80, until
   2799 *       another SOdesignation appears
   2800 *
   2801 *
   2802 *      ESC $ ) E       Indicates the bytes following SO are as defined
   2803 *       in ISO-IR-165 (for details, see section 2.1),
   2804 *       until another SOdesignation appears
   2805 *
   2806 *      ESC $ ) G       Indicates the bytes following SO are as defined
   2807 *       in CNS 11643-plane-1, until another
   2808 *       SOdesignation appears
   2809 *
   2810 *      ESC $ * H       Indicates the two bytes immediately following
   2811 *       SS2 is a Chinese character as defined in CNS
   2812 *       11643-plane-2, until another SS2designation
   2813 *       appears
   2814 *       (Meaning <ESC>N must precede every 2 byte
   2815 *        sequence.)
   2816 *
   2817 *      ESC $ + I       Indicates the immediate two bytes following SS3
   2818 *       is a Chinese character as defined in CNS
   2819 *       11643-plane-3, until another SS3designation
   2820 *       appears
   2821 *       (Meaning <ESC>O must precede every 2 byte
   2822 *        sequence.)
   2823 *
   2824 *      ESC $ + J       Indicates the immediate two bytes following SS3
   2825 *       is a Chinese character as defined in CNS
   2826 *       11643-plane-4, until another SS3designation
   2827 *       appears
   2828 *       (In English: <ESC>O must precede every 2 byte
   2829 *        sequence.)
   2830 *
   2831 *      ESC $ + K       Indicates the immediate two bytes following SS3
   2832 *       is a Chinese character as defined in CNS
   2833 *       11643-plane-5, until another SS3designation
   2834 *       appears
   2835 *
   2836 *      ESC $ + L       Indicates the immediate two bytes following SS3
   2837 *       is a Chinese character as defined in CNS
   2838 *       11643-plane-6, until another SS3designation
   2839 *       appears
   2840 *
   2841 *      ESC $ + M       Indicates the immediate two bytes following SS3
   2842 *       is a Chinese character as defined in CNS
   2843 *       11643-plane-7, until another SS3designation
   2844 *       appears
   2845 *
   2846 *       As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and
   2847 *       has its own designation information before any Chinese characters
   2848 *       appear
   2849 *
   2850 */
   2851 
   2852 /* The following are defined this way to make the strings truly readonly */
        /*
         * Each string is a 4-byte designation sequence: ESC (0x1B), '$' (0x24),
         * an intermediate byte ')' (0x29), '*' (0x2A) or '+' (0x2B), and a final byte.
         */
   2853 static const char GB_2312_80_STR[] = "\x1B\x24\x29\x41";             /* ESC $ ) A */
   2854 static const char ISO_IR_165_STR[] = "\x1B\x24\x29\x45";             /* ESC $ ) E */
   2855 static const char CNS_11643_1992_Plane_1_STR[] = "\x1B\x24\x29\x47"; /* ESC $ ) G */
   2856 static const char CNS_11643_1992_Plane_2_STR[] = "\x1B\x24\x2A\x48"; /* ESC $ * H */
   2857 static const char CNS_11643_1992_Plane_3_STR[] = "\x1B\x24\x2B\x49"; /* ESC $ + I */
   2858 static const char CNS_11643_1992_Plane_4_STR[] = "\x1B\x24\x2B\x4A"; /* ESC $ + J */
   2859 static const char CNS_11643_1992_Plane_5_STR[] = "\x1B\x24\x2B\x4B"; /* ESC $ + K */
   2860 static const char CNS_11643_1992_Plane_6_STR[] = "\x1B\x24\x2B\x4C"; /* ESC $ + L */
   2861 static const char CNS_11643_1992_Plane_7_STR[] = "\x1B\x24\x2B\x4D"; /* ESC $ + M */
   2862 
   2863 /********************** ISO2022-CN Data **************************/
        /*
         * Designation sequences in lookup order. The fromUnicode function below indexes
         * this table with the charset id directly when it is below CNS_11643, and with
         * CNS_11643 + (plane id - CNS_11643_1) for the CNS 11643 planes.
         */
   2864 static const char* const escSeqCharsCN[10] ={
   2865         SHIFT_IN_STR,           /* ASCII */
   2866         GB_2312_80_STR,
   2867         ISO_IR_165_STR,
   2868         CNS_11643_1992_Plane_1_STR,
   2869         CNS_11643_1992_Plane_2_STR,
   2870         CNS_11643_1992_Plane_3_STR,
   2871         CNS_11643_1992_Plane_4_STR,
   2872         CNS_11643_1992_Plane_5_STR,
   2873         CNS_11643_1992_Plane_6_STR,
   2874         CNS_11643_1992_Plane_7_STR
   2875 };
   2876 
   2877 static void
   2878 UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
            /*
             * Unicode to ISO-2022-CN / ISO-2022-CN-EXT conversion, with optional offsets.
             *
             * Reads UChars from args->source up to args->sourceLimit and writes bytes to
             * args->target up to args->targetLimit; when args->offsets is not NULL the
             * source index of each output byte is recorded as well.
             *
             * For each code point:
             * - U+0000..U+007F is written as ASCII, preceded by SI if the converter is
             *   not currently in G0; CR and LF reset the whole fromUnicode state.
             * - Anything else is looked up in up to three candidate charsets (choices[]),
             *   then the designation sequence, the shift sequence and the two data bytes
             *   are assembled in buffer[] as needed.
             *
             * A lead surrogate at the end of the input is kept in cnv->fromUChar32 and
             * resumed on the next call. On errors the offending code point is stored in
             * cnv->fromUChar32 and *err is set. On return args->source and args->target
             * are advanced.
             */
   2879     UConverter *cnv = args->converter;
   2880     UConverterDataISO2022 *converterData;
   2881     ISO2022State *pFromU2022State;
   2882     uint8_t *target = (uint8_t *) args->target;
   2883     const uint8_t *targetLimit = (const uint8_t *) args->targetLimit;
   2884     const UChar* source = args->source;
   2885     const UChar* sourceLimit = args->sourceLimit;
   2886     int32_t* offsets = args->offsets;
   2887     UChar32 sourceChar;
            /* worst case below: 4 (designation) + 2 (ESC N / ESC O) + 2 (data bytes) */
   2888     char buffer[8];
   2889     int32_t len;
            /* candidate charsets for non-ASCII input; recomputed when choiceCount is 0 */
   2890     int8_t choices[3];
   2891     int32_t choiceCount;
   2892     uint32_t targetValue = 0;
   2893     UBool useFallback;
   2894 
   2895     /* set up the state */
   2896     converterData     = (UConverterDataISO2022*)cnv->extraInfo;
   2897     pFromU2022State   = &converterData->fromU2022State;
   2898 
   2899     choiceCount = 0;
   2900 
   2901     /* check if the last codepoint of previous buffer was a lead surrogate*/
   2902     if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
   2903         goto getTrail;
   2904     }
   2905 
   2906     while( source < sourceLimit){
   2907         if(target < targetLimit){
   2908 
   2909             sourceChar  = *(source++);
   2910             /*check if the char is a First surrogate*/
   2911              if(UTF_IS_SURROGATE(sourceChar)) {
   2912                 if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
   2913 getTrail:
   2914                     /*look ahead to find the trail surrogate*/
   2915                     if(source < sourceLimit) {
   2916                         /* test the following code unit */
   2917                         UChar trail=(UChar) *source;
   2918                         if(UTF_IS_SECOND_SURROGATE(trail)) {
   2919                             source++;
   2920                             sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
   2921                             cnv->fromUChar32=0x00;
   2922                             /* convert this supplementary code point */
   2923                             /* exit this condition tree */
   2924                         } else {
   2925                             /* this is an unmatched lead code unit (1st surrogate) */
   2926                             /* callback(illegal) */
   2927                             *err=U_ILLEGAL_CHAR_FOUND;
   2928                             cnv->fromUChar32=sourceChar;
   2929                             break;
   2930                         }
   2931                     } else {
   2932                         /* no more input */
   2933                         cnv->fromUChar32=sourceChar;
   2934                         break;
   2935                     }
   2936                 } else {
   2937                     /* this is an unmatched trail code unit (2nd surrogate) */
   2938                     /* callback(illegal) */
   2939                     *err=U_ILLEGAL_CHAR_FOUND;
   2940                     cnv->fromUChar32=sourceChar;
   2941                     break;
   2942                 }
   2943             }
   2944 
   2945             /* do the conversion */
   2946             if(sourceChar <= 0x007f ){
   2947                 /* do not convert SO/SI/ESC */
   2948                 if(IS_2022_CONTROL(sourceChar)) {
   2949                     /* callback(illegal) */
   2950                     *err=U_ILLEGAL_CHAR_FOUND;
   2951                     cnv->fromUChar32=sourceChar;
   2952                     break;
   2953                 }
   2954 
   2955                 /* US-ASCII */
   2956                 if(pFromU2022State->g == 0) {
   2957                     buffer[0] = (char)sourceChar;
   2958                     len = 1;
   2959                 } else {
                            /* not in G0: shift back in with SI before the ASCII byte */
   2960                     buffer[0] = UCNV_SI;
   2961                     buffer[1] = (char)sourceChar;
   2962                     len = 2;
   2963                     pFromU2022State->g = 0;
   2964                     choiceCount = 0;
   2965                 }
   2966                 if(sourceChar == CR || sourceChar == LF) {
   2967                     /* reset the state at the end of a line */
   2968                     uprv_memset(pFromU2022State, 0, sizeof(ISO2022State));
   2969                     choiceCount = 0;
   2970                 }
   2971             }
   2972             else{
   2973                 /* convert U+0080..U+10ffff */
   2974                 int32_t i;
                        /* cs: charset chosen for this code point; g: the G set (1..3) it is invoked through */
   2975                 int8_t cs, g;
   2976 
   2977                 if(choiceCount == 0) {
   2978                     /* try the current SO/G1 converter first */
   2979                     choices[0] = pFromU2022State->cs[1];
   2980 
   2981                     /* default to GB2312_1 if none is designated yet */
   2982                     if(choices[0] == 0) {
   2983                         choices[0] = GB2312_1;
   2984                     }
   2985 
   2986                     if(converterData->version == 0) {
   2987                         /* ISO-2022-CN */
   2988 
   2989                         /* try the other SO/G1 converter; a CNS_11643_1 lookup may result in any plane */
   2990                         if(choices[0] == GB2312_1) {
   2991                             choices[1] = (int8_t)CNS_11643_1;
   2992                         } else {
   2993                             choices[1] = (int8_t)GB2312_1;
   2994                         }
   2995 
   2996                         choiceCount = 2;
   2997                     } else {
   2998                         /* ISO-2022-CN-EXT */
   2999 
   3000                         /* try one of the other converters */
   3001                         switch(choices[0]) {
   3002                         case GB2312_1:
   3003                             choices[1] = (int8_t)CNS_11643_1;
   3004                             choices[2] = (int8_t)ISO_IR_165;
   3005                             break;
   3006                         case ISO_IR_165:
   3007                             choices[1] = (int8_t)GB2312_1;
   3008                             choices[2] = (int8_t)CNS_11643_1;
   3009                             break;
   3010                         default: /* CNS_11643_x */
   3011                             choices[1] = (int8_t)GB2312_1;
   3012                             choices[2] = (int8_t)ISO_IR_165;
   3013                             break;
   3014                         }
   3015 
   3016                         choiceCount = 3;
   3017                     }
   3018                 }
   3019 
   3020                 cs = g = 0;
   3021                 /*
   3022                  * len==0: no mapping found yet
   3023                  * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
   3024                  * len>0: found a roundtrip result, done
   3025                  */
   3026                 len = 0;
   3027                 /*
   3028                  * We will turn off useFallback after finding a fallback,
   3029                  * but we still get fallbacks from PUA code points as usual.
   3030                  * Therefore, we will also need to check that we don't overwrite
   3031                  * an early fallback with a later one.
   3032                  */
   3033                 useFallback = cnv->useFallback;
   3034 
   3035                 for(i = 0; i < choiceCount && len <= 0; ++i) {
   3036                     int8_t cs0 = choices[i];
   3037                     if(cs0 > 0) {
   3038                         uint32_t value;
   3039                         int32_t len2;
   3040                         if(cs0 >= CNS_11643_0) {
                                    /* all CNS planes share one 3-byte table; the top byte of value selects the plane */
   3041                             len2 = MBCS_FROM_UCHAR32_ISO2022(
   3042                                         converterData->myConverterArray[CNS_11643],
   3043                                         sourceChar,
   3044                                         &value,
   3045                                         useFallback,
   3046                                         MBCS_OUTPUT_3);
   3047                             if(len2 == 3 || (len2 == -3 && len == 0)) {
   3048                                 targetValue = value;
   3049                                 cs = (int8_t)(CNS_11643_0 + (value >> 16) - 0x80);
   3050                                 if(len2 >= 0) {
   3051                                     len = 2;
   3052                                 } else {
   3053                                     len = -2;
   3054                                     useFallback = FALSE;
   3055                                 }
   3056                                 if(cs == CNS_11643_1) {
   3057                                     g = 1;
   3058                                 } else if(cs == CNS_11643_2) {
   3059                                     g = 2;
   3060                                 } else /* plane 3..7 */ if(converterData->version == 1) {
   3061                                     g = 3;
   3062                                 } else {
   3063                                     /* ISO-2022-CN (without -EXT) does not support plane 3..7 */
   3064                                     len = 0;
   3065                                 }
   3066                             }
   3067                         } else {
   3068                             /* GB2312_1 or ISO-IR-165 */
   3069                             len2 = MBCS_FROM_UCHAR32_ISO2022(
   3070                                         converterData->myConverterArray[cs0],
   3071                                         sourceChar,
   3072                                         &value,
   3073                                         useFallback,
   3074                                         MBCS_OUTPUT_2);
   3075                             if(len2 == 2 || (len2 == -2 && len == 0)) {
   3076                                 targetValue = value;
   3077                                 len = len2;
   3078                                 cs = cs0;
   3079                                 g = 1;
   3080                                 useFallback = FALSE;
   3081                             }
   3082                         }
   3083                     }
   3084                 }
   3085 
   3086                 if(len != 0) {
   3087                     len = 0; /* count output bytes; it must have been abs(len) == 2 */
   3088 
   3089                     /* write the designation sequence if necessary */
   3090                     if(cs != pFromU2022State->cs[g]) {
   3091                         if(cs < CNS_11643) {
   3092                             uprv_memcpy(buffer, escSeqCharsCN[cs], 4);
   3093                         } else {
   3094                             uprv_memcpy(buffer, escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)], 4);
   3095                         }
   3096                         len = 4;
   3097                         pFromU2022State->cs[g] = cs;
   3098                         if(g == 1) {
   3099                             /* changing the SO/G1 charset invalidates the choices[] */
   3100                             choiceCount = 0;
   3101                         }
   3102                     }
   3103 
   3104                     /* write the shift sequence if necessary */
   3105                     if(g != pFromU2022State->g) {
   3106                         switch(g) {
   3107                         case 1:
   3108                             buffer[len++] = UCNV_SO;
   3109 
   3110                             /* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */
   3111                             pFromU2022State->g = 1;
   3112                             break;
   3113                         case 2:
                                    /* SS2 is written as ESC N */
   3114                             buffer[len++] = 0x1b;
   3115                             buffer[len++] = 0x4e;
   3116                             break;
   3117                         default: /* case 3 */
                                    /* SS3 is written as ESC O */
   3118                             buffer[len++] = 0x1b;
   3119                             buffer[len++] = 0x4f;
   3120                             break;
   3121                         }
   3122                     }
   3123 
   3124                     /* write the two output bytes */
   3125                     buffer[len++] = (char)(targetValue >> 8);
   3126                     buffer[len++] = (char)targetValue;
   3127                 } else {
   3128                     /* if we cannot find the character after checking all codepages
   3129                      * then this is an error
   3130                      */
   3131                     *err = U_INVALID_CHAR_FOUND;
   3132                     cnv->fromUChar32=sourceChar;
   3133                     break;
   3134                 }
   3135             }
   3136 
   3137             /* output len>0 bytes in buffer[] */
                    /* one or two bytes that fit are stored directly; everything else goes through fromUWriteUInt8() */
   3138             if(len == 1) {
   3139                 *target++ = buffer[0];
   3140                 if(offsets) {
   3141                     *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */
   3142                 }
   3143             } else if(len == 2 && (target + 2) <= targetLimit) {
   3144                 *target++ = buffer[0];
   3145                 *target++ = buffer[1];
   3146                 if(offsets) {
   3147                     int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));
   3148                     *offsets++ = sourceIndex;
   3149                     *offsets++ = sourceIndex;
   3150                 }
   3151             } else {
   3152                 fromUWriteUInt8(
   3153                     cnv,
   3154                     buffer, len,
   3155                     &target, (const char *)targetLimit,
   3156                     &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
   3157                     err);
   3158                 if(U_FAILURE(*err)) {
   3159                     break;
   3160                 }
   3161             }
   3162         } /* end if(target < targetLimit) */
   3163         else{
   3164             *err =U_BUFFER_OVERFLOW_ERROR;
   3165             break;
   3166         }
   3167 
   3168     }/* end while(source < sourceLimit) */
   3169 
   3170     /*
   3171      * the end of the input stream and detection of truncated input
   3172      * are handled by the framework, but for ISO-2022-CN conversion
   3173      * we need to be in ASCII mode at the very end
   3174      *
   3175      * conditions:
   3176      *   successful
   3177      *   not in ASCII mode
   3178      *   end of input and no truncated input
   3179      */
   3180     if( U_SUCCESS(*err) &&
   3181         pFromU2022State->g!=0 &&
   3182         args->flush && source>=sourceLimit && cnv->fromUChar32==0
   3183     ) {
   3184         int32_t sourceIndex;
   3185 
   3186         /* we are switching to ASCII */
   3187         pFromU2022State->g=0;
   3188 
   3189         /* get the source index of the last input character */
   3190         /*
   3191          * TODO this would be simpler and more reliable if we used a pair
   3192          * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
   3193          * so that we could simply use the prevSourceIndex here;
   3194          * this code gives an incorrect result for the rare case of an unmatched
   3195          * trail surrogate that is alone in the last buffer of the text stream
   3196          */
   3197         sourceIndex=(int32_t)(source-args->source);
   3198         if(sourceIndex>0) {
   3199             --sourceIndex;
                    /* step back once more if the last unit looks like the trail of a surrogate pair */
   3200             if( U16_IS_TRAIL(args->source[sourceIndex]) &&
   3201                 (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
   3202             ) {
   3203                 --sourceIndex;
   3204             }
   3205         } else {
   3206             sourceIndex=-1;
   3207         }
   3208 
                /* emit the single SI byte, attributed to the last input character */
   3209         fromUWriteUInt8(
   3210             cnv,
   3211             SHIFT_IN_STR, 1,
   3212             &target, (const char *)targetLimit,
   3213             &offsets, sourceIndex,
   3214             err);
   3215     }
   3216 
   3217     /*save the state and return */
   3218     args->source = source;
   3219     args->target = (char*)target;
   3220 }
   3221 
   3222 
   3223 static void
   3224 UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
   3225                                                UErrorCode* err){
   3226     char tempBuf[3];
   3227     const char *mySource = (char *) args->source;
   3228     UChar *myTarget = args->target;
   3229     const char *mySourceLimit = args->sourceLimit;
   3230     uint32_t targetUniChar = 0x0000;
   3231     uint32_t mySourceChar = 0x0000;
   3232     UConverterDataISO2022* myData;
   3233     ISO2022State *pToU2022State;
   3234 
   3235     myData=(UConverterDataISO2022*)(args->converter->extraInfo);
   3236     pToU2022State = &myData->toU2022State;
   3237 
   3238     if(myData->key != 0) {
   3239         /* continue with a partial escape sequence */
   3240         goto escape;
   3241     } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
   3242         /* continue with a partial double-byte character */
   3243         mySourceChar = args->converter->toUBytes[0];
   3244         args->converter->toULength = 0;
   3245         targetUniChar = missingCharMarker;
   3246         goto getTrailByte;
   3247     }
   3248 
   3249     while(mySource < mySourceLimit){
   3250 
   3251         targetUniChar =missingCharMarker;
   3252 
   3253         if(myTarget < args->targetLimit){
   3254 
   3255             mySourceChar= (unsigned char) *mySource++;
   3256 
   3257             switch(mySourceChar){
   3258             case UCNV_SI:
   3259                 pToU2022State->g=0;
   3260                 if (myData->isEmptySegment) {
   3261                     myData->isEmptySegment = FALSE;	/* we are handling it, reset to avoid future spurious errors */
   3262                     *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   3263                     args->converter->toUCallbackReason = UCNV_IRREGULAR;
   3264                     args->converter->toUBytes[0] = mySourceChar;
   3265                     args->converter->toULength = 1;
   3266                     args->target = myTarget;
   3267                     args->source = mySource;
   3268                     return;
   3269                 }
   3270                 continue;
   3271 
   3272             case UCNV_SO:
   3273                 if(pToU2022State->cs[1] != 0) {
   3274                     pToU2022State->g=1;
   3275                     myData->isEmptySegment = TRUE;	/* Begin a new segment, empty so far */
   3276                     continue;
   3277                 } else {
   3278                     /* illegal to have SO before a matching designator */
   3279                     myData->isEmptySegment = FALSE;	/* Handling a different error, reset this to avoid future spurious errs */
   3280                     break;
   3281                 }
   3282 
   3283             case ESC_2022:
   3284                 mySource--;
   3285 escape:
   3286                 {
   3287                     const char * mySourceBefore = mySource;
   3288                     int8_t toULengthBefore = args->converter->toULength;
   3289 
   3290                     changeState_2022(args->converter,&(mySource),
   3291                         mySourceLimit, ISO_2022_CN,err);
   3292 
   3293                     /* After SO there must be at least one character before a designator (designator error handled separately) */
   3294                     if(myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
   3295                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   3296                         args->converter->toUCallbackReason = UCNV_IRREGULAR;
   3297                         args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore));
   3298                     }
   3299                 }
   3300 
   3301                 /* invalid or illegal escape sequence */
   3302                 if(U_FAILURE(*err)){
   3303                     args->target = myTarget;
   3304                     args->source = mySource;
   3305                     myData->isEmptySegment = FALSE;	/* Reset to avoid future spurious errors */
   3306                     return;
   3307                 }
   3308                 continue;
   3309 
   3310             /* ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */
   3311 
   3312             case CR:
   3313                 /*falls through*/
   3314             case LF:
   3315                 uprv_memset(pToU2022State, 0, sizeof(ISO2022State));
   3316                 /* falls through */
   3317             default:
   3318                 /* convert one or two bytes */
   3319                 myData->isEmptySegment = FALSE;
   3320                 if(pToU2022State->g != 0) {
   3321                     if(mySource < mySourceLimit) {
   3322                         UConverterSharedData *cnv;
   3323                         StateEnum tempState;
   3324                         int32_t tempBufLen;
   3325                         int leadIsOk, trailIsOk;
   3326                         uint8_t trailByte;
   3327 getTrailByte:
   3328                         trailByte = (uint8_t)*mySource;
   3329                         /*
   3330                          * Ticket 5691: consistent illegal sequences:
   3331                          * - We include at least the first byte in the illegal sequence.
   3332                          * - If any of the non-initial bytes could be the start of a character,
   3333                          *   we stop the illegal sequence before the first one of those.
   3334                          *
   3335                          * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
   3336                          * an ESC/SO/SI, we report only the first byte as the illegal sequence.
   3337                          * Otherwise we convert or report the pair of bytes.
   3338                          */
   3339                         leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
   3340                         trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
   3341                         if (leadIsOk && trailIsOk) {
   3342                             ++mySource;
   3343                             tempState = (StateEnum)pToU2022State->cs[pToU2022State->g];
   3344                             if(tempState >= CNS_11643_0) {
   3345                                 cnv = myData->myConverterArray[CNS_11643];
   3346                                 tempBuf[0] = (char) (0x80+(tempState-CNS_11643_0));
   3347                                 tempBuf[1] = (char) (mySourceChar);
   3348                                 tempBuf[2] = (char) trailByte;
   3349                                 tempBufLen = 3;
   3350 
   3351                             }else{
   3352                                 cnv = myData->myConverterArray[tempState];
   3353                                 tempBuf[0] = (char) (mySourceChar);
   3354                                 tempBuf[1] = (char) trailByte;
   3355                                 tempBufLen = 2;
   3356                             }
   3357                             targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE);
   3358                             mySourceChar = (mySourceChar << 8) | trailByte;
   3359                         } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
   3360                             /* report a pair of illegal bytes if the second byte is not a DBCS starter */
   3361                             ++mySource;
   3362                             /* add another bit so that the code below writes 2 bytes in case of error */
   3363                             mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
   3364                         }
   3365                         if(pToU2022State->g>=2) {
   3366                             /* return from a single-shift state to the previous one */
   3367                             pToU2022State->g=pToU2022State->prevG;
   3368                         }
   3369                     } else {
   3370                         args->converter->toUBytes[0] = (uint8_t)mySourceChar;
   3371                         args->converter->toULength = 1;
   3372                         goto endloop;
   3373                     }
   3374                 }
   3375                 else{
   3376                     if(mySourceChar <= 0x7f) {
   3377                         targetUniChar = (UChar) mySourceChar;
   3378                     }
   3379                 }
   3380                 break;
   3381             }
   3382             if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){
   3383                 if(args->offsets){
   3384                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   3385                 }
   3386                 *(myTarget++)=(UChar)targetUniChar;
   3387             }
   3388             else if(targetUniChar > missingCharMarker){
   3389                 /* disassemble the surrogate pair and write to output*/
   3390                 targetUniChar-=0x0010000;
   3391                 *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));
   3392                 if(args->offsets){
   3393                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   3394                 }
   3395                 ++myTarget;
   3396                 if(myTarget< args->targetLimit){
   3397                     *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
   3398                     if(args->offsets){
   3399                         args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   3400                     }
   3401                     ++myTarget;
   3402                 }else{
   3403                     args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
   3404                                     (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
   3405                 }
   3406 
   3407             }
   3408             else{
   3409                 /* Call the callback function*/
   3410                 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
   3411                 break;
   3412             }
   3413         }
   3414         else{
   3415             *err =U_BUFFER_OVERFLOW_ERROR;
   3416             break;
   3417         }
   3418     }
   3419 endloop:
   3420     args->target = myTarget;
   3421     args->source = mySource;
   3422 }
   3423 
   3424 static void
   3425 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
   3426     UConverter *cnv = args->converter;
   3427     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo;
   3428     ISO2022State *pFromU2022State=&myConverterData->fromU2022State;
   3429     char *p, *subchar;
   3430     char buffer[8];
   3431     int32_t length;
   3432 
   3433     subchar=(char *)cnv->subChars;
   3434     length=cnv->subCharLen; /* assume length==1 for most variants */
   3435 
   3436     p = buffer;
   3437     switch(myConverterData->locale[0]){
   3438     case 'j':
   3439         {
   3440             int8_t cs;
   3441 
   3442             if(pFromU2022State->g == 1) {
   3443                 /* JIS7: switch from G1 to G0 */
   3444                 pFromU2022State->g = 0;
   3445                 *p++ = UCNV_SI;
   3446             }
   3447 
   3448             cs = pFromU2022State->cs[0];
   3449             if(cs != ASCII && cs != JISX201) {
   3450                 /* not in ASCII or JIS X 0201: switch to ASCII */
   3451                 pFromU2022State->cs[0] = (int8_t)ASCII;
   3452                 *p++ = '\x1b';
   3453                 *p++ = '\x28';
   3454                 *p++ = '\x42';
   3455             }
   3456 
   3457             *p++ = subchar[0];
   3458             break;
   3459         }
   3460     case 'c':
   3461         if(pFromU2022State->g != 0) {
   3462             /* not in ASCII mode: switch to ASCII */
   3463             pFromU2022State->g = 0;
   3464             *p++ = UCNV_SI;
   3465         }
   3466         *p++ = subchar[0];
   3467         break;
   3468     case 'k':
   3469         if(myConverterData->version == 0) {
   3470             if(length == 1) {
   3471                 if((UBool)args->converter->fromUnicodeStatus) {
   3472                     /* in DBCS mode: switch to SBCS */
   3473                     args->converter->fromUnicodeStatus = 0;
   3474                     *p++ = UCNV_SI;
   3475                 }
   3476                 *p++ = subchar[0];
   3477             } else /* length == 2*/ {
   3478                 if(!(UBool)args->converter->fromUnicodeStatus) {
   3479                     /* in SBCS mode: switch to DBCS */
   3480                     args->converter->fromUnicodeStatus = 1;
   3481                     *p++ = UCNV_SO;
   3482                 }
   3483                 *p++ = subchar[0];
   3484                 *p++ = subchar[1];
   3485             }
   3486             break;
   3487         } else {
   3488             /* save the subconverter's substitution string */
   3489             uint8_t *currentSubChars = myConverterData->currentConverter->subChars;
   3490             int8_t currentSubCharLen = myConverterData->currentConverter->subCharLen;
   3491 
   3492             /* set our substitution string into the subconverter */
   3493             myConverterData->currentConverter->subChars = (uint8_t *)subchar;
   3494             myConverterData->currentConverter->subCharLen = (int8_t)length;
   3495 
   3496             /* let the subconverter write the subchar, set/retrieve fromUChar32 state */
   3497             args->converter = myConverterData->currentConverter;
   3498             myConverterData->currentConverter->fromUChar32 = cnv->fromUChar32;
   3499             ucnv_cbFromUWriteSub(args, 0, err);
   3500             cnv->fromUChar32 = myConverterData->currentConverter->fromUChar32;
   3501             args->converter = cnv;
   3502 
   3503             /* restore the subconverter's substitution string */
   3504             myConverterData->currentConverter->subChars = currentSubChars;
   3505             myConverterData->currentConverter->subCharLen = currentSubCharLen;
   3506 
   3507             if(*err == U_BUFFER_OVERFLOW_ERROR) {
   3508                 if(myConverterData->currentConverter->charErrorBufferLength > 0) {
   3509                     uprv_memcpy(
   3510                         cnv->charErrorBuffer,
   3511                         myConverterData->currentConverter->charErrorBuffer,
   3512                         myConverterData->currentConverter->charErrorBufferLength);
   3513                 }
   3514                 cnv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;
   3515                 myConverterData->currentConverter->charErrorBufferLength = 0;
   3516             }
   3517             return;
   3518         }
   3519     default:
   3520         /* not expected */
   3521         break;
   3522     }
   3523     ucnv_cbFromUWriteBytes(args,
   3524                            buffer, (int32_t)(p - buffer),
   3525                            offsetIndex, err);
   3526 }
   3527 
/*
 * Structure for cloning an ISO 2022 converter into a single memory block.
 *
 * ucnv_safeClone() of the converter will align the entire cloneStruct,
 * and then ucnv_safeClone() of the sub-converter may additionally align
 * currentConverter inside the cloneStruct; the deadSpace member that follows
 * currentConverter provides the room for that shift.
 * This is needed because UAlignedMemory may be larger than the alignment
 * that the platform actually requires.
 * The other cloneStruct fields will not be moved around,
 * and are aligned properly with cloneStruct's alignment.
 *
 * The member order matters: _ISO_2022_SafeClone() hands out
 * sizeof(UConverter)+sizeof(UAlignedMemory) bytes starting at currentConverter.
 */
struct cloneStruct
{
    UConverter cnv;                 /* the cloned ISO 2022 converter itself */
    UConverter currentConverter;    /* storage for the cloned sub-converter */
    UAlignedMemory deadSpace;       /* padding for realigning currentConverter */
    UConverterDataISO2022 mydata;   /* copy of the converter's extraInfo */
};
   3546 
   3547 
   3548 static UConverter *
   3549 _ISO_2022_SafeClone(
   3550             const UConverter *cnv,
   3551             void *stackBuffer,
   3552             int32_t *pBufferSize,
   3553             UErrorCode *status)
   3554 {
   3555     struct cloneStruct * localClone;
   3556     UConverterDataISO2022 *cnvData;
   3557     int32_t i, size;
   3558 
   3559     if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */
   3560         *pBufferSize = (int32_t)sizeof(struct cloneStruct);
   3561         return NULL;
   3562     }
   3563 
   3564     cnvData = (UConverterDataISO2022 *)cnv->extraInfo;
   3565     localClone = (struct cloneStruct *)stackBuffer;
   3566 
   3567     /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
   3568 
   3569     uprv_memcpy(&localClone->mydata, cnvData, sizeof(UConverterDataISO2022));
   3570     localClone->cnv.extraInfo = &localClone->mydata; /* set pointer to extra data */
   3571     localClone->cnv.isExtraLocal = TRUE;
   3572 
   3573     /* share the subconverters */
   3574 
   3575     if(cnvData->currentConverter != NULL) {
   3576         size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */
   3577         localClone->mydata.currentConverter =
   3578             ucnv_safeClone(cnvData->currentConverter,
   3579                             &localClone->currentConverter,
   3580                             &size, status);
   3581         if(U_FAILURE(*status)) {
   3582             return NULL;
   3583         }
   3584     }
   3585 
   3586     for(i=0; i<UCNV_2022_MAX_CONVERTERS; ++i) {
   3587         if(cnvData->myConverterArray[i] != NULL) {
   3588             ucnv_incrementRefCount(cnvData->myConverterArray[i]);
   3589         }
   3590     }
   3591 
   3592     return &localClone->cnv;
   3593 }
   3594 
   3595 static void
   3596 _ISO_2022_GetUnicodeSet(const UConverter *cnv,
   3597                     const USetAdder *sa,
   3598                     UConverterUnicodeSet which,
   3599                     UErrorCode *pErrorCode)
   3600 {
   3601     int32_t i;
   3602     UConverterDataISO2022* cnvData;
   3603 
   3604     if (U_FAILURE(*pErrorCode)) {
   3605         return;
   3606     }
   3607 #ifdef U_ENABLE_GENERIC_ISO_2022
   3608     if (cnv->sharedData == &_ISO2022Data) {
   3609         /* We use UTF-8 in this case */
   3610         sa->addRange(sa->set, 0, 0xd7FF);
   3611         sa->addRange(sa->set, 0xE000, 0x10FFFF);
   3612         return;
   3613     }
   3614 #endif
   3615 
   3616     cnvData = (UConverterDataISO2022*)cnv->extraInfo;
   3617 
   3618     /* open a set and initialize it with code points that are algorithmically round-tripped */
   3619     switch(cnvData->locale[0]){
   3620     case 'j':
   3621         /* include JIS X 0201 which is hardcoded */
   3622         sa->add(sa->set, 0xa5);
   3623         sa->add(sa->set, 0x203e);
   3624         if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
   3625             /* include Latin-1 for some variants of JP */
   3626             sa->addRange(sa->set, 0, 0xff);
   3627         } else {
   3628             /* include ASCII for JP */
   3629             sa->addRange(sa->set, 0, 0x7f);
   3630         }
   3631         if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
   3632             /*
   3633              * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0
   3634              * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8)
   3635              * use half-width Katakana.
   3636              * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode)
   3637              * half-width Katakana via the ESC ( I sequence.
   3638              * However, we only emit (fromUnicode) half-width Katakana according to the
   3639              * definition of each variant.
   3640              *
   3641              * When including fallbacks,
   3642              * we need to include half-width Katakana Unicode code points for all JP variants because
   3643              * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana).
   3644              */
   3645             /* include half-width Katakana for JP */
   3646             sa->addRange(sa->set, HWKANA_START, HWKANA_END);
   3647         }
   3648         break;
   3649     case 'c':
   3650     case 'z':
   3651         /* include ASCII for CN */
   3652         sa->addRange(sa->set, 0, 0x7f);
   3653         break;
   3654     case 'k':
   3655         /* there is only one converter for KR, and it is not in the myConverterArray[] */
   3656         cnvData->currentConverter->sharedData->impl->getUnicodeSet(
   3657                 cnvData->currentConverter, sa, which, pErrorCode);
   3658         /* the loop over myConverterArray[] will simply not find another converter */
   3659         break;
   3660     default:
   3661         break;
   3662     }
   3663 
   3664 #if 0  /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */
   3665             if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
   3666                 cnvData->version==0 && i==CNS_11643
   3667             ) {
   3668                 /* special handling for non-EXT ISO-2022-CN: add only code points for CNS planes 1 and 2 */
   3669                 ucnv_MBCSGetUnicodeSetForBytes(
   3670                         cnvData->myConverterArray[i],
   3671                         sa, UCNV_ROUNDTRIP_SET,
   3672                         0, 0x81, 0x82,
   3673                         pErrorCode);
   3674             }
   3675 #endif
   3676 
   3677     for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
   3678         UConverterSetFilter filter;
   3679         if(cnvData->myConverterArray[i]!=NULL) {
   3680             if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
   3681                 cnvData->version==0 && i==CNS_11643
   3682             ) {
   3683                 /*
   3684                  * Version-specific for CN:
   3685                  * CN version 0 does not map CNS planes 3..7 although
   3686                  * they are all available in the CNS conversion table;
   3687                  * CN version 1 (-EXT) does map them all.
   3688                  * The two versions create different Unicode sets.
   3689                  */
   3690                 filter=UCNV_SET_FILTER_2022_CN;
   3691             } else if(cnvData->locale[0]=='j' && i==JISX208) {
   3692                 /*
   3693                  * Only add code points that map to Shift-JIS codes
   3694                  * corresponding to JIS X 0208.
   3695                  */
   3696                 filter=UCNV_SET_FILTER_SJIS;
   3697             } else if(i==KSC5601) {
   3698                 /*
   3699                  * Some of the KSC 5601 tables (convrtrs.txt has this aliases on multiple tables)
   3700                  * are broader than GR94.
   3701                  */
   3702                 filter=UCNV_SET_FILTER_GR94DBCS;
   3703             } else {
   3704                 filter=UCNV_SET_FILTER_NONE;
   3705             }
   3706             ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, filter, pErrorCode);
   3707         }
   3708     }
   3709 
   3710     /*
   3711      * ISO 2022 converters must not convert SO/SI/ESC despite what
   3712      * sub-converters do by themselves.
   3713      * Remove these characters from the set.
   3714      */
   3715     sa->remove(sa->set, 0x0e);
   3716     sa->remove(sa->set, 0x0f);
   3717     sa->remove(sa->set, 0x1b);
   3718 
   3719     /* ISO 2022 converters do not convert C1 controls either */
   3720     sa->removeRange(sa->set, 0x80, 0x9f);
   3721 }
   3722 
   3723 static const UConverterImpl _ISO2022Impl={
   3724     UCNV_ISO_2022,
   3725 
   3726     NULL,
   3727     NULL,
   3728 
   3729     _ISO2022Open,
   3730     _ISO2022Close,
   3731     _ISO2022Reset,
   3732 
   3733 #ifdef U_ENABLE_GENERIC_ISO_2022
   3734     T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
   3735     T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
   3736     ucnv_fromUnicode_UTF8,
   3737     ucnv_fromUnicode_UTF8_OFFSETS_LOGIC,
   3738 #else
   3739     NULL,
   3740     NULL,
   3741     NULL,
   3742     NULL,
   3743 #endif
   3744     NULL,
   3745 
   3746     NULL,
   3747     _ISO2022getName,
   3748     _ISO_2022_WriteSub,
   3749     _ISO_2022_SafeClone,
   3750     _ISO_2022_GetUnicodeSet
   3751 };
   3752 static const UConverterStaticData _ISO2022StaticData={
   3753     sizeof(UConverterStaticData),
   3754     "ISO_2022",
   3755     2022,
   3756     UCNV_IBM,
   3757     UCNV_ISO_2022,
   3758     1,
   3759     3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */
   3760     { 0x1a, 0, 0, 0 },
   3761     1,
   3762     FALSE,
   3763     FALSE,
   3764     0,
   3765     0,
   3766     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   3767 };
   3768 const UConverterSharedData _ISO2022Data={
   3769     sizeof(UConverterSharedData),
   3770     ~((uint32_t) 0),
   3771     NULL,
   3772     NULL,
   3773     &_ISO2022StaticData,
   3774     FALSE,
   3775     &_ISO2022Impl,
   3776     0
   3777 };
   3778 
   3779 /*************JP****************/
   3780 static const UConverterImpl _ISO2022JPImpl={
   3781     UCNV_ISO_2022,
   3782 
   3783     NULL,
   3784     NULL,
   3785 
   3786     _ISO2022Open,
   3787     _ISO2022Close,
   3788     _ISO2022Reset,
   3789 
   3790     UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,
   3791     UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,
   3792     UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,
   3793     UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,
   3794     NULL,
   3795 
   3796     NULL,
   3797     _ISO2022getName,
   3798     _ISO_2022_WriteSub,
   3799     _ISO_2022_SafeClone,
   3800     _ISO_2022_GetUnicodeSet
   3801 };
   3802 static const UConverterStaticData _ISO2022JPStaticData={
   3803     sizeof(UConverterStaticData),
   3804     "ISO_2022_JP",
   3805     0,
   3806     UCNV_IBM,
   3807     UCNV_ISO_2022,
   3808     1,
   3809     6, /* max 6 bytes per UChar: 4-byte escape sequence + DBCS */
   3810     { 0x1a, 0, 0, 0 },
   3811     1,
   3812     FALSE,
   3813     FALSE,
   3814     0,
   3815     0,
   3816     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   3817 };
   3818 static const UConverterSharedData _ISO2022JPData={
   3819     sizeof(UConverterSharedData),
   3820     ~((uint32_t) 0),
   3821     NULL,
   3822     NULL,
   3823     &_ISO2022JPStaticData,
   3824     FALSE,
   3825     &_ISO2022JPImpl,
   3826     0
   3827 };
   3828 
   3829 /************* KR ***************/
   3830 static const UConverterImpl _ISO2022KRImpl={
   3831     UCNV_ISO_2022,
   3832 
   3833     NULL,
   3834     NULL,
   3835 
   3836     _ISO2022Open,
   3837     _ISO2022Close,
   3838     _ISO2022Reset,
   3839 
   3840     UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,
   3841     UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,
   3842     UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,
   3843     UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,
   3844     NULL,
   3845 
   3846     NULL,
   3847     _ISO2022getName,
   3848     _ISO_2022_WriteSub,
   3849     _ISO_2022_SafeClone,
   3850     _ISO_2022_GetUnicodeSet
   3851 };
   3852 static const UConverterStaticData _ISO2022KRStaticData={
   3853     sizeof(UConverterStaticData),
   3854     "ISO_2022_KR",
   3855     0,
   3856     UCNV_IBM,
   3857     UCNV_ISO_2022,
   3858     1,
   3859     3, /* max 3 bytes per UChar: SO+DBCS */
   3860     { 0x1a, 0, 0, 0 },
   3861     1,
   3862     FALSE,
   3863     FALSE,
   3864     0,
   3865     0,
   3866     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   3867 };
   3868 static const UConverterSharedData _ISO2022KRData={
   3869     sizeof(UConverterSharedData),
   3870     ~((uint32_t) 0),
   3871     NULL,
   3872     NULL,
   3873     &_ISO2022KRStaticData,
   3874     FALSE,
   3875     &_ISO2022KRImpl,
   3876     0
   3877 };
   3878 
   3879 /*************** CN ***************/
   3880 static const UConverterImpl _ISO2022CNImpl={
   3881 
   3882     UCNV_ISO_2022,
   3883 
   3884     NULL,
   3885     NULL,
   3886 
   3887     _ISO2022Open,
   3888     _ISO2022Close,
   3889     _ISO2022Reset,
   3890 
   3891     UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,
   3892     UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,
   3893     UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,
   3894     UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,
   3895     NULL,
   3896 
   3897     NULL,
   3898     _ISO2022getName,
   3899     _ISO_2022_WriteSub,
   3900     _ISO_2022_SafeClone,
   3901     _ISO_2022_GetUnicodeSet
   3902 };
   3903 static const UConverterStaticData _ISO2022CNStaticData={
   3904     sizeof(UConverterStaticData),
   3905     "ISO_2022_CN",
   3906     0,
   3907     UCNV_IBM,
   3908     UCNV_ISO_2022,
   3909     1,
   3910     8, /* max 8 bytes per UChar: 4-byte CNS designator + 2 bytes for SS2/SS3 + DBCS */
   3911     { 0x1a, 0, 0, 0 },
   3912     1,
   3913     FALSE,
   3914     FALSE,
   3915     0,
   3916     0,
   3917     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   3918 };
   3919 static const UConverterSharedData _ISO2022CNData={
   3920     sizeof(UConverterSharedData),
   3921     ~((uint32_t) 0),
   3922     NULL,
   3923     NULL,
   3924     &_ISO2022CNStaticData,
   3925     FALSE,
   3926     &_ISO2022CNImpl,
   3927     0
   3928 };
   3929 
   3930 
   3931 
   3932 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
   3933