Home | History | Annotate | Download | only in common
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 2000-2010, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 *   file name:  ucnv2022.c
      7 *   encoding:   US-ASCII
      8 *   tab size:   8 (not used)
      9 *   indentation:4
     10 *
     11 *   created on: 2000feb03
     12 *   created by: Markus W. Scherer
     13 *
     14 *   Change history:
     15 *
     16 *   06/29/2000  helena  Major rewrite of the callback APIs.
     17 *   08/08/2000  Ram     Included support for ISO-2022-JP-2
     18 *                       Changed implementation of toUnicode
     19 *                       function
     20 *   08/21/2000  Ram     Added support for ISO-2022-KR
*   08/29/2000  Ram     Separated implementation of EBCDIC to
     22 *                       ucnvebdc.c
     23 *   09/20/2000  Ram     Added support for ISO-2022-CN
     24 *                       Added implementations for getNextUChar()
     25 *                       for specific 2022 country variants.
     26 *   10/31/2000  Ram     Implemented offsets logic functions
     27 */
     28 
     29 #include "unicode/utypes.h"
     30 
     31 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
     32 
     33 #include "unicode/ucnv.h"
     34 #include "unicode/uset.h"
     35 #include "unicode/ucnv_err.h"
     36 #include "unicode/ucnv_cb.h"
     37 #include "ucnv_imp.h"
     38 #include "ucnv_bld.h"
     39 #include "ucnv_cnv.h"
     40 #include "ucnvmbcs.h"
     41 #include "cstring.h"
     42 #include "cmemory.h"
     43 
     44 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     45 
     46 #ifdef U_ENABLE_GENERIC_ISO_2022
     47 /*
     48  * I am disabling the generic ISO-2022 converter after proposing to do so on
     49  * the icu mailing list two days ago.
     50  *
     51  * Reasons:
     52  * 1. It does not fully support the ISO-2022/ECMA-35 specification with all of
     53  *    its designation sequences, single shifts with return to the previous state,
     54  *    switch-with-no-return to UTF-16BE or similar, etc.
     55  *    This is unlike the language-specific variants like ISO-2022-JP which
     56  *    require a much smaller repertoire of ISO-2022 features.
     57  *    These variants continue to be supported.
     58  * 2. I believe that no one is really using the generic ISO-2022 converter
     59  *    but rather always one of the language-specific variants.
     60  *    Note that ICU's generic ISO-2022 converter has always output one escape
     61  *    sequence followed by UTF-8 for the whole stream.
     62  * 3. Switching between subcharsets is extremely slow, because each time
     63  *    the previous converter is closed and a new one opened,
     64  *    without any kind of caching, least-recently-used list, etc.
     65  * 4. The code is currently buggy, and given the above it does not seem
     66  *    reasonable to spend the time on maintenance.
     67  * 5. ISO-2022 subcharsets should normally be used with 7-bit byte encodings.
     68  *    This means, for example, that when ISO-8859-7 is designated, the following
     69  *    ISO-2022 bytes 00..7f should be interpreted as ISO-8859-7 bytes 80..ff.
     70  *    The ICU ISO-2022 converter does not handle this - and has no information
     71  *    about which subconverter would have to be shifted vs. which is designed
     72  *    for 7-bit ISO-2022.
     73  *
     74  * Markus Scherer 2003-dec-03
     75  */
     76 #endif
     77 
     78 static const char SHIFT_IN_STR[]  = "\x0F";
     79 static const char SHIFT_OUT_STR[] = "\x0E";
     80 
     81 #define CR      0x0D
     82 #define LF      0x0A
     83 #define H_TAB   0x09
     84 #define V_TAB   0x0B
     85 #define SPACE   0x20
     86 
     87 enum {
     88     HWKANA_START=0xff61,
     89     HWKANA_END=0xff9f
     90 };
     91 
     92 /*
     93  * 94-character sets with native byte values A1..FE are encoded in ISO 2022
     94  * as bytes 21..7E. (Subtract 0x80.)
     95  * 96-character sets with native byte values A0..FF are encoded in ISO 2022
     96  * as bytes 20..7F. (Subtract 0x80.)
     97  * Do not encode C1 control codes with native bytes 80..9F
     98  * as bytes 00..1F (C0 control codes).
     99  */
    100 enum {
    101     GR94_START=0xa1,
    102     GR94_END=0xfe,
    103     GR96_START=0xa0,
    104     GR96_END=0xff
    105 };
    106 
    107 /*
    108  * ISO 2022 control codes must not be converted from Unicode
    109  * because they would mess up the byte stream.
    110  * The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b
    111  * corresponding to SO, SI, and ESC.
    112  */
    113 #define IS_2022_CONTROL(c) (((c)<0x20) && (((uint32_t)1<<(c))&0x0800c000)!=0)
    114 
    115 /* for ISO-2022-JP and -CN implementations */
    116 typedef enum  {
    117         /* shared values */
    118         INVALID_STATE=-1,
    119         ASCII = 0,
    120 
    121         SS2_STATE=0x10,
    122         SS3_STATE,
    123 
    124         /* JP */
    125         ISO8859_1 = 1 ,
    126         ISO8859_7 = 2 ,
    127         JISX201  = 3,
    128         JISX208 = 4,
    129         JISX212 = 5,
    130         GB2312  =6,
    131         KSC5601 =7,
    132         HWKANA_7BIT=8,    /* Halfwidth Katakana 7 bit */
    133 
    134         /* CN */
    135         /* the first few enum constants must keep their values because they correspond to myConverterArray[] */
    136         GB2312_1=1,
    137         ISO_IR_165=2,
    138         CNS_11643=3,
    139 
    140         /*
    141          * these are used in StateEnum and ISO2022State variables,
    142          * but CNS_11643 must be used to index into myConverterArray[]
    143          */
    144         CNS_11643_0=0x20,
    145         CNS_11643_1,
    146         CNS_11643_2,
    147         CNS_11643_3,
    148         CNS_11643_4,
    149         CNS_11643_5,
    150         CNS_11643_6,
    151         CNS_11643_7
    152 } StateEnum;
    153 
    154 /* is the StateEnum charset value for a DBCS charset? */
    155 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)
    156 
    157 #define CSM(cs) ((uint16_t)1<<(cs))
    158 
    159 /*
    160  * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence
    161  * to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x
    162  *
    163  * Note: The converter uses some leniency:
    164  * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in
    165  *   all versions, not just JIS7 and JIS8.
    166  * - ICU does not distinguish between different versions of JIS X 0208.
    167  */
    168 enum { MAX_JA_VERSION=4 };
    169 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
    170     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
    171     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
    172     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
    173     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
    174     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)
    175 };
    176 
    177 typedef enum {
    178         ASCII1=0,
    179         LATIN1,
    180         SBCS,
    181         DBCS,
    182         MBCS,
    183         HWKANA
    184 }Cnv2022Type;
    185 
    186 typedef struct ISO2022State {
    187     int8_t cs[4];       /* charset number for SI (G0)/SO (G1)/SS2 (G2)/SS3 (G3) */
    188     int8_t g;           /* 0..3 for G0..G3 (SI/SO/SS2/SS3) */
    189     int8_t prevG;       /* g before single shift (SS2 or SS3) */
    190 } ISO2022State;
    191 
    192 #define UCNV_OPTIONS_VERSION_MASK 0xf
    193 #define UCNV_2022_MAX_CONVERTERS 10
    194 
    195 typedef struct{
    196     UConverterSharedData *myConverterArray[UCNV_2022_MAX_CONVERTERS];
    197     UConverter *currentConverter;
    198     Cnv2022Type currentType;
    199     ISO2022State toU2022State, fromU2022State;
    200     uint32_t key;
    201     uint32_t version;
    202 #ifdef U_ENABLE_GENERIC_ISO_2022
    203     UBool isFirstBuffer;
    204 #endif
    205     UBool isEmptySegment;
    206     char name[30];
    207     char locale[3];
    208 }UConverterDataISO2022;
    209 
    210 /* Protos */
    211 /* ISO-2022 ----------------------------------------------------------------- */
    212 
    213 /*Forward declaration */
    214 U_CFUNC void
    215 ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs * args,
    216                       UErrorCode * err);
    217 U_CFUNC void
    218 ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs * args,
    219                                     UErrorCode * err);
    220 
    221 #define ESC_2022 0x1B /*ESC*/
    222 
    223 typedef enum
    224 {
    225         INVALID_2022 = -1, /*Doesn't correspond to a valid iso 2022 escape sequence*/
    226         VALID_NON_TERMINAL_2022 = 0, /*so far corresponds to a valid iso 2022 escape sequence*/
    227         VALID_TERMINAL_2022 = 1, /*corresponds to a valid iso 2022 escape sequence*/
    228         VALID_MAYBE_TERMINAL_2022 = 2 /*so far matches one iso 2022 escape sequence, but by adding more characters might match another escape sequence*/
    229 } UCNV_TableStates_2022;
    230 
    231 /*
    232 * The way these state transition arrays work is:
    233 * ex : ESC$B is the sequence for JISX208
    234 *      a) First Iteration: char is ESC
    235 *          i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index
    236 *             int x = normalize_esq_chars_2022[27] which is equal to 1
    237 *         ii) Search for this value in escSeqStateTable_Key_2022[]
    238 *             value of x is stored at escSeqStateTable_Key_2022[0]
    239 *        iii) Save this index as offset
    240 *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
    241 *             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
    242 *     b) Switch on this state and continue to next char
    243 *          i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index
    244 *             which is normalize_esq_chars_2022[36] == 4
    245 *         ii) x is currently 1(from above)
    246 *               x<<=5 -- x is now 32
    247 *               x+=normalize_esq_chars_2022[36]
    248 *               now x is 36
    249 *        iii) Search for this value in escSeqStateTable_Key_2022[]
    250 *             value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2
    251 *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
    252 *             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
    253 *     c) Switch on this state and continue to next char
    254 *        i)  Get the value of B from normalize_esq_chars_2022[] with int value of B as index
    255 *        ii) x is currently 36 (from above)
    256 *            x<<=5 -- x is now 1152
    257 *            x+=normalize_esq_chars_2022[66]
    258 *            now x is 1161
    259 *       iii) Search for this value in escSeqStateTable_Key_2022[]
*            value of x is stored at escSeqStateTable_Key_2022[24], so offset is 24
*        iv) Get state of this sequence from escSeqStateTable_Value_2022[24]
*            escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022
*         v) Get the converter name from escSeqStateTable_Result_2022[24] which is JISX-208
    264 */
    265 
    266 
    267 /*Below are the 3 arrays depicting a state transition table*/
    268 static const int8_t normalize_esq_chars_2022[256] = {
    269 /*       0      1       2       3       4      5       6        7       8       9           */
    270 
    271          0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    272         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    273         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,1      ,0      ,0
    274         ,0     ,0      ,0      ,0      ,0      ,0      ,4      ,7      ,29      ,0
    275         ,2     ,24     ,26     ,27     ,0      ,3      ,23     ,6      ,0      ,0
    276         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    277         ,0     ,0      ,0      ,0      ,5      ,8      ,9      ,10     ,11     ,12
    278         ,13    ,14     ,15     ,16     ,17     ,18     ,19     ,20     ,25     ,28
    279         ,0     ,0      ,21     ,0      ,0      ,0      ,0      ,0      ,0      ,0
    280         ,22    ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    281         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    282         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    283         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    284         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    285         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    286         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    287         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    288         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    289         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    290         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    291         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    292         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    293         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    294         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    295         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    296         ,0     ,0      ,0      ,0      ,0      ,0
    297 };
    298 
    299 #ifdef U_ENABLE_GENERIC_ISO_2022
    300 /*
    301  * When the generic ISO-2022 converter is completely removed, not just disabled
    302  * per #ifdef, then the following state table and the associated tables that are
    303  * dimensioned with MAX_STATES_2022 should be trimmed.
    304  *
    305  * Especially, VALID_MAYBE_TERMINAL_2022 will not be used any more, and all of
    306  * the associated escape sequences starting with ESC ( B should be removed.
    307  * This includes the ones with key values 1097 and all of the ones above 1000000.
    308  *
    309  * For the latter, the tables can simply be truncated.
    310  * For the former, since the tables must be kept parallel, it is probably best
    311  * to simply duplicate an adjacent table cell, parallel in all tables.
    312  *
    313  * It may make sense to restructure the tables, especially by using small search
    314  * tables for the variants instead of indexing them parallel to the table here.
    315  */
    316 #endif
    317 
    318 #define MAX_STATES_2022 74
    319 static const int32_t escSeqStateTable_Key_2022[MAX_STATES_2022] = {
    320 /*   0           1           2           3           4           5           6           7           8           9           */
    321 
    322      1          ,34         ,36         ,39         ,55         ,57         ,60         ,61         ,1093       ,1096
    323     ,1097       ,1098       ,1099       ,1100       ,1101       ,1102       ,1103       ,1104       ,1105       ,1106
    324     ,1109       ,1154       ,1157       ,1160       ,1161       ,1176       ,1178       ,1179       ,1254       ,1257
    325     ,1768       ,1773       ,1957       ,35105      ,36933      ,36936      ,36937      ,36938      ,36939      ,36940
    326     ,36942      ,36943      ,36944      ,36945      ,36946      ,36947      ,36948      ,37640      ,37642      ,37644
    327     ,37646      ,37711      ,37744      ,37745      ,37746      ,37747      ,37748      ,40133      ,40136      ,40138
    328     ,40139      ,40140      ,40141      ,1123363    ,35947624   ,35947625   ,35947626   ,35947627   ,35947629   ,35947630
    329     ,35947631   ,35947635   ,35947636   ,35947638
    330 };
    331 
    332 #ifdef U_ENABLE_GENERIC_ISO_2022
    333 
    334 static const char* const escSeqStateTable_Result_2022[MAX_STATES_2022] = {
    335  /*  0                      1                        2                      3                   4                   5                        6                      7                       8                       9    */
    336 
    337      NULL                   ,NULL                   ,NULL                   ,NULL               ,NULL               ,NULL                   ,NULL                   ,NULL                   ,"latin1"               ,"latin1"
    338     ,"latin1"               ,"ibm-865"              ,"ibm-865"              ,"ibm-865"          ,"ibm-865"          ,"ibm-865"              ,"ibm-865"              ,"JISX0201"             ,"JISX0201"             ,"latin1"
    339     ,"latin1"               ,NULL                   ,"JISX-208"             ,"ibm-5478"         ,"JISX-208"         ,NULL                   ,NULL                   ,NULL                   ,NULL                   ,"UTF8"
    340     ,"ISO-8859-1"           ,"ISO-8859-7"           ,"JIS-X-208"            ,NULL               ,"ibm-955"          ,"ibm-367"              ,"ibm-952"              ,"ibm-949"              ,"JISX-212"             ,"ibm-1383"
    341     ,"ibm-952"              ,"ibm-964"              ,"ibm-964"              ,"ibm-964"          ,"ibm-964"          ,"ibm-964"              ,"ibm-964"              ,"ibm-5478"         ,"ibm-949"              ,"ISO-IR-165"
    342     ,"CNS-11643-1992,1"     ,"CNS-11643-1992,2"     ,"CNS-11643-1992,3"     ,"CNS-11643-1992,4" ,"CNS-11643-1992,5" ,"CNS-11643-1992,6"     ,"CNS-11643-1992,7"     ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian"
    343     ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,NULL               ,"latin1"           ,"ibm-912"              ,"ibm-913"              ,"ibm-914"              ,"ibm-813"              ,"ibm-1089"
    344     ,"ibm-920"              ,"ibm-915"              ,"ibm-915"              ,"latin1"
    345 };
    346 
    347 #endif
    348 
    349 static const int8_t escSeqStateTable_Value_2022[MAX_STATES_2022] = {
    350 /*          0                           1                         2                             3                           4                           5                               6                        7                          8                           9       */
    351      VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022     ,VALID_NON_TERMINAL_2022   ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    352     ,VALID_MAYBE_TERMINAL_2022  ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    353     ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022
    354     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    355     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    356     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    357     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    358     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    359 };
    360 
    361 
    362 /* Type def for refactoring changeState_2022 code*/
    363 typedef enum{
    364 #ifdef U_ENABLE_GENERIC_ISO_2022
    365     ISO_2022=0,
    366 #endif
    367     ISO_2022_JP=1,
    368     ISO_2022_KR=2,
    369     ISO_2022_CN=3
    370 } Variant2022;
    371 
    372 /*********** ISO 2022 Converter Protos ***********/
    373 static void
    374 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode);
    375 
    376 static void
    377  _ISO2022Close(UConverter *converter);
    378 
    379 static void
    380 _ISO2022Reset(UConverter *converter, UConverterResetChoice choice);
    381 
    382 static const char*
    383 _ISO2022getName(const UConverter* cnv);
    384 
    385 static void
    386 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err);
    387 
    388 static UConverter *
    389 _ISO_2022_SafeClone(const UConverter *cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status);
    390 
    391 #ifdef U_ENABLE_GENERIC_ISO_2022
    392 static void
    393 T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, UErrorCode* err);
    394 #endif
    395 
    396 /*const UConverterSharedData _ISO2022Data;*/
    397 static const UConverterSharedData _ISO2022JPData;
    398 static const UConverterSharedData _ISO2022KRData;
    399 static const UConverterSharedData _ISO2022CNData;
    400 
    401 /*************** Converter implementations ******************/
    402 
    403 /* The purpose of this function is to get around gcc compiler warnings. */
    404 static U_INLINE void
    405 fromUWriteUInt8(UConverter *cnv,
    406                  const char *bytes, int32_t length,
    407                  uint8_t **target, const char *targetLimit,
    408                  int32_t **offsets,
    409                  int32_t sourceIndex,
    410                  UErrorCode *pErrorCode)
    411 {
    412     char *targetChars = (char *)*target;
    413     ucnv_fromUWriteBytes(cnv, bytes, length, &targetChars, targetLimit,
    414                          offsets, sourceIndex, pErrorCode);
    415     *target = (uint8_t*)targetChars;
    416 
    417 }
    418 
    419 static U_INLINE void
    420 setInitialStateToUnicodeKR(UConverter* converter, UConverterDataISO2022 *myConverterData){
    421     if(myConverterData->version == 1) {
    422         UConverter *cnv = myConverterData->currentConverter;
    423 
    424         cnv->toUnicodeStatus=0;     /* offset */
    425         cnv->mode=0;                /* state */
    426         cnv->toULength=0;           /* byteIndex */
    427     }
    428 }
    429 
    430 static U_INLINE void
    431 setInitialStateFromUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConverterData){
    432    /* in ISO-2022-KR the designator sequence appears only once
    433     * in a file so we append it only once
    434     */
    435     if( converter->charErrorBufferLength==0){
    436 
    437         converter->charErrorBufferLength = 4;
    438         converter->charErrorBuffer[0] = 0x1b;
    439         converter->charErrorBuffer[1] = 0x24;
    440         converter->charErrorBuffer[2] = 0x29;
    441         converter->charErrorBuffer[3] = 0x43;
    442     }
    443     if(myConverterData->version == 1) {
    444         UConverter *cnv = myConverterData->currentConverter;
    445 
    446         cnv->fromUChar32=0;
    447         cnv->fromUnicodeStatus=1;   /* prevLength */
    448     }
    449 }
    450 
    451 static void
    452 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
    453 
    454     char myLocale[6]={' ',' ',' ',' ',' ',' '};
    455 
    456     cnv->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022));
    457     if(cnv->extraInfo != NULL) {
    458         UConverterNamePieces stackPieces;
    459         UConverterLoadArgs stackArgs={ (int32_t)sizeof(UConverterLoadArgs) };
    460         UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo;
    461         uint32_t version;
    462 
    463         stackArgs.onlyTestIsLoadable = pArgs->onlyTestIsLoadable;
    464 
    465         uprv_memset(myConverterData, 0, sizeof(UConverterDataISO2022));
    466         myConverterData->currentType = ASCII1;
    467         cnv->fromUnicodeStatus =FALSE;
    468         if(pArgs->locale){
    469             uprv_strncpy(myLocale, pArgs->locale, sizeof(myLocale));
    470         }
    471         version = pArgs->options & UCNV_OPTIONS_VERSION_MASK;
    472         myConverterData->version = version;
    473         if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') &&
    474             (myLocale[2]=='_' || myLocale[2]=='\0'))
    475         {
    476             size_t len=0;
    477             /* open the required converters and cache them */
    478             if(version>MAX_JA_VERSION) {
    479                 /* prevent indexing beyond jpCharsetMasks[] */
    480                 myConverterData->version = version = 0;
    481             }
    482             if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
    483                 myConverterData->myConverterArray[ISO8859_7] =
    484                     ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode);
    485             }
    486             myConverterData->myConverterArray[JISX208] =
    487                 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, errorCode);
    488             if(jpCharsetMasks[version]&CSM(JISX212)) {
    489                 myConverterData->myConverterArray[JISX212] =
    490                     ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode);
    491             }
    492             if(jpCharsetMasks[version]&CSM(GB2312)) {
    493                 myConverterData->myConverterArray[GB2312] =
    494                     ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode);   /* gb_2312_80-1 */
    495             }
    496             if(jpCharsetMasks[version]&CSM(KSC5601)) {
    497                 myConverterData->myConverterArray[KSC5601] =
    498                     ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode);
    499             }
    500 
    501             /* set the function pointers to appropriate funtions */
    502             cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);
    503             uprv_strcpy(myConverterData->locale,"ja");
    504 
    505             (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=");
    506             len = uprv_strlen(myConverterData->name);
    507             myConverterData->name[len]=(char)(myConverterData->version+(int)'0');
    508             myConverterData->name[len+1]='\0';
    509         }
    510         else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
    511             (myLocale[2]=='_' || myLocale[2]=='\0'))
    512         {
    513             const char *cnvName;
    514             if(version==1) {
    515                 cnvName="icu-internal-25546";
    516             } else {
    517                 cnvName="ksc_5601";
    518                 myConverterData->version=version=0;
    519             }
    520             if(pArgs->onlyTestIsLoadable) {
    521                 ucnv_canCreateConverter(cnvName, errorCode);  /* errorCode carries result */
    522                 uprv_free(cnv->extraInfo);
    523                 cnv->extraInfo=NULL;
    524                 return;
    525             } else {
    526                 myConverterData->currentConverter=ucnv_open(cnvName, errorCode);
    527                 if (U_FAILURE(*errorCode)) {
    528                     _ISO2022Close(cnv);
    529                     return;
    530                 }
    531 
    532                 if(version==1) {
    533                     (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=1");
    534                     uprv_memcpy(cnv->subChars, myConverterData->currentConverter->subChars, 4);
    535                     cnv->subCharLen = myConverterData->currentConverter->subCharLen;
    536                 }else{
    537                     (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=0");
    538                 }
    539 
    540                 /* initialize the state variables */
    541                 setInitialStateToUnicodeKR(cnv, myConverterData);
    542                 setInitialStateFromUnicodeKR(cnv, myConverterData);
    543 
    544                 /* set the function pointers to appropriate funtions */
    545                 cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData;
    546                 uprv_strcpy(myConverterData->locale,"ko");
    547             }
    548         }
    549         else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& myLocale[1]=='n'))&&
    550             (myLocale[2]=='_' || myLocale[2]=='\0'))
    551         {
    552 
    553             /* open the required converters and cache them */
    554             myConverterData->myConverterArray[GB2312_1] =
    555                 ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode);
    556             if(version==1) {
    557                 myConverterData->myConverterArray[ISO_IR_165] =
    558                     ucnv_loadSharedData("noop-iso-ir-165", &stackPieces, &stackArgs, errorCode);
    559             }
    560             myConverterData->myConverterArray[CNS_11643] =
    561                 ucnv_loadSharedData("noop-cns-11643", &stackPieces, &stackArgs, errorCode);
    562 
    563 
    564             /* set the function pointers to appropriate funtions */
    565             cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData;
    566             uprv_strcpy(myConverterData->locale,"cn");
    567 
    568             if (version==0){
    569                 myConverterData->version = 0;
    570                 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=0");
    571             }else if (version==1){
    572                 myConverterData->version = 1;
    573                 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=1");
    574             }else {
    575                 myConverterData->version = 2;
    576                 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2");
    577             }
    578         }
    579         else{
    580 #ifdef U_ENABLE_GENERIC_ISO_2022
    581             myConverterData->isFirstBuffer = TRUE;
    582 
    583             /* append the UTF-8 escape sequence */
    584             cnv->charErrorBufferLength = 3;
    585             cnv->charErrorBuffer[0] = 0x1b;
    586             cnv->charErrorBuffer[1] = 0x25;
    587             cnv->charErrorBuffer[2] = 0x42;
    588 
    589             cnv->sharedData=(UConverterSharedData*)&_ISO2022Data;
    590             /* initialize the state variables */
    591             uprv_strcpy(myConverterData->name,"ISO_2022");
    592 #else
    593             *errorCode = U_UNSUPPORTED_ERROR;
    594             return;
    595 #endif
    596         }
    597 
    598         cnv->maxBytesPerUChar=cnv->sharedData->staticData->maxBytesPerChar;
    599 
    600         if(U_FAILURE(*errorCode) || pArgs->onlyTestIsLoadable) {
    601             _ISO2022Close(cnv);
    602         }
    603     } else {
    604         *errorCode = U_MEMORY_ALLOCATION_ERROR;
    605     }
    606 }
    607 
    608 
    609 static void
    610 _ISO2022Close(UConverter *converter) {
    611     UConverterDataISO2022* myData =(UConverterDataISO2022 *) (converter->extraInfo);
    612     UConverterSharedData **array = myData->myConverterArray;
    613     int32_t i;
    614 
    615     if (converter->extraInfo != NULL) {
    616         /*close the array of converter pointers and free the memory*/
    617         for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
    618             if(array[i]!=NULL) {
    619                 ucnv_unloadSharedDataIfReady(array[i]);
    620             }
    621         }
    622 
    623         ucnv_close(myData->currentConverter);
    624 
    625         if(!converter->isExtraLocal){
    626             uprv_free (converter->extraInfo);
    627             converter->extraInfo = NULL;
    628         }
    629     }
    630 }
    631 
    632 static void
    633 _ISO2022Reset(UConverter *converter, UConverterResetChoice choice) {
    634     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) (converter->extraInfo);
    635     if(choice<=UCNV_RESET_TO_UNICODE) {
    636         uprv_memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State));
    637         myConverterData->key = 0;
    638         myConverterData->isEmptySegment = FALSE;
    639     }
    640     if(choice!=UCNV_RESET_TO_UNICODE) {
    641         uprv_memset(&myConverterData->fromU2022State, 0, sizeof(ISO2022State));
    642     }
    643 #ifdef U_ENABLE_GENERIC_ISO_2022
    644     if(myConverterData->locale[0] == 0){
    645         if(choice<=UCNV_RESET_TO_UNICODE) {
    646             myConverterData->isFirstBuffer = TRUE;
    647             myConverterData->key = 0;
    648             if (converter->mode == UCNV_SO){
    649                 ucnv_close (myConverterData->currentConverter);
    650                 myConverterData->currentConverter=NULL;
    651             }
    652             converter->mode = UCNV_SI;
    653         }
    654         if(choice!=UCNV_RESET_TO_UNICODE) {
    655             /* re-append UTF-8 escape sequence */
    656             converter->charErrorBufferLength = 3;
    657             converter->charErrorBuffer[0] = 0x1b;
    658             converter->charErrorBuffer[1] = 0x28;
    659             converter->charErrorBuffer[2] = 0x42;
    660         }
    661     }
    662     else
    663 #endif
    664     {
    665         /* reset the state variables */
    666         if(myConverterData->locale[0] == 'k'){
    667             if(choice<=UCNV_RESET_TO_UNICODE) {
    668                 setInitialStateToUnicodeKR(converter, myConverterData);
    669             }
    670             if(choice!=UCNV_RESET_TO_UNICODE) {
    671                 setInitialStateFromUnicodeKR(converter, myConverterData);
    672             }
    673         }
    674     }
    675 }
    676 
    677 static const char*
    678 _ISO2022getName(const UConverter* cnv){
    679     if(cnv->extraInfo){
    680         UConverterDataISO2022* myData= (UConverterDataISO2022*)cnv->extraInfo;
    681         return myData->name;
    682     }
    683     return NULL;
    684 }
    685 
    686 
    687 /*************** to unicode *******************/
    688 /****************************************************************************
    689  * Recognized escape sequences are
    690  * <ESC>(B  ASCII
    691  * <ESC>.A  ISO-8859-1
    692  * <ESC>.F  ISO-8859-7
    693  * <ESC>(J  JISX-201
    694  * <ESC>(I  JISX-201
    695  * <ESC>$B  JISX-208
    696  * <ESC>$@  JISX-208
    697  * <ESC>$(D JISX-212
    698  * <ESC>$A  GB2312
    699  * <ESC>$(C KSC5601
    700  */
    701 static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= {
    702 /*      0                1               2               3               4               5               6               7               8               9    */
    703     INVALID_STATE   ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,SS2_STATE      ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    704     ,ASCII          ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,JISX201        ,HWKANA_7BIT    ,JISX201        ,INVALID_STATE
    705     ,INVALID_STATE  ,INVALID_STATE  ,JISX208        ,GB2312         ,JISX208        ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    706     ,ISO8859_1      ,ISO8859_7      ,JISX208        ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,KSC5601        ,JISX212        ,INVALID_STATE
    707     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    708     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    709     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    710     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    711 };
    712 
    713 /*************** to unicode *******************/
    714 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
    715 /*      0                1               2               3               4               5               6               7               8               9    */
    716      INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,SS2_STATE      ,SS3_STATE      ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    717     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    718     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    719     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    720     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,GB2312_1       ,INVALID_STATE  ,ISO_IR_165
    721     ,CNS_11643_1    ,CNS_11643_2    ,CNS_11643_3    ,CNS_11643_4    ,CNS_11643_5    ,CNS_11643_6    ,CNS_11643_7    ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    722     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    723     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    724 };
    725 
    726 
    727 static UCNV_TableStates_2022
    728 getKey_2022(char c,int32_t* key,int32_t* offset){
    729     int32_t togo;
    730     int32_t low = 0;
    731     int32_t hi = MAX_STATES_2022;
    732     int32_t oldmid=0;
    733 
    734     togo = normalize_esq_chars_2022[(uint8_t)c];
    735     if(togo == 0) {
    736         /* not a valid character anywhere in an escape sequence */
    737         *key = 0;
    738         *offset = 0;
    739         return INVALID_2022;
    740     }
    741     togo = (*key << 5) + togo;
    742 
    743     while (hi != low)  /*binary search*/{
    744 
    745         register int32_t mid = (hi+low) >> 1; /*Finds median*/
    746 
    747         if (mid == oldmid)
    748             break;
    749 
    750         if (escSeqStateTable_Key_2022[mid] > togo){
    751             hi = mid;
    752         }
    753         else if (escSeqStateTable_Key_2022[mid] < togo){
    754             low = mid;
    755         }
    756         else /*we found it*/{
    757             *key = togo;
    758             *offset = mid;
    759             return (UCNV_TableStates_2022)escSeqStateTable_Value_2022[mid];
    760         }
    761         oldmid = mid;
    762 
    763     }
    764 
    765     *key = 0;
    766     *offset = 0;
    767     return INVALID_2022;
    768 }
    769 
    770 /*runs through a state machine to determine the escape sequence - codepage correspondance
    771  */
    772 static void
    773 changeState_2022(UConverter* _this,
    774                 const char** source,
    775                 const char* sourceLimit,
    776                 Variant2022 var,
    777                 UErrorCode* err){
    778     UCNV_TableStates_2022 value;
    779     UConverterDataISO2022* myData2022 = ((UConverterDataISO2022*)_this->extraInfo);
    780     uint32_t key = myData2022->key;
    781     int32_t offset = 0;
    782     int8_t initialToULength = _this->toULength;
    783     char c;
    784 
    785     value = VALID_NON_TERMINAL_2022;
    786     while (*source < sourceLimit) {
    787         c = *(*source)++;
    788         _this->toUBytes[_this->toULength++]=(uint8_t)c;
    789         value = getKey_2022(c,(int32_t *) &key, &offset);
    790 
    791         switch (value){
    792 
    793         case VALID_NON_TERMINAL_2022 :
    794             /* continue with the loop */
    795             break;
    796 
    797         case VALID_TERMINAL_2022:
    798             key = 0;
    799             goto DONE;
    800 
    801         case INVALID_2022:
    802             goto DONE;
    803 
    804         case VALID_MAYBE_TERMINAL_2022:
    805 #ifdef U_ENABLE_GENERIC_ISO_2022
    806             /* ESC ( B is ambiguous only for ISO_2022 itself */
    807             if(var == ISO_2022) {
    808                 /* discard toUBytes[] for ESC ( B because this sequence is correct and complete */
    809                 _this->toULength = 0;
    810 
    811                 /* TODO need to indicate that ESC ( B was seen; if failure, then need to replay from source or from MBCS-style replay */
    812 
    813                 /* continue with the loop */
    814                 value = VALID_NON_TERMINAL_2022;
    815                 break;
    816             } else
    817 #endif
    818             {
    819                 /* not ISO_2022 itself, finish here */
    820                 value = VALID_TERMINAL_2022;
    821                 key = 0;
    822                 goto DONE;
    823             }
    824         }
    825     }
    826 
    827 DONE:
    828     myData2022->key = key;
    829 
    830     if (value == VALID_NON_TERMINAL_2022) {
    831         /* indicate that the escape sequence is incomplete: key!=0 */
    832         return;
    833     } else if (value == INVALID_2022 ) {
    834         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    835     } else /* value == VALID_TERMINAL_2022 */ {
    836         switch(var){
    837 #ifdef U_ENABLE_GENERIC_ISO_2022
    838         case ISO_2022:
    839         {
    840             const char *chosenConverterName = escSeqStateTable_Result_2022[offset];
    841             if(chosenConverterName == NULL) {
    842                 /* SS2 or SS3 */
    843                 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    844                 _this->toUCallbackReason = UCNV_UNASSIGNED;
    845                 return;
    846             }
    847 
    848             _this->mode = UCNV_SI;
    849             ucnv_close(myData2022->currentConverter);
    850             myData2022->currentConverter = myUConverter = ucnv_open(chosenConverterName, err);
    851             if(U_SUCCESS(*err)) {
    852                 myUConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
    853                 _this->mode = UCNV_SO;
    854             }
    855             break;
    856         }
    857 #endif
    858         case ISO_2022_JP:
    859             {
    860                 StateEnum tempState=(StateEnum)nextStateToUnicodeJP[offset];
    861                 switch(tempState) {
    862                 case INVALID_STATE:
    863                     *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    864                     break;
    865                 case SS2_STATE:
    866                     if(myData2022->toU2022State.cs[2]!=0) {
    867                         if(myData2022->toU2022State.g<2) {
    868                             myData2022->toU2022State.prevG=myData2022->toU2022State.g;
    869                         }
    870                         myData2022->toU2022State.g=2;
    871                     } else {
    872                         /* illegal to have SS2 before a matching designator */
    873                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    874                     }
    875                     break;
    876                 /* case SS3_STATE: not used in ISO-2022-JP-x */
    877                 case ISO8859_1:
    878                 case ISO8859_7:
    879                     if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
    880                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    881                     } else {
    882                         /* G2 charset for SS2 */
    883                         myData2022->toU2022State.cs[2]=(int8_t)tempState;
    884                     }
    885                     break;
    886                 default:
    887                     if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
    888                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    889                     } else {
    890                         /* G0 charset */
    891                         myData2022->toU2022State.cs[0]=(int8_t)tempState;
    892                     }
    893                     break;
    894                 }
    895             }
    896             break;
    897         case ISO_2022_CN:
    898             {
    899                 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
    900                 switch(tempState) {
    901                 case INVALID_STATE:
    902                     *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    903                     break;
    904                 case SS2_STATE:
    905                     if(myData2022->toU2022State.cs[2]!=0) {
    906                         if(myData2022->toU2022State.g<2) {
    907                             myData2022->toU2022State.prevG=myData2022->toU2022State.g;
    908                         }
    909                         myData2022->toU2022State.g=2;
    910                     } else {
    911                         /* illegal to have SS2 before a matching designator */
    912                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    913                     }
    914                     break;
    915                 case SS3_STATE:
    916                     if(myData2022->toU2022State.cs[3]!=0) {
    917                         if(myData2022->toU2022State.g<2) {
    918                             myData2022->toU2022State.prevG=myData2022->toU2022State.g;
    919                         }
    920                         myData2022->toU2022State.g=3;
    921                     } else {
    922                         /* illegal to have SS3 before a matching designator */
    923                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    924                     }
    925                     break;
    926                 case ISO_IR_165:
    927                     if(myData2022->version==0) {
    928                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    929                         break;
    930                     }
    931                     /*fall through*/
    932                 case GB2312_1:
    933                     /*fall through*/
    934                 case CNS_11643_1:
    935                     myData2022->toU2022State.cs[1]=(int8_t)tempState;
    936                     break;
    937                 case CNS_11643_2:
    938                     myData2022->toU2022State.cs[2]=(int8_t)tempState;
    939                     break;
    940                 default:
    941                     /* other CNS 11643 planes */
    942                     if(myData2022->version==0) {
    943                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    944                     } else {
    945                        myData2022->toU2022State.cs[3]=(int8_t)tempState;
    946                     }
    947                     break;
    948                 }
    949             }
    950             break;
    951         case ISO_2022_KR:
    952             if(offset==0x30){
    953                 /* nothing to be done, just accept this one escape sequence */
    954             } else {
    955                 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    956             }
    957             break;
    958 
    959         default:
    960             *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    961             break;
    962         }
    963     }
    964     if(U_SUCCESS(*err)) {
    965         _this->toULength = 0;
    966     } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) {
    967         if(_this->toULength>1) {
    968             /*
    969              * Ticket 5691: consistent illegal sequences:
    970              * - We include at least the first byte (ESC) in the illegal sequence.
    971              * - If any of the non-initial bytes could be the start of a character,
    972              *   we stop the illegal sequence before the first one of those.
    973              *   In escape sequences, all following bytes are "printable", that is,
    974              *   unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS),
    975              *   they are valid single/lead bytes.
    976              *   For simplicity, we always only report the initial ESC byte as the
    977              *   illegal sequence and back out all other bytes we looked at.
    978              */
    979             /* Back out some bytes. */
    980             int8_t backOutDistance=_this->toULength-1;
    981             int8_t bytesFromThisBuffer=_this->toULength-initialToULength;
    982             if(backOutDistance<=bytesFromThisBuffer) {
    983                 /* same as initialToULength<=1 */
    984                 *source-=backOutDistance;
    985             } else {
    986                 /* Back out bytes from the previous buffer: Need to replay them. */
    987                 _this->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance);
    988                 /* same as -(initialToULength-1) */
    989                 /* preToULength is negative! */
    990                 uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULength);
    991                 *source-=bytesFromThisBuffer;
    992             }
    993             _this->toULength=1;
    994         }
    995     } else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) {
    996         _this->toUCallbackReason = UCNV_UNASSIGNED;
    997     }
    998 }
    999 
   1000 /*Checks the characters of the buffer against valid 2022 escape sequences
   1001 *if the match we return a pointer to the initial start of the sequence otherwise
   1002 *we return sourceLimit
   1003 */
   1004 /*for 2022 looks ahead in the stream
   1005  *to determine the longest possible convertible
   1006  *data stream
   1007  */
   1008 static U_INLINE const char*
   1009 getEndOfBuffer_2022(const char** source,
   1010                    const char* sourceLimit,
   1011                    UBool flush){
   1012 
   1013     const char* mySource = *source;
   1014 
   1015 #ifdef U_ENABLE_GENERIC_ISO_2022
   1016     if (*source >= sourceLimit)
   1017         return sourceLimit;
   1018 
   1019     do{
   1020 
   1021         if (*mySource == ESC_2022){
   1022             int8_t i;
   1023             int32_t key = 0;
   1024             int32_t offset;
   1025             UCNV_TableStates_2022 value = VALID_NON_TERMINAL_2022;
   1026 
   1027             /* Kludge: I could not
   1028             * figure out the reason for validating an escape sequence
   1029             * twice - once here and once in changeState_2022().
   1030             * is it possible to have an ESC character in a ISO2022
   1031             * byte stream which is valid in a code page? Is it legal?
   1032             */
   1033             for (i=0;
   1034             (mySource+i < sourceLimit)&&(value == VALID_NON_TERMINAL_2022);
   1035             i++) {
   1036                 value =  getKey_2022(*(mySource+i), &key, &offset);
   1037             }
   1038             if (value > 0 || *mySource==ESC_2022)
   1039                 return mySource;
   1040 
   1041             if ((value == VALID_NON_TERMINAL_2022)&&(!flush) )
   1042                 return sourceLimit;
   1043         }
   1044     }while (++mySource < sourceLimit);
   1045 
   1046     return sourceLimit;
   1047 #else
   1048     while(mySource < sourceLimit && *mySource != ESC_2022) {
   1049         ++mySource;
   1050     }
   1051     return mySource;
   1052 #endif
   1053 }
   1054 
   1055 
   1056 /* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c
   1057  * any future change in _MBCSFromUChar32() function should be reflected here.
   1058  * @return number of bytes in *value; negative number if fallback; 0 if no mapping
   1059  */
   1060 static U_INLINE int32_t
   1061 MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData,
   1062                                          UChar32 c,
   1063                                          uint32_t* value,
   1064                                          UBool useFallback,
   1065                                          int outputType)
   1066 {
   1067     const int32_t *cx;
   1068     const uint16_t *table;
   1069     uint32_t stage2Entry;
   1070     uint32_t myValue;
   1071     int32_t length;
   1072     const uint8_t *p;
   1073     /*
   1074      * TODO(markus): Use and require new, faster MBCS conversion table structures.
   1075      * Use internal version of ucnv_open() that verifies that the new structures are available,
   1076      * else U_INTERNAL_PROGRAM_ERROR.
   1077      */
   1078     /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
   1079     if(c<0x10000 || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
   1080         table=sharedData->mbcs.fromUnicodeTable;
   1081         stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
   1082         /* get the bytes and the length for the output */
   1083         if(outputType==MBCS_OUTPUT_2){
   1084             myValue=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
   1085             if(myValue<=0xff) {
   1086                 length=1;
   1087             } else {
   1088                 length=2;
   1089             }
   1090         } else /* outputType==MBCS_OUTPUT_3 */ {
   1091             p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
   1092             myValue=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
   1093             if(myValue<=0xff) {
   1094                 length=1;
   1095             } else if(myValue<=0xffff) {
   1096                 length=2;
   1097             } else {
   1098                 length=3;
   1099             }
   1100         }
   1101         /* is this code point assigned, or do we use fallbacks? */
   1102         if((stage2Entry&(1<<(16+(c&0xf))))!=0) {
   1103             /* assigned */
   1104             *value=myValue;
   1105             return length;
   1106         } else if(FROM_U_USE_FALLBACK(useFallback, c) && myValue!=0) {
   1107             /*
   1108              * We allow a 0 byte output if the "assigned" bit is set for this entry.
   1109              * There is no way with this data structure for fallback output
   1110              * to be a zero byte.
   1111              */
   1112             *value=myValue;
   1113             return -length;
   1114         }
   1115     }
   1116 
   1117     cx=sharedData->mbcs.extIndexes;
   1118     if(cx!=NULL) {
   1119         return ucnv_extSimpleMatchFromU(cx, c, value, useFallback);
   1120     }
   1121 
   1122     /* unassigned */
   1123     return 0;
   1124 }
   1125 
   1126 /* This inline function replicates code in _MBCSSingleFromUChar32() function in ucnvmbcs.c
   1127  * any future change in _MBCSSingleFromUChar32() function should be reflected here.
   1128  * @param retval pointer to output byte
   1129  * @return 1 roundtrip byte  0 no mapping  -1 fallback byte
   1130  */
   1131 static U_INLINE int32_t
   1132 MBCS_SINGLE_FROM_UCHAR32(UConverterSharedData* sharedData,
   1133                                        UChar32 c,
   1134                                        uint32_t* retval,
   1135                                        UBool useFallback)
   1136 {
   1137     const uint16_t *table;
   1138     int32_t value;
   1139     /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
   1140     if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
   1141         return 0;
   1142     }
   1143     /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
   1144     table=sharedData->mbcs.fromUnicodeTable;
   1145     /* get the byte for the output */
   1146     value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
   1147     /* is this code point assigned, or do we use fallbacks? */
   1148     *retval=(uint32_t)(value&0xff);
   1149     if(value>=0xf00) {
   1150         return 1;  /* roundtrip */
   1151     } else if(useFallback ? value>=0x800 : value>=0xc00) {
   1152         return -1;  /* fallback taken */
   1153     } else {
   1154         return 0;  /* no mapping */
   1155     }
   1156 }
   1157 
   1158 /*
   1159  * Check that the result is a 2-byte value with each byte in the range A1..FE
   1160  * (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte
   1161  * to move it to the ISO 2022 range 21..7E.
   1162  * Return 0 if out of range.
   1163  */
   1164 static U_INLINE uint32_t
   1165 _2022FromGR94DBCS(uint32_t value) {
   1166     if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) &&
   1167         (uint8_t)(value - 0xa1) <= (0xfe - 0xa1)
   1168     ) {
   1169         return value - 0x8080;  /* shift down to 21..7e byte range */
   1170     } else {
   1171         return 0;  /* not valid for ISO 2022 */
   1172     }
   1173 }
   1174 
   1175 #if 0 /* 5691: Call sites now check for validity. They can just += 0x8080 after that. */
   1176 /*
   1177  * This method does the reverse of _2022FromGR94DBCS(). Given the 2022 code point, it returns the
   1178  * 2 byte value that is in the range A1..FE for each byte. Otherwise it returns the 2022 code point
   1179  * unchanged.
   1180  */
   1181 static U_INLINE uint32_t
   1182 _2022ToGR94DBCS(uint32_t value) {
   1183     uint32_t returnValue = value + 0x8080;
   1184     if( (uint16_t)(returnValue - 0xa1a1) <= (0xfefe - 0xa1a1) &&
   1185         (uint8_t)(returnValue - 0xa1) <= (0xfe - 0xa1)) {
   1186         return returnValue;
   1187     } else {
   1188         return value;
   1189     }
   1190 }
   1191 #endif
   1192 
   1193 #ifdef U_ENABLE_GENERIC_ISO_2022
   1194 
   1195 /**********************************************************************************
   1196 *  ISO-2022 Converter
   1197 *
   1198 *
   1199 */
   1200 
   1201 static void
   1202 T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args,
   1203                                                            UErrorCode* err){
   1204     const char* mySourceLimit, *realSourceLimit;
   1205     const char* sourceStart;
   1206     const UChar* myTargetStart;
   1207     UConverter* saveThis;
   1208     UConverterDataISO2022* myData;
   1209     int8_t length;
   1210 
   1211     saveThis = args->converter;
   1212     myData=((UConverterDataISO2022*)(saveThis->extraInfo));
   1213 
   1214     realSourceLimit = args->sourceLimit;
   1215     while (args->source < realSourceLimit) {
   1216         if(myData->key == 0) { /* are we in the middle of an escape sequence? */
   1217             /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
   1218             mySourceLimit = getEndOfBuffer_2022(&(args->source), realSourceLimit, args->flush);
   1219 
   1220             if(args->source < mySourceLimit) {
   1221                 if(myData->currentConverter==NULL) {
   1222                     myData->currentConverter = ucnv_open("ASCII",err);
   1223                     if(U_FAILURE(*err)){
   1224                         return;
   1225                     }
   1226 
   1227                     myData->currentConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
   1228                     saveThis->mode = UCNV_SO;
   1229                 }
   1230 
   1231                 /* convert to before the ESC or until the end of the buffer */
   1232                 myData->isFirstBuffer=FALSE;
   1233                 sourceStart = args->source;
   1234                 myTargetStart = args->target;
   1235                 args->converter = myData->currentConverter;
   1236                 ucnv_toUnicode(args->converter,
   1237                     &args->target,
   1238                     args->targetLimit,
   1239                     &args->source,
   1240                     mySourceLimit,
   1241                     args->offsets,
   1242                     (UBool)(args->flush && mySourceLimit == realSourceLimit),
   1243                     err);
   1244                 args->converter = saveThis;
   1245 
   1246                 if (*err == U_BUFFER_OVERFLOW_ERROR) {
   1247                     /* move the overflow buffer */
   1248                     length = saveThis->UCharErrorBufferLength = myData->currentConverter->UCharErrorBufferLength;
   1249                     myData->currentConverter->UCharErrorBufferLength = 0;
   1250                     if(length > 0) {
   1251                         uprv_memcpy(saveThis->UCharErrorBuffer,
   1252                                     myData->currentConverter->UCharErrorBuffer,
   1253                                     length*U_SIZEOF_UCHAR);
   1254                     }
   1255                     return;
   1256                 }
   1257 
   1258                 /*
   1259                  * At least one of:
   1260                  * -Error while converting
   1261                  * -Done with entire buffer
   1262                  * -Need to write offsets or update the current offset
   1263                  *  (leave that up to the code in ucnv.c)
   1264                  *
   1265                  * or else we just stopped at an ESC byte and continue with changeState_2022()
   1266                  */
   1267                 if (U_FAILURE(*err) ||
   1268                     (args->source == realSourceLimit) ||
   1269                     (args->offsets != NULL && (args->target != myTargetStart || args->source != sourceStart) ||
   1270                     (mySourceLimit < realSourceLimit && myData->currentConverter->toULength > 0))
   1271                 ) {
   1272                     /* copy partial or error input for truncated detection and error handling */
   1273                     if(U_FAILURE(*err)) {
   1274                         length = saveThis->invalidCharLength = myData->currentConverter->invalidCharLength;
   1275                         if(length > 0) {
   1276                             uprv_memcpy(saveThis->invalidCharBuffer, myData->currentConverter->invalidCharBuffer, length);
   1277                         }
   1278                     } else {
   1279                         length = saveThis->toULength = myData->currentConverter->toULength;
   1280                         if(length > 0) {
   1281                             uprv_memcpy(saveThis->toUBytes, myData->currentConverter->toUBytes, length);
   1282                             if(args->source < mySourceLimit) {
   1283                                 *err = U_TRUNCATED_CHAR_FOUND; /* truncated input before ESC */
   1284                             }
   1285                         }
   1286                     }
   1287                     return;
   1288                 }
   1289             }
   1290         }
   1291 
   1292         sourceStart = args->source;
   1293         changeState_2022(args->converter,
   1294                &(args->source),
   1295                realSourceLimit,
   1296                ISO_2022,
   1297                err);
   1298         if (U_FAILURE(*err) || (args->source != sourceStart && args->offsets != NULL)) {
   1299             /* let the ucnv.c code update its current offset */
   1300             return;
   1301         }
   1302     }
   1303 }
   1304 
   1305 #endif
   1306 
   1307 /*
   1308  * To Unicode Callback helper function
   1309  */
   1310 static void
   1311 toUnicodeCallback(UConverter *cnv,
   1312                   const uint32_t sourceChar, const uint32_t targetUniChar,
   1313                   UErrorCode* err){
   1314     if(sourceChar>0xff){
   1315         cnv->toUBytes[0] = (uint8_t)(sourceChar>>8);
   1316         cnv->toUBytes[1] = (uint8_t)sourceChar;
   1317         cnv->toULength = 2;
   1318     }
   1319     else{
   1320         cnv->toUBytes[0] =(char) sourceChar;
   1321         cnv->toULength = 1;
   1322     }
   1323 
   1324     if(targetUniChar == (missingCharMarker-1/*0xfffe*/)){
   1325         *err = U_INVALID_CHAR_FOUND;
   1326     }
   1327     else{
   1328         *err = U_ILLEGAL_CHAR_FOUND;
   1329     }
   1330 }
   1331 
   1332 /**************************************ISO-2022-JP*************************************************/
   1333 
   1334 /************************************** IMPORTANT **************************************************
   1335 * The UConverter_fromUnicode_ISO2022_JP converter does not use ucnv_fromUnicode() functions for SBCS,DBCS and
   1336 * MBCS; instead, the values are obtained directly by calling _MBCSFromUChar32().
   1337 * The converter iterates over each Unicode codepoint
   1338 * to obtain the equivalent codepoints from the codepages supported. Since the source buffer is
   1339 * processed one char at a time it would make sense to reduce the extra processing a canned converter
   1340 * would do as far as possible.
   1341 *
   1342 * If the implementation of these macros or structure of sharedData struct change in the future, make
   1343 * sure that ISO-2022 is also changed.
   1344 ***************************************************************************************************
   1345 */
   1346 
   1347 /***************************************************************************************************
   1348 * Rules for ISO-2022-jp encoding
   1349 * (i)   Escape sequences must be fully contained within a line they should not
   1350 *       span new lines or CRs
   1351 * (ii)  If the last character on a line is represented by two bytes then an ASCII or
   1352 *       JIS-Roman character escape sequence should follow before the line terminates
   1353 * (iii) If the first character on the line is represented by two bytes then a two
   1354 *       byte character escape sequence should precede it
   1355 * (iv)  If no escape sequence is encountered then the characters are ASCII
   1356 * (v)   Latin(ISO-8859-1) and Greek(ISO-8859-7) characters must be designated to G2,
   1357 *       and invoked with SS2 (ESC N).
   1358 * (vi)  If there is any G0 designation in text, there must be a switch to
   1359 *       ASCII or to JIS X 0201-Roman before a space character (but not
   1360 *       necessarily before "ESC 4/14 2/0" or "ESC N ' '") or control
   1361 *       characters such as tab or CRLF.
   1362 * (vii) Supported encodings:
   1363 *          ASCII, JISX201, JISX208, JISX212, GB2312, KSC5601, ISO-8859-1,ISO-8859-7
   1364 *
   1365 *  source : RFC-1554
   1366 *
   1367 *          JISX201, JISX208,JISX212 : new .cnv data files created
   1368 *          KSC5601 : alias to ibm-949 mapping table
   1369 *          GB2312 : alias to ibm-1386 mapping table
   1370 *          ISO-8859-1 : Algorithmic implemented as LATIN1 case
   1371 *          ISO-8859-7 : alias to ibm-9409 mapping table
   1372 */
   1373 
   1374 /*
         * Preference order of JP charsets.
         *
         * UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC() scans this table in
         * order when it builds its choices[] list, after the current G0 and G2
         * charsets have been added. An entry is appended only if its bit is still
         * set in the per-version charset mask (csm). That function clears the
         * HWKANA_7BIT bit from csm before the scan, so the last entry here is
         * never picked up by the scan itself.
         */
   1375 static const StateEnum jpCharsetPref[]={
   1376     ASCII,
   1377     JISX201,
   1378     ISO8859_1,
   1379     ISO8859_7,
   1380     JISX208,
   1381     JISX212,
   1382     GB2312,
   1383     KSC5601,
   1384     HWKANA_7BIT
   1385 };
   1386 
   1387 /*
   1388  * The escape sequences must be in order of the enum constants like JISX201  = 3,
   1389  * not in order of jpCharsetPref[]!
         *
         * Both tables below are indexed by the charset value (escSeqChars[cs],
         * escSeqCharsLen[cs]) when a designation sequence is written.
         * Each row of escSeqChars has room for 6 chars; the longest sequence
         * listed is 4 bytes. The number of bytes copied comes from
         * escSeqCharsLen[], so the two tables must be kept in step entry by entry.
   1390  */
   1391 static const char escSeqChars[][6] ={
   1392     "\x1B\x28\x42",         /* <ESC>(B  ASCII       */
   1393     "\x1B\x2E\x41",         /* <ESC>.A  ISO-8859-1  */
   1394     "\x1B\x2E\x46",         /* <ESC>.F  ISO-8859-7  */
   1395     "\x1B\x28\x4A",         /* <ESC>(J  JISX-201    */
   1396     "\x1B\x24\x42",         /* <ESC>$B  JISX-208    */
   1397     "\x1B\x24\x28\x44",     /* <ESC>$(D JISX-212    */
   1398     "\x1B\x24\x41",         /* <ESC>$A  GB2312      */
   1399     "\x1B\x24\x28\x43",     /* <ESC>$(C KSC5601     */
   1400     "\x1B\x28\x49"          /* <ESC>(I  HWKANA_7BIT */
   1401
   1402 };
        /* Byte length of the corresponding escSeqChars[] entry (same index). */
   1403 static  const int8_t escSeqCharsLen[] ={
   1404     3, /* length of <ESC>(B  ASCII       */
   1405     3, /* length of <ESC>.A  ISO-8859-1  */
   1406     3, /* length of <ESC>.F  ISO-8859-7  */
   1407     3, /* length of <ESC>(J  JISX-201    */
   1408     3, /* length of <ESC>$B  JISX-208    */
   1409     4, /* length of <ESC>$(D JISX-212    */
   1410     3, /* length of <ESC>$A  GB2312      */
   1411     4, /* length of <ESC>$(C KSC5601     */
   1412     3  /* length of <ESC>(I  HWKANA_7BIT */
   1413 };
   1414 
   1415 /*
   1416 * The iteration over various code pages works this way:
   1417 * i)   Get the currentState from myConverterData->currentState
   1418 * ii)  Check if the character is mapped to a valid character in the currentState
   1419 *      Yes ->  a) set the initIterState to currentState
   1420 *       b) remain in this state until an invalid character is found
   1421 *      No  ->  a) go to the next code page and find the character
   1422 * iii) Before changing the state increment the current state check if the current state
   1423 *      is equal to the initIterState
   1424 *      Yes ->  A character that cannot be represented in any of the supported encodings
   1425 *       break and return a U_INVALID_CHARACTER error
   1426 *      No  ->  Continue and find the character in next code page
   1427 *
   1428 *
   1429 * TODO: Implement a priority technique where the users are allowed to set the priority of code pages
   1430 */
   1431 
   /*
    * Map a JIS X 0201 byte 00..7F to Unicode.
    *
    * The mapping is the identity except for two code points:
    *   0x5C -> U+00A5
    *   0x7E -> U+203E
    * The argument is not range-checked; a value above 0x7F is returned unchanged.
    */
   static U_INLINE uint32_t
   jisx201ToU(uint32_t value) {
       switch(value) {
       case 0x5c:
           return 0xa5;
       case 0x7e:
           return 0x203e;
       default:
           /* every other value maps to itself */
           return value;
       }
   }
   1445 
   /*
    * Map Unicode to a JIS X 0201 byte 00..7F.
    *
    * Inverse of jisx201ToU():
    *   U+00A5 -> 0x5C
    *   U+203E -> 0x7E
    *   U+0000..U+007F except U+005C and U+007E -> same value
    * Everything else (including U+005C and U+007E themselves) is unmappable
    * and yields 0xFFFE.
    */
   static U_INLINE uint32_t
   jisx201FromU(uint32_t value) {
       if(value==0xa5) {
           return 0x5c;
       }
       if(value==0x203e) {
           return 0x7e;
       }
       if(value<=0x7f && value!=0x5c && value!=0x7e) {
           return value;
       }
       return 0xfffe;
   }
   1460 
   /*
    * Take a valid Shift-JIS byte pair, check that it is in the range corresponding
    * to JIS X 0208, and convert it to a pair of 21..7E bytes.
    * Return 0 if the byte pair is out of range.
    *
    * value holds the pair as (lead<<8)|trail. Only the upper bound (0xEFFC) is
    * checked here; the caller must pass a valid Shift-JIS double-byte value.
    * The result is returned in the same (byte1<<8)|byte2 layout.
    */
   static U_INLINE uint32_t
   _2022FromSJIS(uint32_t value) {
       uint32_t result;
       uint8_t trail;

       if(value > 0xEFFC) {
           return 0;  /* beyond JIS X 0208 */
       }

       trail = (uint8_t)value;

       /* fold the two Shift-JIS lead-byte ranges into one contiguous range */
       result = value & 0xff00;
       if(result <= 0x9f00) {
           result -= 0x7000;
       } else /* 0xe000 <= result <= 0xef00 */ {
           result -= 0xb000;
       }
       /* each Shift-JIS lead byte covers two JIS X 0208 rows */
       result <<= 1;

       if(trail <= 0x9e) {
           /* first row of the pair; Shift-JIS skips trail byte 0x7f */
           result -= 0x100;
           if(trail <= 0x7e) {
               result |= trail - 0x1f;
           } else {
               result |= trail - 0x20;
           }
       } else /* trail <= 0xfc */ {
           /* second row of the pair */
           result |= trail - 0x7e;
       }
       return result;
   }
   1496 
   /*
    * Convert a pair of JIS X 0208 21..7E bytes to Shift-JIS.
    * If either byte is outside 21..7E make sure that the result is not valid
    * for Shift-JIS so that the converter catches it.
    * Some invalid byte values already turn into equally invalid Shift-JIS
    * byte values and need not be tested explicitly.
    *
    * c1, c2: first and second JIS X 0208 byte.
    * bytes:  receives the Shift-JIS lead byte in bytes[0] and the trail byte in
    *         bytes[1]; a byte detected as invalid is written as 0.
    */
   static U_INLINE void
   _2022ToSJIS(uint8_t c1, uint8_t c2, char bytes[2]) {
       if(c1&1) {
           /* odd row: first half of the Shift-JIS trail range, skipping 0x7f */
           ++c1;
           if(c2 <= 0x5f) {
               c2 += 0x1f;
           } else if(c2 <= 0x7e) {
               c2 += 0x20;
           } else {
               c2 = 0;  /* invalid */
           }
       } else {
           /* even row: second half of the Shift-JIS trail range */
           if((uint8_t)(c2-0x21) <= ((0x7e)-0x21)) {
               c2 += 0x7e;
           } else {
               c2 = 0;  /* invalid */
           }
       }

       /* two JIS rows share one Shift-JIS lead byte */
       c1 >>= 1;
       if(c1 <= 0x2f) {
           c1 += 0x70;
       } else if(c1 <= 0x3f) {
           c1 += 0xb0;
       } else {
           c1 = 0;  /* invalid */
       }

       bytes[0] = (char)c1;
       bytes[1] = (char)c2;
   }
   1533 
   1534 /*
   1535  * JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS)
   1536  * Katakana.
   1537  * Now that we use a Shift-JIS table for JIS X 0208 we need to hardcode these fallbacks
   1538  * because Shift-JIS roundtrips half-width Katakana to single bytes.
   1539  * These were the only fallbacks in ICU's jisx-208.ucm file.
         *
         * The table is indexed by (code point - HWKANA_START); per the entry
         * comments below it covers U+FF61..U+FF9F. Each entry is a two-byte
         * value that the JISX208 case of the fromUnicode function uses directly
         * as targetValue with len = -2 (a fallback result).
   1540  */
   1541 static const uint16_t hwkana_fb[HWKANA_END - HWKANA_START + 1] = {
   1542     0x2123,  /* U+FF61 */
   1543     0x2156,
   1544     0x2157,
   1545     0x2122,
   1546     0x2126,
   1547     0x2572,
   1548     0x2521,
   1549     0x2523,
   1550     0x2525,
   1551     0x2527,
   1552     0x2529,
   1553     0x2563,
   1554     0x2565,
   1555     0x2567,
   1556     0x2543,
   1557     0x213C,  /* U+FF70 */
   1558     0x2522,
   1559     0x2524,
   1560     0x2526,
   1561     0x2528,
   1562     0x252A,
   1563     0x252B,
   1564     0x252D,
   1565     0x252F,
   1566     0x2531,
   1567     0x2533,
   1568     0x2535,
   1569     0x2537,
   1570     0x2539,
   1571     0x253B,
   1572     0x253D,
   1573     0x253F,  /* U+FF80 */
   1574     0x2541,
   1575     0x2544,
   1576     0x2546,
   1577     0x2548,
   1578     0x254A,
   1579     0x254B,
   1580     0x254C,
   1581     0x254D,
   1582     0x254E,
   1583     0x254F,
   1584     0x2552,
   1585     0x2555,
   1586     0x2558,
   1587     0x255B,
   1588     0x255E,
   1589     0x255F,  /* U+FF90 */
   1590     0x2560,
   1591     0x2561,
   1592     0x2562,
   1593     0x2564,
   1594     0x2566,
   1595     0x2568,
   1596     0x2569,
   1597     0x256A,
   1598     0x256B,
   1599     0x256C,
   1600     0x256D,
   1601     0x256F,
   1602     0x2573,
   1603     0x212B,
   1604     0x212C   /* U+FF9F */
   1605 };
   1606 
   1607 static void
   1608 UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err) {
            /*
             * Convert UTF-16 text from args->source (up to args->sourceLimit) into
             * ISO-2022-JP bytes at args->target (up to args->targetLimit).
             *
             * Per code point:
             *  1. Combine surrogate pairs. A lead surrogate at the end of the input
             *     is saved in cnv->fromUChar32 and picked up again on the next call.
             *  2. Reject code points for which IS_2022_CONTROL() is true.
             *  3. Build choices[] (only when choiceCount is 0) and try each candidate
             *     charset until a roundtrip mapping is found; a fallback mapping is
             *     remembered while the search continues.
             *  4. Write SI, a designation escape and SO / ESC N as needed, then the
             *     one or two character bytes.
             *
             * The loop stops with *err set for an unpaired surrogate, a 2022 control,
             * an unmappable code point (cnv->fromUChar32 holds the code point in all
             * three cases) or a full target (U_BUFFER_OVERFLOW_ERROR).
             *
             * When args->flush is set, all input is consumed and nothing is pending,
             * the output is returned to SI/ASCII state.
             *
             * On return args->source and args->target are updated. Offsets are
             * written through args->offsets when it is non-NULL.
             */
   1609     UConverter *cnv = args->converter;
   1610     UConverterDataISO2022 *converterData;
   1611     ISO2022State *pFromU2022State;
   1612     uint8_t *target = (uint8_t *) args->target;
   1613     const uint8_t *targetLimit = (const uint8_t *) args->targetLimit;
   1614     const UChar* source = args->source;
   1615     const UChar* sourceLimit = args->sourceLimit;
   1616     int32_t* offsets = args->offsets;
   1617     UChar32 sourceChar;
            /* scratch output for one code point: optional SI, designation escape, shift, 1-2 bytes */
   1618     char buffer[8];
   1619     int32_t len, outLen;
            /* candidate charsets for the current code point, in the order they are tried */
   1620     int8_t choices[10];
   1621     int32_t choiceCount;
   1622     uint32_t targetValue = 0;
   1623     UBool useFallback;
   1624
   1625     int32_t i;
   1626     int8_t cs, g;
   1627
   1628     /* set up the state */
   1629     converterData     = (UConverterDataISO2022*)cnv->extraInfo;
   1630     pFromU2022State   = &converterData->fromU2022State;
   1631
            /* 0 means "choices[] must be (re)built before the next lookup" */
   1632     choiceCount = 0;
   1633
   1634     /* check if the last codepoint of previous buffer was a lead surrogate*/
   1635     if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
                /* resume inside the loop at the trail-surrogate lookup */
   1636         goto getTrail;
   1637     }
   1638
   1639     while(source < sourceLimit) {
   1640         if(target < targetLimit) {
   1641
   1642             sourceChar  = *(source++);
   1643             /*check if the char is a First surrogate*/
   1644             if(UTF_IS_SURROGATE(sourceChar)) {
   1645                 if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
   1646 getTrail:
   1647                     /*look ahead to find the trail surrogate*/
   1648                     if(source < sourceLimit) {
   1649                         /* test the following code unit */
   1650                         UChar trail=(UChar) *source;
   1651                         if(UTF_IS_SECOND_SURROGATE(trail)) {
   1652                             source++;
   1653                             sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
   1654                             cnv->fromUChar32=0x00;
   1655                             /* convert this supplementary code point */
   1656                             /* exit this condition tree */
   1657                         } else {
   1658                             /* this is an unmatched lead code unit (1st surrogate) */
   1659                             /* callback(illegal) */
   1660                             *err=U_ILLEGAL_CHAR_FOUND;
   1661                             cnv->fromUChar32=sourceChar;
   1662                             break;
   1663                         }
   1664                     } else {
   1665                         /* no more input */
                                /* keep the lead surrogate for the next call; *err is not set here */
   1666                         cnv->fromUChar32=sourceChar;
   1667                         break;
   1668                     }
   1669                 } else {
   1670                     /* this is an unmatched trail code unit (2nd surrogate) */
   1671                     /* callback(illegal) */
   1672                     *err=U_ILLEGAL_CHAR_FOUND;
   1673                     cnv->fromUChar32=sourceChar;
   1674                     break;
   1675                 }
   1676             }
   1677
   1678             /* do not convert SO/SI/ESC */
   1679             if(IS_2022_CONTROL(sourceChar)) {
   1680                 /* callback(illegal) */
   1681                 *err=U_ILLEGAL_CHAR_FOUND;
   1682                 cnv->fromUChar32=sourceChar;
   1683                 break;
   1684             }
   1685
   1686             /* do the conversion */
   1687
                    /*
                     * choices[] is reused across code points until it is invalidated
                     * (choiceCount reset to 0) by a new designation or by CR/LF below.
                     */
   1688             if(choiceCount == 0) {
   1689                 uint16_t csm;
   1690
   1691                 /*
   1692                  * The csm variable keeps track of which charsets are allowed
   1693                  * and not used yet while building the choices[].
   1694                  */
   1695                 csm = jpCharsetMasks[converterData->version];
   1696                 choiceCount = 0;
   1697
   1698                 /* JIS7/8: try single-byte half-width Katakana before JISX208 */
   1699                 if(converterData->version == 3 || converterData->version == 4) {
   1700                     choices[choiceCount++] = (int8_t)HWKANA_7BIT;
   1701                 }
   1702                 /* Do not try single-byte half-width Katakana for other versions. */
   1703                 csm &= ~CSM(HWKANA_7BIT);
   1704
   1705                 /* try the current G0 charset */
   1706                 choices[choiceCount++] = cs = pFromU2022State->cs[0];
   1707                 csm &= ~CSM(cs);
   1708
   1709                 /* try the current G2 charset */
   1710                 if((cs = pFromU2022State->cs[2]) != 0) {
   1711                     choices[choiceCount++] = cs;
   1712                     csm &= ~CSM(cs);
   1713                 }
   1714
   1715                 /* try all the other possible charsets */
   1716                 for(i = 0; i < LENGTHOF(jpCharsetPref); ++i) {
   1717                     cs = (int8_t)jpCharsetPref[i];
   1718                     if(CSM(cs) & csm) {
   1719                         choices[choiceCount++] = cs;
   1720                         csm &= ~CSM(cs);
   1721                     }
   1722                 }
   1723             }
   1724
   1725             cs = g = 0;
   1726             /*
   1727              * len==0: no mapping found yet
   1728              * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
   1729              * len>0: found a roundtrip result, done
   1730              */
   1731             len = 0;
   1732             /*
   1733              * We will turn off useFallback after finding a fallback,
   1734              * but we still get fallbacks from PUA code points as usual.
   1735              * Therefore, we will also need to check that we don't overwrite
   1736              * an early fallback with a later one.
   1737              */
   1738             useFallback = cnv->useFallback;
   1739
                    /* try the candidates in order; the loop ends at the first roundtrip (len>0) */
   1740             for(i = 0; i < choiceCount && len <= 0; ++i) {
   1741                 uint32_t value;
   1742                 int32_t len2;
   1743                 int8_t cs0 = choices[i];
   1744                 switch(cs0) {
   1745                 case ASCII:
   1746                     if(sourceChar <= 0x7f) {
   1747                         targetValue = (uint32_t)sourceChar;
   1748                         len = 1;
   1749                         cs = cs0;
   1750                         g = 0;
   1751                     }
   1752                     break;
   1753                 case ISO8859_1:
   1754                     if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
                                /* output the 7-bit form of the GR byte; designated to G2 */
   1755                         targetValue = (uint32_t)sourceChar - 0x80;
   1756                         len = 1;
   1757                         cs = cs0;
   1758                         g = 2;
   1759                     }
   1760                     break;
   1761                 case HWKANA_7BIT:
   1762                     if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
   1763                         if(converterData->version==3) {
   1764                             /* JIS7: use G1 (SO) */
   1765                             /* Shift U+FF61..U+FF9F to bytes 21..5F. */
   1766                             targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21));
   1767                             len = 1;
   1768                             pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */
   1769                             g = 1;
   1770                         } else if(converterData->version==4) {
   1771                             /* JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below */
   1772                             /* Shift U+FF61..U+FF9F to bytes A1..DF. */
   1773                             targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0xa1));
   1774                             len = 1;
   1775
   1776                             cs = pFromU2022State->cs[0];
   1777                             if(IS_JP_DBCS(cs)) {
   1778                                 /* switch from a DBCS charset to JISX201 */
   1779                                 cs = (int8_t)JISX201;
   1780                             }
   1781                             /* else stay in the current G0 charset */
   1782                             g = 0;
   1783                         }
   1784                         /* else do not use HWKANA_7BIT with other versions */
   1785                     }
   1786                     break;
   1787                 case JISX201:
   1788                     /* G0 SBCS */
   1789                     value = jisx201FromU(sourceChar);
   1790                     if(value <= 0x7f) {
   1791                         targetValue = value;
   1792                         len = 1;
   1793                         cs = cs0;
   1794                         g = 0;
   1795                         useFallback = FALSE;
   1796                     }
   1797                     break;
   1798                 case JISX208:
   1799                     /* G0 DBCS from Shift-JIS table */
   1800                     len2 = MBCS_FROM_UCHAR32_ISO2022(
   1801                                 converterData->myConverterArray[cs0],
   1802                                 sourceChar, &value,
   1803                                 useFallback, MBCS_OUTPUT_2);
   1804                     if(len2 == 2 || (len2 == -2 && len == 0)) {  /* only accept DBCS: abs(len)==2 */
   1805                         value = _2022FromSJIS(value);
   1806                         if(value != 0) {
   1807                             targetValue = value;
   1808                             len = len2;
   1809                             cs = cs0;
   1810                             g = 0;
   1811                             useFallback = FALSE;
   1812                         }
   1813                     } else if(len == 0 && useFallback &&
   1814                               (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
                                /* hardcoded half-width -> full-width Katakana fallback */
   1815                         targetValue = hwkana_fb[sourceChar - HWKANA_START];
   1816                         len = -2;
   1817                         cs = cs0;
   1818                         g = 0;
   1819                         useFallback = FALSE;
   1820                     }
   1821                     break;
   1822                 case ISO8859_7:
   1823                     /* G0 SBCS forced to 7-bit output */
   1824                     len2 = MBCS_SINGLE_FROM_UCHAR32(
   1825                                 converterData->myConverterArray[cs0],
   1826                                 sourceChar, &value,
   1827                                 useFallback);
                            /* accept a GR byte only, and do not replace an earlier fallback with another fallback */
   1828                     if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= value && value <= GR96_END) {
   1829                         targetValue = value - 0x80;
   1830                         len = len2;
   1831                         cs = cs0;
   1832                         g = 2;
   1833                         useFallback = FALSE;
   1834                     }
   1835                     break;
   1836                 default:
   1837                     /* G0 DBCS */
   1838                     len2 = MBCS_FROM_UCHAR32_ISO2022(
   1839                                 converterData->myConverterArray[cs0],
   1840                                 sourceChar, &value,
   1841                                 useFallback, MBCS_OUTPUT_2);
   1842                     if(len2 == 2 || (len2 == -2 && len == 0)) {  /* only accept DBCS: abs(len)==2 */
   1843                         if(cs0 == KSC5601) {
   1844                             /*
   1845                              * Check for valid bytes for the encoding scheme.
   1846                              * This is necessary because the sub-converter (windows-949)
   1847                              * has a broader encoding scheme than is valid for 2022.
   1848                              */
   1849                             value = _2022FromGR94DBCS(value);
   1850                             if(value == 0) {
   1851                                 break;
   1852                             }
   1853                         }
   1854                         targetValue = value;
   1855                         len = len2;
   1856                         cs = cs0;
   1857                         g = 0;
   1858                         useFallback = FALSE;
   1859                     }
   1860                     break;
   1861                 }
   1862             }
   1863
   1864             if(len != 0) {
   1865                 if(len < 0) {
   1866                     len = -len;  /* fallback */
   1867                 }
   1868                 outLen = 0; /* count output bytes */
   1869
   1870                 /* write SI if necessary (only for JIS7) */
   1871                 if(pFromU2022State->g == 1 && g == 0) {
   1872                     buffer[outLen++] = UCNV_SI;
   1873                     pFromU2022State->g = 0;
   1874                 }
   1875
   1876                 /* write the designation sequence if necessary */
   1877                 if(cs != pFromU2022State->cs[g]) {
   1878                     int32_t escLen = escSeqCharsLen[cs];
   1879                     uprv_memcpy(buffer + outLen, escSeqChars[cs], escLen);
   1880                     outLen += escLen;
   1881                     pFromU2022State->cs[g] = cs;
   1882
   1883                     /* invalidate the choices[] */
   1884                     choiceCount = 0;
   1885                 }
   1886
   1887                 /* write the shift sequence if necessary */
   1888                 if(g != pFromU2022State->g) {
   1889                     switch(g) {
   1890                     /* case 0 handled before writing escapes */
   1891                     case 1:
   1892                         buffer[outLen++] = UCNV_SO;
   1893                         pFromU2022State->g = 1;
   1894                         break;
   1895                     default: /* case 2 */
                                /* ESC N; pFromU2022State->g is left unchanged in this branch */
   1896                         buffer[outLen++] = 0x1b;
   1897                         buffer[outLen++] = 0x4e;
   1898                         break;
   1899                     /* no case 3: no SS3 in ISO-2022-JP-x */
   1900                     }
   1901                 }
   1902
   1903                 /* write the output bytes */
   1904                 if(len == 1) {
   1905                     buffer[outLen++] = (char)targetValue;
   1906                 } else /* len == 2 */ {
   1907                     buffer[outLen++] = (char)(targetValue >> 8);
   1908                     buffer[outLen++] = (char)targetValue;
   1909                 }
   1910             } else {
   1911                 /*
   1912                  * if we cannot find the character after checking all codepages
   1913                  * then this is an error
   1914                  */
   1915                 *err = U_INVALID_CHAR_FOUND;
   1916                 cnv->fromUChar32=sourceChar;
   1917                 break;
   1918             }
   1919
   1920             if(sourceChar == CR || sourceChar == LF) {
   1921                 /* reset the G2 state at the end of a line (conversion got us into ASCII or JISX201 already) */
   1922                 pFromU2022State->cs[2] = 0;
   1923                 choiceCount = 0;
   1924             }
   1925
   1926             /* output outLen>0 bytes in buffer[] */
                    /* one and two output bytes are written inline; anything longer goes through fromUWriteUInt8() */
   1927             if(outLen == 1) {
   1928                 *target++ = buffer[0];
   1929                 if(offsets) {
   1930                     *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */
   1931                 }
   1932             } else if(outLen == 2 && (target + 2) <= targetLimit) {
   1933                 *target++ = buffer[0];
   1934                 *target++ = buffer[1];
   1935                 if(offsets) {
   1936                     int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));
   1937                     *offsets++ = sourceIndex;
   1938                     *offsets++ = sourceIndex;
   1939                 }
   1940             } else {
                        /* NOTE(review): fromUWriteUInt8() presumably handles output that does not fit into the target - confirm */
   1941                 fromUWriteUInt8(
   1942                     cnv,
   1943                     buffer, outLen,
   1944                     &target, (const char *)targetLimit,
   1945                     &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
   1946                     err);
   1947                 if(U_FAILURE(*err)) {
   1948                     break;
   1949                 }
   1950             }
   1951         } /* end if(myTargetIndex<myTargetLength) */
   1952         else{
   1953             *err =U_BUFFER_OVERFLOW_ERROR;
   1954             break;
   1955         }
   1956
   1957     }/* end while(mySourceIndex<mySourceLength) */
   1958
   1959     /*
   1960      * the end of the input stream and detection of truncated input
   1961      * are handled by the framework, but for ISO-2022-JP conversion
   1962      * we need to be in ASCII mode at the very end
   1963      *
   1964      * conditions:
   1965      *   successful
   1966      *   in SO mode or not in ASCII mode
   1967      *   end of input and no truncated input
   1968      */
   1969     if( U_SUCCESS(*err) &&
   1970         (pFromU2022State->g!=0 || pFromU2022State->cs[0]!=ASCII) &&
   1971         args->flush && source>=sourceLimit && cnv->fromUChar32==0
   1972     ) {
   1973         int32_t sourceIndex;
   1974
   1975         outLen = 0;
   1976
   1977         if(pFromU2022State->g != 0) {
   1978             buffer[outLen++] = UCNV_SI;
   1979             pFromU2022State->g = 0;
   1980         }
   1981
   1982         if(pFromU2022State->cs[0] != ASCII) {
   1983             int32_t escLen = escSeqCharsLen[ASCII];
   1984             uprv_memcpy(buffer + outLen, escSeqChars[ASCII], escLen);
   1985             outLen += escLen;
   1986             pFromU2022State->cs[0] = (int8_t)ASCII;
   1987         }
   1988
   1989         /* get the source index of the last input character */
   1990         /*
   1991          * TODO this would be simpler and more reliable if we used a pair
   1992          * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
   1993          * so that we could simply use the prevSourceIndex here;
   1994          * this code gives an incorrect result for the rare case of an unmatched
   1995          * trail surrogate that is alone in the last buffer of the text stream
   1996          */
   1997         sourceIndex=(int32_t)(source-args->source);
   1998         if(sourceIndex>0) {
   1999             --sourceIndex;
   2000             if( U16_IS_TRAIL(args->source[sourceIndex]) &&
   2001                 (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
   2002             ) {
   2003                 --sourceIndex;
   2004             }
   2005         } else {
                    /* no input was consumed in this call: the closing bytes get offset -1 */
   2006             sourceIndex=-1;
   2007         }
   2008
   2009         fromUWriteUInt8(
   2010             cnv,
   2011             buffer, outLen,
   2012             &target, (const char *)targetLimit,
   2013             &offsets, sourceIndex,
   2014             err);
   2015     }
   2016
   2017     /*save the state and return */
   2018     args->source = source;
   2019     args->target = (char*)target;
   2020 }
   2021 
   2022 /*************** to unicode *******************/
   2023 
   2024 static void
   2025 UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
   2026                                                UErrorCode* err){
            /*
             * Converts ISO-2022-JP-family bytes in [args->source, args->sourceLimit)
             * to UChars at args->target and, when args->offsets is non-NULL, records
             * for each output UChar the source offset of the bytes it came from.
             *
             * State carried between calls (all read/written below):
             *   myData->key                   - non-zero while an escape sequence is incomplete
             *   args->converter->toUBytes[0]  - lead byte of a double-byte character that was
             *   with toULength == 1             split across two input buffers
             *   pToU2022State->cs[], g, prevG - designated charsets and the active G set
             *   myData->isEmptySegment        - TRUE right after a completed escape sequence,
             *                                   until the first character of that segment
             *
             * On return args->source and args->target point just past what was
             * consumed/written. *err is set to U_BUFFER_OVERFLOW_ERROR when the target
             * is full, to U_ILLEGAL_ESCAPE_SEQUENCE for an empty segment (version 0 only),
             * or to whatever changeState_2022()/toUnicodeCallback() store in it.
             */
   2027     char tempBuf[2];  /* byte pair handed to ucnv_MBCSSimpleGetNextUChar() for DBCS lookups */
   2028     const char *mySource = (char *) args->source;
   2029     UChar *myTarget = args->target;
   2030     const char *mySourceLimit = args->sourceLimit;
   2031     uint32_t targetUniChar = 0x0000;
   2032     uint32_t mySourceChar = 0x0000;
   2033     uint32_t tmpSourceChar = 0x0000;
   2034     UConverterDataISO2022* myData;
   2035     ISO2022State *pToU2022State;
   2036     StateEnum cs;
   2037 
   2038     myData=(UConverterDataISO2022*)(args->converter->extraInfo);
   2039     pToU2022State = &myData->toU2022State;
   2040 
            /* resume work that the previous call could not finish */
   2041     if(myData->key != 0) {
   2042         /* continue with a partial escape sequence */
   2043         goto escape;
   2044     } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
   2045         /* continue with a partial double-byte character */
                /* the lead byte was stored in toUBytes[0] by the end-of-input branch near the bottom of the inner switch */
   2046         mySourceChar = args->converter->toUBytes[0];
   2047         args->converter->toULength = 0;
   2048         cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
   2049         targetUniChar = missingCharMarker;
   2050         goto getTrailByte;
   2051     }
   2052 
   2053     while(mySource < mySourceLimit){
   2054 
   2055         targetUniChar =missingCharMarker;
   2056 
   2057         if(myTarget < args->targetLimit){
   2058 
   2059             mySourceChar= (unsigned char) *mySource++;
   2060 
   2061             switch(mySourceChar) {
   2062             case UCNV_SI:
   2063                 if(myData->version==3) {
                            /* JIS7: back to the G0 set */
   2064                     pToU2022State->g=0;
   2065                     continue;
   2066                 } else {
   2067                     /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
                            /* break with targetUniChar still missingCharMarker, so the byte ends up in the callback branch below */
   2068                     myData->isEmptySegment = FALSE;	/* reset this, we have a different error */
   2069                     break;
   2070                 }
   2071 
   2072             case UCNV_SO:
   2073                 if(myData->version==3) {
   2074                     /* JIS7: switch to G1 half-width Katakana */
   2075                     pToU2022State->cs[1] = (int8_t)HWKANA_7BIT;
   2076                     pToU2022State->g=1;
   2077                     continue;
   2078                 } else {
   2079                     /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
   2080                     myData->isEmptySegment = FALSE;	/* reset this, we have a different error */
   2081                     break;
   2082                 }
   2083 
   2084             case ESC_2022:
                        /* un-read the ESC; presumably changeState_2022() expects to see the sequence from its first byte - confirm there */
   2085                 mySource--;
   2086 escape:
   2087                 {
                            /* remembered so the length of an offending escape sequence can be computed below */
   2088                     const char * mySourceBefore = mySource;
   2089                     int8_t toULengthBefore = args->converter->toULength;
   2090 
   2091                     changeState_2022(args->converter,&(mySource),
   2092                         mySourceLimit, ISO_2022_JP,err);
   2093 
   2094                     /* If in ISO-2022-JP only and we successfully completed an escape sequence, but previous segment was empty, create an error */
   2095                     if(myData->version==0 && myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
   2096                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   2097                         args->converter->toUCallbackReason = UCNV_IRREGULAR;
                                /* length before this call plus the bytes consumed by this call */
   2098                         args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore));
   2099                     }
   2100                 }
   2101 
   2102                 /* invalid or illegal escape sequence */
   2103                 if(U_FAILURE(*err)){
   2104                     args->target = myTarget;
   2105                     args->source = mySource;
   2106                     myData->isEmptySegment = FALSE;	/* Reset to avoid future spurious errors */
   2107                     return;
   2108                 }
   2109                 /* If we successfully completed an escape sequence, we begin a new segment, empty so far */
   2110                 if(myData->key==0) {
   2111                     myData->isEmptySegment = TRUE;
   2112                 }
   2113                 continue;
   2114 
   2115             /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */
   2116 
   2117             case CR:
   2118                 /*falls through*/
   2119             case LF:
   2120                 /* automatically reset to single-byte mode */
   2121                 if((StateEnum)pToU2022State->cs[0] != ASCII && (StateEnum)pToU2022State->cs[0] != JISX201) {
   2122                     pToU2022State->cs[0] = (int8_t)ASCII;
   2123                 }
                        /* also drop the cs[2] designation and return to G0 */
   2124                 pToU2022State->cs[2] = 0;
   2125                 pToU2022State->g = 0;
   2126                 /* falls through */
   2127             default:
   2128                 /* convert one or two bytes */
   2129                 myData->isEmptySegment = FALSE;
   2130                 cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
                        /* the uint8_t subtraction wraps for bytes below 0xa1, so this single compare tests 0xa1..0xdf */
   2131                 if( (uint8_t)(mySourceChar - 0xa1) <= (0xdf - 0xa1) && myData->version==4 &&
   2132                     !IS_JP_DBCS(cs)
   2133                 ) {
   2134                     /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */
   2135                     targetUniChar = mySourceChar + (HWKANA_START - 0xa1);
   2136 
   2137                     /* return from a single-shift state to the previous one */
   2138                     if(pToU2022State->g >= 2) {
   2139                         pToU2022State->g=pToU2022State->prevG;
   2140                     }
   2141                 } else switch(cs) {
   2142                 case ASCII:
   2143                     if(mySourceChar <= 0x7f) {
   2144                         targetUniChar = mySourceChar;
   2145                     }
   2146                     break;
   2147                 case ISO8859_1:
   2148                     if(mySourceChar <= 0x7f) {
                                /* the 7-bit byte selects the code point 0x80 higher */
   2149                         targetUniChar = mySourceChar + 0x80;
   2150                     }
   2151                     /* return from a single-shift state to the previous one */
   2152                     pToU2022State->g=pToU2022State->prevG;
   2153                     break;
   2154                 case ISO8859_7:
   2155                     if(mySourceChar <= 0x7f) {
   2156                         /* convert mySourceChar+0x80 to use a normal 8-bit table */
   2157                         targetUniChar =
   2158                             _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(
   2159                                 myData->myConverterArray[cs],
   2160                                 mySourceChar + 0x80);
   2161                     }
   2162                     /* return from a single-shift state to the previous one */
   2163                     pToU2022State->g=pToU2022State->prevG;
   2164                     break;
   2165                 case JISX201:
   2166                     if(mySourceChar <= 0x7f) {
   2167                         targetUniChar = jisx201ToU(mySourceChar);
   2168                     }
   2169                     break;
   2170                 case HWKANA_7BIT:
   2171                     if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) {
   2172                         /* 7-bit halfwidth Katakana */
   2173                         targetUniChar = mySourceChar + (HWKANA_START - 0x21);
   2174                     }
   2175                     break;
   2176                 default:
   2177                     /* G0 DBCS */
   2178                     if(mySource < mySourceLimit) {
   2179                         int leadIsOk, trailIsOk;
   2180                         uint8_t trailByte;
                                /* also entered from the top of the function when a lead byte was left over from the previous buffer */
   2181 getTrailByte:
                                /* peek only; the trail byte is consumed further down if it belongs to this character */
   2182                         trailByte = (uint8_t)*mySource;
   2183                         /*
   2184                          * Ticket 5691: consistent illegal sequences:
   2185                          * - We include at least the first byte in the illegal sequence.
   2186                          * - If any of the non-initial bytes could be the start of a character,
   2187                          *   we stop the illegal sequence before the first one of those.
   2188                          *
   2189                          * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
   2190                          * an ESC/SO/SI, we report only the first byte as the illegal sequence.
   2191                          * Otherwise we convert or report the pair of bytes.
   2192                          */
   2193                         leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
   2194                         trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
   2195                         if (leadIsOk && trailIsOk) {
   2196                             ++mySource;
   2197                             tmpSourceChar = (mySourceChar << 8) | trailByte;
   2198                             if(cs == JISX208) {
                                        /* _2022ToSJIS() fills tempBuf from the byte pair; the table lookup below reads tempBuf */
   2199                                 _2022ToSJIS((uint8_t)mySourceChar, trailByte, tempBuf);
   2200                                 mySourceChar = tmpSourceChar;
   2201                             } else {
   2202                                 /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
   2203                                 mySourceChar = tmpSourceChar;
   2204                                 if (cs == KSC5601) {
   2205                                     tmpSourceChar += 0x8080;  /* = _2022ToGR94DBCS(tmpSourceChar) */
   2206                                 }
   2207                                 tempBuf[0] = (char)(tmpSourceChar >> 8);
   2208                                 tempBuf[1] = (char)(tmpSourceChar);
   2209                             }
   2210                             targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE);
   2211                         } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
   2212                             /* report a pair of illegal bytes if the second byte is not a DBCS starter */
   2213                             ++mySource;
   2214                             /* add another bit so that the code below writes 2 bytes in case of error */
   2215                             mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
   2216                         }
                                /* otherwise the trail byte stays unread and only the lead byte is reported */
   2217                     } else {
                                /* the input ends after the lead byte: save it so that the next call can resume at getTrailByte */
   2218                         args->converter->toUBytes[0] = (uint8_t)mySourceChar;
   2219                         args->converter->toULength = 1;
   2220                         goto endloop;
   2221                     }
   2222                 }  /* End of inner switch */
   2223                 break;
   2224             }  /* End of outer switch */
                    /* three outcomes: a single BMP code unit, a supplementary code point, or no mapping */
   2225             if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){
   2226                 if(args->offsets){
                            /*
                             * offset of the first byte of this character: mySource is already
                             * past it, so step back 1 byte for a single-byte value and 2 bytes
                             * once mySourceChar holds a byte pair (> 0xff)
                             */
   2227                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2228                 }
   2229                 *(myTarget++)=(UChar)targetUniChar;
   2230             }
   2231             else if(targetUniChar > missingCharMarker){
   2232                 /* disassemble the surrogate pair and write to output*/
   2233                 targetUniChar-=0x0010000;
   2234                 *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));
   2235                 if(args->offsets){
   2236                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2237                 }
   2238                 ++myTarget;
   2239                 if(myTarget< args->targetLimit){
   2240                     *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
   2241                     if(args->offsets){
   2242                         args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2243                     }
   2244                     ++myTarget;
   2245                 }else{
                            /*
                             * no room for the trail surrogate: park it in the converter's
                             * UCharErrorBuffer. *err is not set here; if input remains, the
                             * target-full check at the top of the loop reports the overflow.
                             */
   2246                     args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
   2247                                     (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
   2248                 }
   2249 
   2250             }
   2251             else{
   2252                 /* Call the callback function*/
   2253                 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
   2254                 break;
   2255             }
   2256         }
   2257         else{    /* goes with "if(myTarget < args->targetLimit)"  way up near top of function */
   2258             *err =U_BUFFER_OVERFLOW_ERROR;
   2259             break;
   2260         }
   2261     }
            /* also reached by goto when the input ends in the middle of a double-byte character */
   2262 endloop:
   2263     args->target = myTarget;
   2264     args->source = mySource;
   2265 }
   2266 
   2267 
   2268 /***************************************************************
   2269 *   Rules for ISO-2022-KR encoding
   2270 *   i) The KSC5601 designator sequence should appear only once in a file,
   2271 *      at the beginning of a line before any KSC5601 characters. This usually
   2272 *      means that it appears by itself on the first line of the file.
   2273 *  ii) There are only 2 shifting sequences: SO to shift into double-byte mode
   2274 *      and SI to shift into single-byte mode.
   2275 */
   2276 static void
   2277 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){
            /*
             * Runs the conversion through the subconverter stored in the ISO-2022 data
             * (currentConverter) by temporarily substituting it for args->converter.
             * The pending code point (fromUChar32) is copied into the subconverter
             * before the call and back afterwards. After a buffer overflow the
             * subconverter's charErrorBuffer is moved to the public converter.
             */
   2278     UConverter *const outerCnv = args->converter;
   2279     UConverterDataISO2022 *const isoData = (UConverterDataISO2022 *)outerCnv->extraInfo;
   2280     UConverter *const subCnv = isoData->currentConverter;
   2281 
   2282     subCnv->fromUChar32 = outerCnv->fromUChar32;
   2283     args->converter = subCnv;
   2284     ucnv_MBCSFromUnicodeWithOffsets(args, err);
   2285     args->converter = outerCnv;
   2286     outerCnv->fromUChar32 = subCnv->fromUChar32;
   2287 
   2288     if(*err != U_BUFFER_OVERFLOW_ERROR) {
   2289         return;
   2290     }
   2291 
   2292     /* overflow: move the bytes that did not fit over to the public converter */
   2293     if(subCnv->charErrorBufferLength > 0) {
   2294         uprv_memcpy(outerCnv->charErrorBuffer, subCnv->charErrorBuffer, subCnv->charErrorBufferLength);
   2295     }
   2296     outerCnv->charErrorBufferLength = subCnv->charErrorBufferLength;
   2297     subCnv->charErrorBufferLength = 0;
   2298 }
   2299 
   2300 static void
   2301 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
            /*
             * Converts UChars in [args->source, args->sourceLimit) to ISO-2022-KR bytes
             * at args->target. An SO or SI byte is emitted whenever the output switches
             * between double-byte and single-byte characters, and one source offset per
             * output byte is recorded when args->offsets is non-NULL.
             *
             * Converter state used:
             *   fromUnicodeStatus - non-zero while the output is in double-byte mode
             *   fromUChar32       - a pending lead surrogate, or the code point that
             *                       caused the error being reported
             *   charErrorBuffer   - output bytes that did not fit into the target
             *
             * Values stored in *err by this function: U_ILLEGAL_CHAR_FOUND (SO/SI/ESC in
             * the input, or an unmatched surrogate), U_INVALID_CHAR_FOUND (no usable
             * mapping), U_BUFFER_OVERFLOW_ERROR (target full).
             */
   2302 
   2303     const UChar *source = args->source;
   2304     const UChar *sourceLimit = args->sourceLimit;
   2305     unsigned char *target = (unsigned char *) args->target;
   2306     unsigned char *targetLimit = (unsigned char *) args->targetLimit;
   2307     int32_t* offsets = args->offsets;
   2308     uint32_t targetByteUnit = 0x0000;
   2309     UChar32 sourceChar = 0x0000;
   2310     UBool isTargetByteDBCS;
   2311     UBool oldIsTargetByteDBCS;
   2312     UConverterDataISO2022 *converterData;
   2313     UConverterSharedData* sharedData;
   2314     UBool useFallback;
   2315     int32_t length =0;
   2316 
   2317     converterData=(UConverterDataISO2022*)args->converter->extraInfo;
   2318     /* if the version is 1 then the user is requesting
   2319      * conversion with ibm-25546 pass the arguments to
   2320      * MBCS converter and return
   2321      */
   2322     if(converterData->version==1){
   2323         UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
   2324         return;
   2325     }
   2326 
   2327     /* initialize data */
   2328     sharedData = converterData->currentConverter->sharedData;
   2329     useFallback = args->converter->useFallback;
   2330     isTargetByteDBCS=(UBool)args->converter->fromUnicodeStatus;
            /* this initial value is never read: oldIsTargetByteDBCS is assigned again before its only use */
   2331     oldIsTargetByteDBCS = isTargetByteDBCS;
   2332 
            /* NOTE(review): repeats the read of fromUnicodeStatus made a few lines above; redundant but harmless */
   2333     isTargetByteDBCS   = (UBool) args->converter->fromUnicodeStatus;
            /* a lead surrogate is pending from the previous call: go straight to looking for its trail */
   2334     if((sourceChar = args->converter->fromUChar32)!=0 && target <targetLimit) {
   2335         goto getTrail;
   2336     }
   2337     while(source < sourceLimit){
   2338 
   2339         targetByteUnit = missingCharMarker;
   2340 
   2341         if(target < (unsigned char*) args->targetLimit){
   2342             sourceChar = *source++;
   2343 
   2344             /* do not convert SO/SI/ESC */
   2345             if(IS_2022_CONTROL(sourceChar)) {
   2346                 /* callback(illegal) */
   2347                 *err=U_ILLEGAL_CHAR_FOUND;
   2348                 args->converter->fromUChar32=sourceChar;
   2349                 break;
   2350             }
   2351 
   2352             length = MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,&targetByteUnit,useFallback,MBCS_OUTPUT_2);
   2353             if(length < 0) {
   2354                 length = -length;  /* fallback */
   2355             }
   2356             /* only DBCS or SBCS characters are expected*/
   2357             /* DB characters with high bit set to 1 are expected */
                    /*
                     * accepted results: one byte <= 0x7f, or two bytes where the 16-bit
                     * value lies in 0xa1a1..0xfefe and the low byte lies in 0xa1..0xfe
                     * (each range test is a single compare relying on unsigned wraparound)
                     */
   2358             if( length > 2 || length==0 ||
   2359                 (length == 1 && targetByteUnit > 0x7f) ||
   2360                 (length == 2 &&
   2361                     ((uint16_t)(targetByteUnit - 0xa1a1) > (0xfefe - 0xa1a1) ||
   2362                     (uint8_t)(targetByteUnit - 0xa1) > (0xfe - 0xa1)))
   2363             ) {
   2364                 targetByteUnit=missingCharMarker;
   2365             }
   2366             if (targetByteUnit != missingCharMarker){
   2367 
   2368                 oldIsTargetByteDBCS = isTargetByteDBCS;
   2369                 isTargetByteDBCS = (UBool)(targetByteUnit>0x00FF);
   2370                   /* append the shift sequence */
                        /* room for this one byte is guaranteed by the target check at the top of the loop */
   2371                 if (oldIsTargetByteDBCS != isTargetByteDBCS ){
   2372 
   2373                     if (isTargetByteDBCS)
   2374                         *target++ = UCNV_SO;
   2375                     else
   2376                         *target++ = UCNV_SI;
                            /* the shift byte gets the offset of the character that triggered it */
   2377                     if(offsets)
   2378                         *(offsets++) = (int32_t)(source - args->source-1);
   2379                 }
   2380                 /* write targetByteUnit to the target */
   2381                 if(targetByteUnit <= 0x00FF){
   2382                     if( target < targetLimit){
   2383                         *(target++) = (unsigned char) targetByteUnit;
   2384                         if(offsets){
   2385                             *(offsets++) = (int32_t)(source - args->source-1);
   2386                         }
   2387 
   2388                     }else{
                                /* target full: the byte goes to the converter's charErrorBuffer instead */
   2389                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit);
   2390                         *err = U_BUFFER_OVERFLOW_ERROR;
   2391                     }
   2392                 }else{
                            /* double-byte: each byte is written with 0x80 subtracted, i.e. with its high bit cleared */
   2393                     if(target < targetLimit){
   2394                         *(target++) =(unsigned char) ((targetByteUnit>>8) -0x80);
   2395                         if(offsets){
   2396                             *(offsets++) = (int32_t)(source - args->source-1);
   2397                         }
   2398                         if(target < targetLimit){
   2399                             *(target++) =(unsigned char) (targetByteUnit -0x80);
   2400                             if(offsets){
   2401                                 *(offsets++) = (int32_t)(source - args->source-1);
   2402                             }
   2403                         }else{
   2404                             args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit -0x80);
   2405                             *err = U_BUFFER_OVERFLOW_ERROR;
   2406                         }
   2407                     }else{
   2408                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) ((targetByteUnit>>8) -0x80);
   2409                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit-0x80);
   2410                         *err = U_BUFFER_OVERFLOW_ERROR;
   2411                     }
   2412                 }
                        /*
                         * the overflow branches above do not leave the loop themselves;
                         * target equals targetLimit at that point, so the target check at
                         * the top of the next iteration breaks out
                         */
   2413 
   2414             }
   2415             else{
   2416                 /* oops.. the code point is unassigned
   2417                  * set the error and reason
   2418                  */
   2419 
   2420                 /*check if the char is a First surrogate*/
   2421                 if(UTF_IS_SURROGATE(sourceChar)) {
   2422                     if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
                                /* also entered from the top of the function with a lead surrogate saved in fromUChar32 */
   2423 getTrail:
   2424                         /*look ahead to find the trail surrogate*/
   2425                         if(source <  sourceLimit) {
   2426                             /* test the following code unit */
   2427                             UChar trail=(UChar) *source;
   2428                             if(UTF_IS_SECOND_SURROGATE(trail)) {
   2429                                 source++;
   2430                                 sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
                                        /* the combined code point is reported as U_INVALID_CHAR_FOUND; no mapping is attempted for it here */
   2431                                 *err = U_INVALID_CHAR_FOUND;
   2432                                 /* convert this surrogate code point */
   2433                                 /* exit this condition tree */
   2434                             } else {
   2435                                 /* this is an unmatched lead code unit (1st surrogate) */
   2436                                 /* callback(illegal) */
   2437                                 *err=U_ILLEGAL_CHAR_FOUND;
   2438                             }
   2439                         } else {
   2440                             /* no more input */
                                    /* not an error: the lead surrogate is kept in fromUChar32 below */
   2441                             *err = U_ZERO_ERROR;
   2442                         }
   2443                     } else {
   2444                         /* this is an unmatched trail code unit (2nd surrogate) */
   2445                         /* callback(illegal) */
   2446                         *err=U_ILLEGAL_CHAR_FOUND;
   2447                     }
   2448                 } else {
   2449                     /* callback(unassigned) for a BMP code point */
   2450                     *err = U_INVALID_CHAR_FOUND;
   2451                 }
   2452 
                        /* remember the offending (or pending) code point and stop converting */
   2453                 args->converter->fromUChar32=sourceChar;
   2454                 break;
   2455             }
   2456         } /* end if(target < targetLimit) */
   2457         else{
   2458             *err =U_BUFFER_OVERFLOW_ERROR;
   2459             break;
   2460         }
   2461 
   2462     }/* end while(source < sourceLimit) */
   2463 
   2464     /*
   2465      * the end of the input stream and detection of truncated input
   2466      * are handled by the framework, but for ISO-2022-KR conversion
   2467      * we need to be in ASCII mode at the very end
   2468      *
   2469      * conditions:
   2470      *   successful
   2471      *   not in ASCII mode
   2472      *   end of input and no truncated input
   2473      */
   2474     if( U_SUCCESS(*err) &&
   2475         isTargetByteDBCS &&
   2476         args->flush && source>=sourceLimit && args->converter->fromUChar32==0
   2477     ) {
   2478         int32_t sourceIndex;
   2479 
   2480         /* we are switching to ASCII */
   2481         isTargetByteDBCS=FALSE;
   2482 
   2483         /* get the source index of the last input character */
   2484         /*
   2485          * TODO this would be simpler and more reliable if we used a pair
   2486          * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
   2487          * so that we could simply use the prevSourceIndex here;
   2488          * this code gives an incorrect result for the rare case of an unmatched
   2489          * trail surrogate that is alone in the last buffer of the text stream
   2490          */
   2491         sourceIndex=(int32_t)(source-args->source);
   2492         if(sourceIndex>0) {
   2493             --sourceIndex;
                    /* step back once more when the last unit looks like the trail of a surrogate pair */
   2494             if( U16_IS_TRAIL(args->source[sourceIndex]) &&
   2495                 (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
   2496             ) {
   2497                 --sourceIndex;
   2498             }
   2499         } else {
                    /* nothing was consumed from this buffer */
   2500             sourceIndex=-1;
   2501         }
   2502 
                /* emit the one-byte SHIFT_IN_STR through fromUWriteUInt8(), which is also handed the target limit and err */
   2503         fromUWriteUInt8(
   2504             args->converter,
   2505             SHIFT_IN_STR, 1,
   2506             &target, (const char *)targetLimit,
   2507             &offsets, sourceIndex,
   2508             err);
   2509     }
   2510 
   2511     /*save the state and return */
   2512     args->source = source;
   2513     args->target = (char*)target;
   2514     args->converter->fromUnicodeStatus = (uint32_t)isTargetByteDBCS;
   2515 }
   2516 
   2517 /************************ To Unicode ***************************************/
   2518 
   2519 static void
   2520 UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterToUnicodeArgs *args,
   2521                                                             UErrorCode* err){
            /*
             * Converts the input in pieces: each run of bytes up to the position
             * returned by getEndOfBuffer_2022() is handed to the subconverter
             * (myData->currentConverter) via ucnv_MBCSToUnicodeWithOffsets(), and
             * whatever follows a run is passed to changeState_2022().
             *
             * Partial-character bytes (toUBytes/toULength) are copied into the
             * subconverter before each run and back afterwards; overflow output
             * (UCharErrorBuffer) is moved to the public converter. Offsets written by
             * the subconverter are rebased onto the start of the caller's buffer.
             */
   2522     char const* sourceStart;
   2523     UConverterDataISO2022* myData=(UConverterDataISO2022*)(args->converter->extraInfo);
   2524 
   2525     UConverterToUnicodeArgs subArgs;
   2526     int32_t minArgsSize;
   2527 
   2528     /* set up the subconverter arguments */
            /* copy no more than the smaller of the caller's args->size and this build's struct size */
   2529     if(args->size<sizeof(UConverterToUnicodeArgs)) {
   2530         minArgsSize = args->size;
   2531     } else {
   2532         minArgsSize = (int32_t)sizeof(UConverterToUnicodeArgs);
   2533     }
   2534 
   2535     uprv_memcpy(&subArgs, args, minArgsSize);
   2536     subArgs.size = (uint16_t)minArgsSize;
   2537     subArgs.converter = myData->currentConverter;
   2538 
   2539     /* remember the original start of the input for offsets */
   2540     sourceStart = args->source;
   2541 
   2542     if(myData->key != 0) {
   2543         /* continue with a partial escape sequence */
                /* note: this jumps into the body of the loop below */
   2544         goto escape;
   2545     }
   2546 
   2547     while(U_SUCCESS(*err) && args->source < args->sourceLimit) {
   2548         /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
   2549         subArgs.source = args->source;
   2550         subArgs.sourceLimit = getEndOfBuffer_2022(&(args->source), args->sourceLimit, args->flush);
   2551         if(subArgs.source != subArgs.sourceLimit) {
   2552             /*
   2553              * get the current partial byte sequence
   2554              *
   2555              * it needs to be moved between the public and the subconverter
   2556              * so that the conversion framework, which only sees the public
   2557              * converter, can handle truncated and illegal input etc.
   2558              */
   2559             if(args->converter->toULength > 0) {
   2560                 uprv_memcpy(subArgs.converter->toUBytes, args->converter->toUBytes, args->converter->toULength);
   2561             }
   2562             subArgs.converter->toULength = args->converter->toULength;
   2563 
   2564             /*
   2565              * Convert up to the end of the input, or to before the next escape character.
   2566              * Does not handle conversion extensions because the preToU[] state etc.
   2567              * is not copied.
   2568              */
   2569             ucnv_MBCSToUnicodeWithOffsets(&subArgs, err);
   2570 
   2571             if(args->offsets != NULL && sourceStart != args->source) {
   2572                 /* update offsets to base them on the actual start of the input */
                        /*
                         * args->source still points at the start of this run, so delta is the
                         * number of bytes consumed before it; only the UChars written by this
                         * run (args->target up to subArgs.target) are adjusted, and negative
                         * offsets are left as they are
                         */
   2573                 int32_t *offsets = args->offsets;
   2574                 UChar *target = args->target;
   2575                 int32_t delta = (int32_t)(args->source - sourceStart);
   2576                 while(target < subArgs.target) {
   2577                     if(*offsets >= 0) {
   2578                         *offsets += delta;
   2579                     }
   2580                     ++offsets;
   2581                     ++target;
   2582                 }
   2583             }
                    /* publish the subconverter's progress to the caller's arguments */
   2584             args->source = subArgs.source;
   2585             args->target = subArgs.target;
   2586             args->offsets = subArgs.offsets;
   2587 
   2588             /* copy input/error/overflow buffers */
   2589             if(subArgs.converter->toULength > 0) {
   2590                 uprv_memcpy(args->converter->toUBytes, subArgs.converter->toUBytes, subArgs.converter->toULength);
   2591             }
   2592             args->converter->toULength = subArgs.converter->toULength;
   2593 
   2594             if(*err == U_BUFFER_OVERFLOW_ERROR) {
                        /* move the UChars that did not fit over to the public converter and empty the subconverter's buffer */
   2595                 if(subArgs.converter->UCharErrorBufferLength > 0) {
   2596                     uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer,
   2597                                 subArgs.converter->UCharErrorBufferLength);
   2598                 }
   2599                 args->converter->UCharErrorBufferLength=subArgs.converter->UCharErrorBufferLength;
   2600                 subArgs.converter->UCharErrorBufferLength = 0;
   2601             }
   2602         }
   2603 
                /* stop on any error or once all input has been consumed */
   2604         if (U_FAILURE(*err) || (args->source == args->sourceLimit)) {
   2605             return;
   2606         }
   2607 
                /* also the entry point when the previous call ended inside an escape sequence (myData->key != 0) */
   2608 escape:
   2609         changeState_2022(args->converter,
   2610                &(args->source),
   2611                args->sourceLimit,
   2612                ISO_2022_KR,
   2613                err);
   2614     }
   2615 }
   2616 
   2617 static void
   2618 UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
   2619                                                             UErrorCode* err){
   2620     char tempBuf[2];
   2621     const char *mySource = ( char *) args->source;
   2622     UChar *myTarget = args->target;
   2623     const char *mySourceLimit = args->sourceLimit;
   2624     UChar32 targetUniChar = 0x0000;
   2625     UChar mySourceChar = 0x0000;
   2626     UConverterDataISO2022* myData;
   2627     UConverterSharedData* sharedData ;
   2628     UBool useFallback;
   2629 
   2630     myData=(UConverterDataISO2022*)(args->converter->extraInfo);
   2631     if(myData->version==1){
   2632         UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
   2633         return;
   2634     }
   2635 
   2636     /* initialize state */
   2637     sharedData = myData->currentConverter->sharedData;
   2638     useFallback = args->converter->useFallback;
   2639 
   2640     if(myData->key != 0) {
   2641         /* continue with a partial escape sequence */
   2642         goto escape;
   2643     } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
   2644         /* continue with a partial double-byte character */
   2645         mySourceChar = args->converter->toUBytes[0];
   2646         args->converter->toULength = 0;
   2647         goto getTrailByte;
   2648     }
   2649 
   2650     while(mySource< mySourceLimit){
   2651 
   2652         if(myTarget < args->targetLimit){
   2653 
   2654             mySourceChar= (unsigned char) *mySource++;
   2655 
   2656             if(mySourceChar==UCNV_SI){
   2657                 myData->toU2022State.g = 0;
   2658                 if (myData->isEmptySegment) {
   2659                     myData->isEmptySegment = FALSE;	/* we are handling it, reset to avoid future spurious errors */
   2660                     *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   2661                     args->converter->toUCallbackReason = UCNV_IRREGULAR;
   2662                     args->converter->toUBytes[0] = (uint8_t)mySourceChar;
   2663                     args->converter->toULength = 1;
   2664                     args->target = myTarget;
   2665                     args->source = mySource;
   2666                     return;
   2667                 }
   2668                 /*consume the source */
   2669                 continue;
   2670             }else if(mySourceChar==UCNV_SO){
   2671                 myData->toU2022State.g = 1;
   2672                 myData->isEmptySegment = TRUE;	/* Begin a new segment, empty so far */
   2673                 /*consume the source */
   2674                 continue;
   2675             }else if(mySourceChar==ESC_2022){
   2676                 mySource--;
   2677 escape:
   2678                 myData->isEmptySegment = FALSE;	/* Any invalid ESC sequences will be detected separately, so just reset this */
   2679                 changeState_2022(args->converter,&(mySource),
   2680                                 mySourceLimit, ISO_2022_KR, err);
   2681                 if(U_FAILURE(*err)){
   2682                     args->target = myTarget;
   2683                     args->source = mySource;
   2684                     return;
   2685                 }
   2686                 continue;
   2687             }
   2688 
   2689             myData->isEmptySegment = FALSE;	/* Any invalid char errors will be detected separately, so just reset this */
   2690             if(myData->toU2022State.g == 1) {
   2691                 if(mySource < mySourceLimit) {
   2692                     int leadIsOk, trailIsOk;
   2693                     uint8_t trailByte;
   2694 getTrailByte:
   2695                     targetUniChar = missingCharMarker;
   2696                     trailByte = (uint8_t)*mySource;
   2697                     /*
   2698                      * Ticket 5691: consistent illegal sequences:
   2699                      * - We include at least the first byte in the illegal sequence.
   2700                      * - If any of the non-initial bytes could be the start of a character,
   2701                      *   we stop the illegal sequence before the first one of those.
   2702                      *
   2703                      * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
   2704                      * an ESC/SO/SI, we report only the first byte as the illegal sequence.
   2705                      * Otherwise we convert or report the pair of bytes.
   2706                      */
   2707                     leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
   2708                     trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
   2709                     if (leadIsOk && trailIsOk) {
   2710                         ++mySource;
   2711                         tempBuf[0] = (char)(mySourceChar + 0x80);
   2712                         tempBuf[1] = (char)(trailByte + 0x80);
   2713                         targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback);
   2714                         mySourceChar = (mySourceChar << 8) | trailByte;
   2715                     } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
   2716                         /* report a pair of illegal bytes if the second byte is not a DBCS starter */
   2717                         ++mySource;
   2718                         /* add another bit so that the code below writes 2 bytes in case of error */
   2719                         mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
   2720                     }
   2721                 } else {
   2722                     args->converter->toUBytes[0] = (uint8_t)mySourceChar;
   2723                     args->converter->toULength = 1;
   2724                     break;
   2725                 }
   2726             }
   2727             else if(mySourceChar <= 0x7f) {
   2728                 targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, mySource - 1, 1, useFallback);
   2729             } else {
   2730                 targetUniChar = 0xffff;
   2731             }
   2732             if(targetUniChar < 0xfffe){
   2733                 if(args->offsets) {
   2734                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2735                 }
   2736                 *(myTarget++)=(UChar)targetUniChar;
   2737             }
   2738             else {
   2739                 /* Call the callback function*/
   2740                 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
   2741                 break;
   2742             }
   2743         }
   2744         else{
   2745             *err =U_BUFFER_OVERFLOW_ERROR;
   2746             break;
   2747         }
   2748     }
   2749     args->target = myTarget;
   2750     args->source = mySource;
   2751 }
   2752 
   2753 /*************************** END ISO2022-KR *********************************/
   2754 
   2755 /*************************** ISO-2022-CN *********************************
   2756 *
   2757 * Rules for ISO-2022-CN Encoding:
   2758 * i)   The designator sequence must appear once on a line before any instance
   2759 *      of character set it designates.
   2760 * ii)  If two lines contain characters from the same character set, both lines
   2761 *      must include the designator sequence.
* iii) Once the designator sequence is known, a shifting sequence has to be found
*      to invoke the designated character set.
   2764 * iv)  All lines start in ASCII and end in ASCII.
   2765 * v)   Four shifting sequences are employed for this purpose:
   2766 *
*      Sequence    ASCII Eq    Charsets
   2768 *      ----------  -------    ---------
   2769 *      SI           <SI>        US-ASCII
   2770 *      SO           <SO>        CNS-11643-1992 Plane 1, GB2312, ISO-IR-165
   2771 *      SS2          <ESC>N      CNS-11643-1992 Plane 2
   2772 *      SS3          <ESC>O      CNS-11643-1992 Planes 3-7
   2773 *
   2774 * vi)
   2775 *      SOdesignator  : ESC "$" ")" finalchar_for_SO
   2776 *      SS2designator : ESC "$" "*" finalchar_for_SS2
   2777 *      SS3designator : ESC "$" "+" finalchar_for_SS3
   2778 *
   2779 *      ESC $ ) A       Indicates the bytes following SO are Chinese
   2780 *       characters as defined in GB 2312-80, until
   2781 *       another SOdesignation appears
   2782 *
   2783 *
   2784 *      ESC $ ) E       Indicates the bytes following SO are as defined
   2785 *       in ISO-IR-165 (for details, see section 2.1),
   2786 *       until another SOdesignation appears
   2787 *
   2788 *      ESC $ ) G       Indicates the bytes following SO are as defined
   2789 *       in CNS 11643-plane-1, until another
   2790 *       SOdesignation appears
   2791 *
   2792 *      ESC $ * H       Indicates the two bytes immediately following
   2793 *       SS2 is a Chinese character as defined in CNS
   2794 *       11643-plane-2, until another SS2designation
   2795 *       appears
*       (Meaning <ESC>N must precede every 2-byte
*        sequence.)
   2798 *
   2799 *      ESC $ + I       Indicates the immediate two bytes following SS3
   2800 *       is a Chinese character as defined in CNS
   2801 *       11643-plane-3, until another SS3designation
   2802 *       appears
*       (Meaning <ESC>O must precede every 2-byte
*        sequence.)
   2805 *
   2806 *      ESC $ + J       Indicates the immediate two bytes following SS3
   2807 *       is a Chinese character as defined in CNS
   2808 *       11643-plane-4, until another SS3designation
   2809 *       appears
*       (In English: <ESC>O must precede every 2-byte
*        sequence.)
   2812 *
   2813 *      ESC $ + K       Indicates the immediate two bytes following SS3
   2814 *       is a Chinese character as defined in CNS
   2815 *       11643-plane-5, until another SS3designation
   2816 *       appears
   2817 *
   2818 *      ESC $ + L       Indicates the immediate two bytes following SS3
   2819 *       is a Chinese character as defined in CNS
   2820 *       11643-plane-6, until another SS3designation
   2821 *       appears
   2822 *
   2823 *      ESC $ + M       Indicates the immediate two bytes following SS3
   2824 *       is a Chinese character as defined in CNS
   2825 *       11643-plane-7, until another SS3designation
   2826 *       appears
   2827 *
   2828 *       As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and
   2829 *       has its own designation information before any Chinese characters
   2830 *       appear
   2831 *
   2832 */
   2833 
   2834 /* The following are defined this way to make the strings truely readonly */
   2835 static const char GB_2312_80_STR[] = "\x1B\x24\x29\x41";
   2836 static const char ISO_IR_165_STR[] = "\x1B\x24\x29\x45";
   2837 static const char CNS_11643_1992_Plane_1_STR[] = "\x1B\x24\x29\x47";
   2838 static const char CNS_11643_1992_Plane_2_STR[] = "\x1B\x24\x2A\x48";
   2839 static const char CNS_11643_1992_Plane_3_STR[] = "\x1B\x24\x2B\x49";
   2840 static const char CNS_11643_1992_Plane_4_STR[] = "\x1B\x24\x2B\x4A";
   2841 static const char CNS_11643_1992_Plane_5_STR[] = "\x1B\x24\x2B\x4B";
   2842 static const char CNS_11643_1992_Plane_6_STR[] = "\x1B\x24\x2B\x4C";
   2843 static const char CNS_11643_1992_Plane_7_STR[] = "\x1B\x24\x2B\x4D";
   2844 
/********************** ISO2022-CN Data **************************/
/*
 * Designator sequences written by the ISO-2022-CN fromUnicode function.
 * That function indexes the table with the charset number cs when
 * cs < CNS_11643, and with CNS_11643 + (cs - CNS_11643_1) for the
 * CNS 11643 planes, and copies 4 bytes from the selected entry.
 */
static const char* const escSeqCharsCN[10] ={
        SHIFT_IN_STR,           /* ASCII */
        GB_2312_80_STR,                 /* ESC $ ) A */
        ISO_IR_165_STR,                 /* ESC $ ) E */
        CNS_11643_1992_Plane_1_STR,     /* ESC $ ) G */
        CNS_11643_1992_Plane_2_STR,     /* ESC $ * H */
        CNS_11643_1992_Plane_3_STR,     /* ESC $ + I */
        CNS_11643_1992_Plane_4_STR,     /* ESC $ + J */
        CNS_11643_1992_Plane_5_STR,     /* ESC $ + K */
        CNS_11643_1992_Plane_6_STR,     /* ESC $ + L */
        CNS_11643_1992_Plane_7_STR      /* ESC $ + M */
};
   2858 
   2859 static void
   2860 UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
   2861     UConverter *cnv = args->converter;
   2862     UConverterDataISO2022 *converterData;
   2863     ISO2022State *pFromU2022State;
   2864     uint8_t *target = (uint8_t *) args->target;
   2865     const uint8_t *targetLimit = (const uint8_t *) args->targetLimit;
   2866     const UChar* source = args->source;
   2867     const UChar* sourceLimit = args->sourceLimit;
   2868     int32_t* offsets = args->offsets;
   2869     UChar32 sourceChar;
   2870     char buffer[8];
   2871     int32_t len;
   2872     int8_t choices[3];
   2873     int32_t choiceCount;
   2874     uint32_t targetValue = 0;
   2875     UBool useFallback;
   2876 
   2877     /* set up the state */
   2878     converterData     = (UConverterDataISO2022*)cnv->extraInfo;
   2879     pFromU2022State   = &converterData->fromU2022State;
   2880 
   2881     choiceCount = 0;
   2882 
   2883     /* check if the last codepoint of previous buffer was a lead surrogate*/
   2884     if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
   2885         goto getTrail;
   2886     }
   2887 
   2888     while( source < sourceLimit){
   2889         if(target < targetLimit){
   2890 
   2891             sourceChar  = *(source++);
   2892             /*check if the char is a First surrogate*/
   2893              if(UTF_IS_SURROGATE(sourceChar)) {
   2894                 if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
   2895 getTrail:
   2896                     /*look ahead to find the trail surrogate*/
   2897                     if(source < sourceLimit) {
   2898                         /* test the following code unit */
   2899                         UChar trail=(UChar) *source;
   2900                         if(UTF_IS_SECOND_SURROGATE(trail)) {
   2901                             source++;
   2902                             sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
   2903                             cnv->fromUChar32=0x00;
   2904                             /* convert this supplementary code point */
   2905                             /* exit this condition tree */
   2906                         } else {
   2907                             /* this is an unmatched lead code unit (1st surrogate) */
   2908                             /* callback(illegal) */
   2909                             *err=U_ILLEGAL_CHAR_FOUND;
   2910                             cnv->fromUChar32=sourceChar;
   2911                             break;
   2912                         }
   2913                     } else {
   2914                         /* no more input */
   2915                         cnv->fromUChar32=sourceChar;
   2916                         break;
   2917                     }
   2918                 } else {
   2919                     /* this is an unmatched trail code unit (2nd surrogate) */
   2920                     /* callback(illegal) */
   2921                     *err=U_ILLEGAL_CHAR_FOUND;
   2922                     cnv->fromUChar32=sourceChar;
   2923                     break;
   2924                 }
   2925             }
   2926 
   2927             /* do the conversion */
   2928             if(sourceChar <= 0x007f ){
   2929                 /* do not convert SO/SI/ESC */
   2930                 if(IS_2022_CONTROL(sourceChar)) {
   2931                     /* callback(illegal) */
   2932                     *err=U_ILLEGAL_CHAR_FOUND;
   2933                     cnv->fromUChar32=sourceChar;
   2934                     break;
   2935                 }
   2936 
   2937                 /* US-ASCII */
   2938                 if(pFromU2022State->g == 0) {
   2939                     buffer[0] = (char)sourceChar;
   2940                     len = 1;
   2941                 } else {
   2942                     buffer[0] = UCNV_SI;
   2943                     buffer[1] = (char)sourceChar;
   2944                     len = 2;
   2945                     pFromU2022State->g = 0;
   2946                     choiceCount = 0;
   2947                 }
   2948                 if(sourceChar == CR || sourceChar == LF) {
   2949                     /* reset the state at the end of a line */
   2950                     uprv_memset(pFromU2022State, 0, sizeof(ISO2022State));
   2951                     choiceCount = 0;
   2952                 }
   2953             }
   2954             else{
   2955                 /* convert U+0080..U+10ffff */
   2956                 int32_t i;
   2957                 int8_t cs, g;
   2958 
   2959                 if(choiceCount == 0) {
   2960                     /* try the current SO/G1 converter first */
   2961                     choices[0] = pFromU2022State->cs[1];
   2962 
   2963                     /* default to GB2312_1 if none is designated yet */
   2964                     if(choices[0] == 0) {
   2965                         choices[0] = GB2312_1;
   2966                     }
   2967 
   2968                     if(converterData->version == 0) {
   2969                         /* ISO-2022-CN */
   2970 
   2971                         /* try the other SO/G1 converter; a CNS_11643_1 lookup may result in any plane */
   2972                         if(choices[0] == GB2312_1) {
   2973                             choices[1] = (int8_t)CNS_11643_1;
   2974                         } else {
   2975                             choices[1] = (int8_t)GB2312_1;
   2976                         }
   2977 
   2978                         choiceCount = 2;
   2979                     } else if (converterData->version == 1) {
   2980                         /* ISO-2022-CN-EXT */
   2981 
   2982                         /* try one of the other converters */
   2983                         switch(choices[0]) {
   2984                         case GB2312_1:
   2985                             choices[1] = (int8_t)CNS_11643_1;
   2986                             choices[2] = (int8_t)ISO_IR_165;
   2987                             break;
   2988                         case ISO_IR_165:
   2989                             choices[1] = (int8_t)GB2312_1;
   2990                             choices[2] = (int8_t)CNS_11643_1;
   2991                             break;
   2992                         default: /* CNS_11643_x */
   2993                             choices[1] = (int8_t)GB2312_1;
   2994                             choices[2] = (int8_t)ISO_IR_165;
   2995                             break;
   2996                         }
   2997 
   2998                         choiceCount = 3;
   2999                     } else {
   3000                         choices[0] = (int8_t)CNS_11643_1;
   3001                         choices[1] = (int8_t)GB2312_1;
   3002                     }
   3003                 }
   3004 
   3005                 cs = g = 0;
   3006                 /*
   3007                  * len==0: no mapping found yet
   3008                  * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
   3009                  * len>0: found a roundtrip result, done
   3010                  */
   3011                 len = 0;
   3012                 /*
   3013                  * We will turn off useFallback after finding a fallback,
   3014                  * but we still get fallbacks from PUA code points as usual.
   3015                  * Therefore, we will also need to check that we don't overwrite
   3016                  * an early fallback with a later one.
   3017                  */
   3018                 useFallback = cnv->useFallback;
   3019 
   3020                 for(i = 0; i < choiceCount && len <= 0; ++i) {
   3021                     int8_t cs0 = choices[i];
   3022                     if(cs0 > 0) {
   3023                         uint32_t value;
   3024                         int32_t len2;
   3025                         if(cs0 >= CNS_11643_0) {
   3026                             len2 = MBCS_FROM_UCHAR32_ISO2022(
   3027                                         converterData->myConverterArray[CNS_11643],
   3028                                         sourceChar,
   3029                                         &value,
   3030                                         useFallback,
   3031                                         MBCS_OUTPUT_3);
   3032                             if(len2 == 3 || (len2 == -3 && len == 0)) {
   3033                                 targetValue = value;
   3034                                 cs = (int8_t)(CNS_11643_0 + (value >> 16) - 0x80);
   3035                                 if(len2 >= 0) {
   3036                                     len = 2;
   3037                                 } else {
   3038                                     len = -2;
   3039                                     useFallback = FALSE;
   3040                                 }
   3041                                 if(cs == CNS_11643_1) {
   3042                                     g = 1;
   3043                                 } else if(cs == CNS_11643_2) {
   3044                                     g = 2;
   3045                                 } else /* plane 3..7 */ if(converterData->version == 1) {
   3046                                     g = 3;
   3047                                 } else {
   3048                                     /* ISO-2022-CN (without -EXT) does not support plane 3..7 */
   3049                                     len = 0;
   3050                                 }
   3051                             }
   3052                         } else {
   3053                             /* GB2312_1 or ISO-IR-165 */
   3054                             len2 = MBCS_FROM_UCHAR32_ISO2022(
   3055                                         converterData->myConverterArray[cs0],
   3056                                         sourceChar,
   3057                                         &value,
   3058                                         useFallback,
   3059                                         MBCS_OUTPUT_2);
   3060                             if(len2 == 2 || (len2 == -2 && len == 0)) {
   3061                                 targetValue = value;
   3062                                 len = len2;
   3063                                 cs = cs0;
   3064                                 g = 1;
   3065                                 useFallback = FALSE;
   3066                             }
   3067                         }
   3068                     }
   3069                 }
   3070 
   3071                 if(len != 0) {
   3072                     len = 0; /* count output bytes; it must have been abs(len) == 2 */
   3073 
   3074                     /* write the designation sequence if necessary */
   3075                     if(cs != pFromU2022State->cs[g]) {
   3076                         if(cs < CNS_11643) {
   3077                             uprv_memcpy(buffer, escSeqCharsCN[cs], 4);
   3078                         } else {
   3079                             uprv_memcpy(buffer, escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)], 4);
   3080                         }
   3081                         len = 4;
   3082                         pFromU2022State->cs[g] = cs;
   3083                         if(g == 1) {
   3084                             /* changing the SO/G1 charset invalidates the choices[] */
   3085                             choiceCount = 0;
   3086                         }
   3087                     }
   3088 
   3089                     /* write the shift sequence if necessary */
   3090                     if(g != pFromU2022State->g) {
   3091                         switch(g) {
   3092                         case 1:
   3093                             buffer[len++] = UCNV_SO;
   3094 
   3095                             /* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */
   3096                             pFromU2022State->g = 1;
   3097                             break;
   3098                         case 2:
   3099                             buffer[len++] = 0x1b;
   3100                             buffer[len++] = 0x4e;
   3101                             break;
   3102                         default: /* case 3 */
   3103                             buffer[len++] = 0x1b;
   3104                             buffer[len++] = 0x4f;
   3105                             break;
   3106                         }
   3107                     }
   3108 
   3109                     /* write the two output bytes */
   3110                     buffer[len++] = (char)(targetValue >> 8);
   3111                     buffer[len++] = (char)targetValue;
   3112                 } else {
   3113                     /* if we cannot find the character after checking all codepages
   3114                      * then this is an error
   3115                      */
   3116                     *err = U_INVALID_CHAR_FOUND;
   3117                     cnv->fromUChar32=sourceChar;
   3118                     break;
   3119                 }
   3120             }
   3121 
   3122             /* output len>0 bytes in buffer[] */
   3123             if(len == 1) {
   3124                 *target++ = buffer[0];
   3125                 if(offsets) {
   3126                     *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */
   3127                 }
   3128             } else if(len == 2 && (target + 2) <= targetLimit) {
   3129                 *target++ = buffer[0];
   3130                 *target++ = buffer[1];
   3131                 if(offsets) {
   3132                     int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));
   3133                     *offsets++ = sourceIndex;
   3134                     *offsets++ = sourceIndex;
   3135                 }
   3136             } else {
   3137                 fromUWriteUInt8(
   3138                     cnv,
   3139                     buffer, len,
   3140                     &target, (const char *)targetLimit,
   3141                     &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
   3142                     err);
   3143                 if(U_FAILURE(*err)) {
   3144                     break;
   3145                 }
   3146             }
   3147         } /* end if(myTargetIndex<myTargetLength) */
   3148         else{
   3149             *err =U_BUFFER_OVERFLOW_ERROR;
   3150             break;
   3151         }
   3152 
   3153     }/* end while(mySourceIndex<mySourceLength) */
   3154 
   3155     /*
   3156      * the end of the input stream and detection of truncated input
   3157      * are handled by the framework, but for ISO-2022-CN conversion
   3158      * we need to be in ASCII mode at the very end
   3159      *
   3160      * conditions:
   3161      *   successful
   3162      *   not in ASCII mode
   3163      *   end of input and no truncated input
   3164      */
   3165     if( U_SUCCESS(*err) &&
   3166         pFromU2022State->g!=0 &&
   3167         args->flush && source>=sourceLimit && cnv->fromUChar32==0
   3168     ) {
   3169         int32_t sourceIndex;
   3170 
   3171         /* we are switching to ASCII */
   3172         pFromU2022State->g=0;
   3173 
   3174         /* get the source index of the last input character */
   3175         /*
   3176          * TODO this would be simpler and more reliable if we used a pair
   3177          * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
   3178          * so that we could simply use the prevSourceIndex here;
   3179          * this code gives an incorrect result for the rare case of an unmatched
   3180          * trail surrogate that is alone in the last buffer of the text stream
   3181          */
   3182         sourceIndex=(int32_t)(source-args->source);
   3183         if(sourceIndex>0) {
   3184             --sourceIndex;
   3185             if( U16_IS_TRAIL(args->source[sourceIndex]) &&
   3186                 (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
   3187             ) {
   3188                 --sourceIndex;
   3189             }
   3190         } else {
   3191             sourceIndex=-1;
   3192         }
   3193 
   3194         fromUWriteUInt8(
   3195             cnv,
   3196             SHIFT_IN_STR, 1,
   3197             &target, (const char *)targetLimit,
   3198             &offsets, sourceIndex,
   3199             err);
   3200     }
   3201 
   3202     /*save the state and return */
   3203     args->source = source;
   3204     args->target = (char*)target;
   3205 }
   3206 
   3207 
   3208 static void
   3209 UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
   3210                                                UErrorCode* err){
   3211     char tempBuf[3];
   3212     const char *mySource = (char *) args->source;
   3213     UChar *myTarget = args->target;
   3214     const char *mySourceLimit = args->sourceLimit;
   3215     uint32_t targetUniChar = 0x0000;
   3216     uint32_t mySourceChar = 0x0000;
   3217     UConverterDataISO2022* myData;
   3218     ISO2022State *pToU2022State;
   3219 
   3220     myData=(UConverterDataISO2022*)(args->converter->extraInfo);
   3221     pToU2022State = &myData->toU2022State;
   3222 
   3223     if(myData->key != 0) {
   3224         /* continue with a partial escape sequence */
   3225         goto escape;
   3226     } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
   3227         /* continue with a partial double-byte character */
   3228         mySourceChar = args->converter->toUBytes[0];
   3229         args->converter->toULength = 0;
   3230         targetUniChar = missingCharMarker;
   3231         goto getTrailByte;
   3232     }
   3233 
   3234     while(mySource < mySourceLimit){
   3235 
   3236         targetUniChar =missingCharMarker;
   3237 
   3238         if(myTarget < args->targetLimit){
   3239 
   3240             mySourceChar= (unsigned char) *mySource++;
   3241 
   3242             switch(mySourceChar){
   3243             case UCNV_SI:
   3244                 pToU2022State->g=0;
   3245                 if (myData->isEmptySegment) {
   3246                     myData->isEmptySegment = FALSE;	/* we are handling it, reset to avoid future spurious errors */
   3247                     *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   3248                     args->converter->toUCallbackReason = UCNV_IRREGULAR;
   3249                     args->converter->toUBytes[0] = mySourceChar;
   3250                     args->converter->toULength = 1;
   3251                     args->target = myTarget;
   3252                     args->source = mySource;
   3253                     return;
   3254                 }
   3255                 continue;
   3256 
   3257             case UCNV_SO:
   3258                 if(pToU2022State->cs[1] != 0) {
   3259                     pToU2022State->g=1;
   3260                     myData->isEmptySegment = TRUE;	/* Begin a new segment, empty so far */
   3261                     continue;
   3262                 } else {
   3263                     /* illegal to have SO before a matching designator */
   3264                     myData->isEmptySegment = FALSE;	/* Handling a different error, reset this to avoid future spurious errs */
   3265                     break;
   3266                 }
   3267 
   3268             case ESC_2022:
   3269                 mySource--;
   3270 escape:
   3271                 {
   3272                     const char * mySourceBefore = mySource;
   3273                     int8_t toULengthBefore = args->converter->toULength;
   3274 
   3275                     changeState_2022(args->converter,&(mySource),
   3276                         mySourceLimit, ISO_2022_CN,err);
   3277 
   3278                     /* After SO there must be at least one character before a designator (designator error handled separately) */
   3279                     if(myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
   3280                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   3281                         args->converter->toUCallbackReason = UCNV_IRREGULAR;
   3282                         args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore));
   3283                     }
   3284                 }
   3285 
   3286                 /* invalid or illegal escape sequence */
   3287                 if(U_FAILURE(*err)){
   3288                     args->target = myTarget;
   3289                     args->source = mySource;
   3290                     myData->isEmptySegment = FALSE;	/* Reset to avoid future spurious errors */
   3291                     return;
   3292                 }
   3293                 continue;
   3294 
   3295             /* ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */
   3296 
   3297             case CR:
   3298                 /*falls through*/
   3299             case LF:
   3300                 uprv_memset(pToU2022State, 0, sizeof(ISO2022State));
   3301                 /* falls through */
   3302             default:
   3303                 /* convert one or two bytes */
   3304                 myData->isEmptySegment = FALSE;
   3305                 if(pToU2022State->g != 0) {
   3306                     if(mySource < mySourceLimit) {
   3307                         UConverterSharedData *cnv;
   3308                         StateEnum tempState;
   3309                         int32_t tempBufLen;
   3310                         int leadIsOk, trailIsOk;
   3311                         uint8_t trailByte;
   3312 getTrailByte:
   3313                         trailByte = (uint8_t)*mySource;
   3314                         /*
   3315                          * Ticket 5691: consistent illegal sequences:
   3316                          * - We include at least the first byte in the illegal sequence.
   3317                          * - If any of the non-initial bytes could be the start of a character,
   3318                          *   we stop the illegal sequence before the first one of those.
   3319                          *
   3320                          * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
   3321                          * an ESC/SO/SI, we report only the first byte as the illegal sequence.
   3322                          * Otherwise we convert or report the pair of bytes.
   3323                          */
   3324                         leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
   3325                         trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
   3326                         if (leadIsOk && trailIsOk) {
   3327                             ++mySource;
   3328                             tempState = (StateEnum)pToU2022State->cs[pToU2022State->g];
   3329                             if(tempState >= CNS_11643_0) {
   3330                                 cnv = myData->myConverterArray[CNS_11643];
   3331                                 tempBuf[0] = (char) (0x80+(tempState-CNS_11643_0));
   3332                                 tempBuf[1] = (char) (mySourceChar);
   3333                                 tempBuf[2] = (char) trailByte;
   3334                                 tempBufLen = 3;
   3335 
   3336                             }else{
   3337                                 cnv = myData->myConverterArray[tempState];
   3338                                 tempBuf[0] = (char) (mySourceChar);
   3339                                 tempBuf[1] = (char) trailByte;
   3340                                 tempBufLen = 2;
   3341                             }
   3342                             targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE);
   3343                             mySourceChar = (mySourceChar << 8) | trailByte;
   3344                         } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
   3345                             /* report a pair of illegal bytes if the second byte is not a DBCS starter */
   3346                             ++mySource;
   3347                             /* add another bit so that the code below writes 2 bytes in case of error */
   3348                             mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
   3349                         }
   3350                         if(pToU2022State->g>=2) {
   3351                             /* return from a single-shift state to the previous one */
   3352                             pToU2022State->g=pToU2022State->prevG;
   3353                         }
   3354                     } else {
   3355                         args->converter->toUBytes[0] = (uint8_t)mySourceChar;
   3356                         args->converter->toULength = 1;
   3357                         goto endloop;
   3358                     }
   3359                 }
   3360                 else{
   3361                     if(mySourceChar <= 0x7f) {
   3362                         targetUniChar = (UChar) mySourceChar;
   3363                     }
   3364                 }
   3365                 break;
   3366             }
   3367             if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){
   3368                 if(args->offsets){
   3369                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   3370                 }
   3371                 *(myTarget++)=(UChar)targetUniChar;
   3372             }
   3373             else if(targetUniChar > missingCharMarker){
   3374                 /* disassemble the surrogate pair and write to output*/
   3375                 targetUniChar-=0x0010000;
   3376                 *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));
   3377                 if(args->offsets){
   3378                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   3379                 }
   3380                 ++myTarget;
   3381                 if(myTarget< args->targetLimit){
   3382                     *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
   3383                     if(args->offsets){
   3384                         args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   3385                     }
   3386                     ++myTarget;
   3387                 }else{
   3388                     args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
   3389                                     (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
   3390                 }
   3391 
   3392             }
   3393             else{
   3394                 /* Call the callback function*/
   3395                 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
   3396                 break;
   3397             }
   3398         }
   3399         else{
   3400             *err =U_BUFFER_OVERFLOW_ERROR;
   3401             break;
   3402         }
   3403     }
   3404 endloop:
   3405     args->target = myTarget;
   3406     args->source = mySource;
   3407 }
   3408 
   3409 static void
   3410 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
   3411     UConverter *cnv = args->converter;
   3412     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo;
   3413     ISO2022State *pFromU2022State=&myConverterData->fromU2022State;
   3414     char *p, *subchar;
   3415     char buffer[8];
   3416     int32_t length;
   3417 
   3418     subchar=(char *)cnv->subChars;
   3419     length=cnv->subCharLen; /* assume length==1 for most variants */
   3420 
   3421     p = buffer;
   3422     switch(myConverterData->locale[0]){
   3423     case 'j':
   3424         {
   3425             int8_t cs;
   3426 
   3427             if(pFromU2022State->g == 1) {
   3428                 /* JIS7: switch from G1 to G0 */
   3429                 pFromU2022State->g = 0;
   3430                 *p++ = UCNV_SI;
   3431             }
   3432 
   3433             cs = pFromU2022State->cs[0];
   3434             if(cs != ASCII && cs != JISX201) {
   3435                 /* not in ASCII or JIS X 0201: switch to ASCII */
   3436                 pFromU2022State->cs[0] = (int8_t)ASCII;
   3437                 *p++ = '\x1b';
   3438                 *p++ = '\x28';
   3439                 *p++ = '\x42';
   3440             }
   3441 
   3442             *p++ = subchar[0];
   3443             break;
   3444         }
   3445     case 'c':
   3446         if(pFromU2022State->g != 0) {
   3447             /* not in ASCII mode: switch to ASCII */
   3448             pFromU2022State->g = 0;
   3449             *p++ = UCNV_SI;
   3450         }
   3451         *p++ = subchar[0];
   3452         break;
   3453     case 'k':
   3454         if(myConverterData->version == 0) {
   3455             if(length == 1) {
   3456                 if((UBool)args->converter->fromUnicodeStatus) {
   3457                     /* in DBCS mode: switch to SBCS */
   3458                     args->converter->fromUnicodeStatus = 0;
   3459                     *p++ = UCNV_SI;
   3460                 }
   3461                 *p++ = subchar[0];
   3462             } else /* length == 2*/ {
   3463                 if(!(UBool)args->converter->fromUnicodeStatus) {
   3464                     /* in SBCS mode: switch to DBCS */
   3465                     args->converter->fromUnicodeStatus = 1;
   3466                     *p++ = UCNV_SO;
   3467                 }
   3468                 *p++ = subchar[0];
   3469                 *p++ = subchar[1];
   3470             }
   3471             break;
   3472         } else {
   3473             /* save the subconverter's substitution string */
   3474             uint8_t *currentSubChars = myConverterData->currentConverter->subChars;
   3475             int8_t currentSubCharLen = myConverterData->currentConverter->subCharLen;
   3476 
   3477             /* set our substitution string into the subconverter */
   3478             myConverterData->currentConverter->subChars = (uint8_t *)subchar;
   3479             myConverterData->currentConverter->subCharLen = (int8_t)length;
   3480 
   3481             /* let the subconverter write the subchar, set/retrieve fromUChar32 state */
   3482             args->converter = myConverterData->currentConverter;
   3483             myConverterData->currentConverter->fromUChar32 = cnv->fromUChar32;
   3484             ucnv_cbFromUWriteSub(args, 0, err);
   3485             cnv->fromUChar32 = myConverterData->currentConverter->fromUChar32;
   3486             args->converter = cnv;
   3487 
   3488             /* restore the subconverter's substitution string */
   3489             myConverterData->currentConverter->subChars = currentSubChars;
   3490             myConverterData->currentConverter->subCharLen = currentSubCharLen;
   3491 
   3492             if(*err == U_BUFFER_OVERFLOW_ERROR) {
   3493                 if(myConverterData->currentConverter->charErrorBufferLength > 0) {
   3494                     uprv_memcpy(
   3495                         cnv->charErrorBuffer,
   3496                         myConverterData->currentConverter->charErrorBuffer,
   3497                         myConverterData->currentConverter->charErrorBufferLength);
   3498                 }
   3499                 cnv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;
   3500                 myConverterData->currentConverter->charErrorBufferLength = 0;
   3501             }
   3502             return;
   3503         }
   3504     default:
   3505         /* not expected */
   3506         break;
   3507     }
   3508     ucnv_cbFromUWriteBytes(args,
   3509                            buffer, (int32_t)(p - buffer),
   3510                            offsetIndex, err);
   3511 }
   3512 
   3513 /*
   3514  * Structure for cloning an ISO 2022 converter into a single memory block.
   3515  * ucnv_safeClone() of the converter will align the entire cloneStruct,
   3516  * and then ucnv_safeClone() of the sub-converter may additionally align
   3517  * currentConverter inside the cloneStruct, for which we need the deadSpace
   3518  * after currentConverter.
   3519  * This is because UAlignedMemory may be larger than the actually
   3520  * necessary alignment size for the platform.
   3521  * The other cloneStruct fields will not be moved around,
   3522  * and are aligned properly with cloneStruct's alignment.
         *
         * _ISO_2022_SafeClone() reports sizeof(struct cloneStruct) when preflighting
         * and overlays this layout on the caller-provided buffer, so the member
         * order below is significant.
   3523  */
   3524 struct cloneStruct
   3525 {
   3526     UConverter cnv; /* the cloned main converter; _ISO_2022_SafeClone() returns its address */
   3527     UConverter currentConverter; /* storage handed to ucnv_safeClone() for the sub-converter clone */
   3528     UAlignedMemory deadSpace; /* slack directly after currentConverter for its possible re-alignment */
   3529     UConverterDataISO2022 mydata; /* copy of the ISO 2022 extra data; cnv.extraInfo is pointed here */
   3530 };
   3531 
   3532 
   3533 static UConverter *
   3534 _ISO_2022_SafeClone(
   3535             const UConverter *cnv,
   3536             void *stackBuffer,
   3537             int32_t *pBufferSize,
   3538             UErrorCode *status)
   3539 {
   3540     struct cloneStruct * localClone;
   3541     UConverterDataISO2022 *cnvData;
   3542     int32_t i, size;
   3543 
   3544     if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */
   3545         *pBufferSize = (int32_t)sizeof(struct cloneStruct);
   3546         return NULL;
   3547     }
   3548 
   3549     cnvData = (UConverterDataISO2022 *)cnv->extraInfo;
   3550     localClone = (struct cloneStruct *)stackBuffer;
   3551 
   3552     /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
   3553 
   3554     uprv_memcpy(&localClone->mydata, cnvData, sizeof(UConverterDataISO2022));
   3555     localClone->cnv.extraInfo = &localClone->mydata; /* set pointer to extra data */
   3556     localClone->cnv.isExtraLocal = TRUE;
   3557 
   3558     /* share the subconverters */
   3559 
   3560     if(cnvData->currentConverter != NULL) {
   3561         size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */
   3562         localClone->mydata.currentConverter =
   3563             ucnv_safeClone(cnvData->currentConverter,
   3564                             &localClone->currentConverter,
   3565                             &size, status);
   3566         if(U_FAILURE(*status)) {
   3567             return NULL;
   3568         }
   3569     }
   3570 
   3571     for(i=0; i<UCNV_2022_MAX_CONVERTERS; ++i) {
   3572         if(cnvData->myConverterArray[i] != NULL) {
   3573             ucnv_incrementRefCount(cnvData->myConverterArray[i]);
   3574         }
   3575     }
   3576 
   3577     return &localClone->cnv;
   3578 }
   3579 
   3580 static void
   3581 _ISO_2022_GetUnicodeSet(const UConverter *cnv,
   3582                     const USetAdder *sa,
   3583                     UConverterUnicodeSet which,
   3584                     UErrorCode *pErrorCode)
   3585 {
   3586     int32_t i;
   3587     UConverterDataISO2022* cnvData;
   3588 
   3589     if (U_FAILURE(*pErrorCode)) {
   3590         return;
   3591     }
   3592 #ifdef U_ENABLE_GENERIC_ISO_2022
   3593     if (cnv->sharedData == &_ISO2022Data) {
   3594         /* We use UTF-8 in this case */
   3595         sa->addRange(sa->set, 0, 0xd7FF);
   3596         sa->addRange(sa->set, 0xE000, 0x10FFFF);
   3597         return;
   3598     }
   3599 #endif
   3600 
   3601     cnvData = (UConverterDataISO2022*)cnv->extraInfo;
   3602 
   3603     /* open a set and initialize it with code points that are algorithmically round-tripped */
   3604     switch(cnvData->locale[0]){
   3605     case 'j':
   3606         /* include JIS X 0201 which is hardcoded */
   3607         sa->add(sa->set, 0xa5);
   3608         sa->add(sa->set, 0x203e);
   3609         if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
   3610             /* include Latin-1 for some variants of JP */
   3611             sa->addRange(sa->set, 0, 0xff);
   3612         } else {
   3613             /* include ASCII for JP */
   3614             sa->addRange(sa->set, 0, 0x7f);
   3615         }
   3616         if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
   3617             /*
   3618              * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0
   3619              * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8)
   3620              * use half-width Katakana.
   3621              * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode)
   3622              * half-width Katakana via the ESC ( I sequence.
   3623              * However, we only emit (fromUnicode) half-width Katakana according to the
   3624              * definition of each variant.
   3625              *
   3626              * When including fallbacks,
   3627              * we need to include half-width Katakana Unicode code points for all JP variants because
   3628              * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana).
   3629              */
   3630             /* include half-width Katakana for JP */
   3631             sa->addRange(sa->set, HWKANA_START, HWKANA_END);
   3632         }
   3633         break;
   3634     case 'c':
   3635     case 'z':
   3636         /* include ASCII for CN */
   3637         sa->addRange(sa->set, 0, 0x7f);
   3638         break;
   3639     case 'k':
   3640         /* there is only one converter for KR, and it is not in the myConverterArray[] */
   3641         cnvData->currentConverter->sharedData->impl->getUnicodeSet(
   3642                 cnvData->currentConverter, sa, which, pErrorCode);
   3643         /* the loop over myConverterArray[] will simply not find another converter */
   3644         break;
   3645     default:
   3646         break;
   3647     }
   3648 
   3649 #if 0  /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */
   3650             if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
   3651                 cnvData->version==0 && i==CNS_11643
   3652             ) {
   3653                 /* special handling for non-EXT ISO-2022-CN: add only code points for CNS planes 1 and 2 */
   3654                 ucnv_MBCSGetUnicodeSetForBytes(
   3655                         cnvData->myConverterArray[i],
   3656                         sa, UCNV_ROUNDTRIP_SET,
   3657                         0, 0x81, 0x82,
   3658                         pErrorCode);
   3659             }
   3660 #endif
   3661 
   3662     for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
   3663         UConverterSetFilter filter;
   3664         if(cnvData->myConverterArray[i]!=NULL) {
   3665             if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
   3666                 cnvData->version==0 && i==CNS_11643
   3667             ) {
   3668                 /*
   3669                  * Version-specific for CN:
   3670                  * CN version 0 does not map CNS planes 3..7 although
   3671                  * they are all available in the CNS conversion table;
   3672                  * CN version 1 (-EXT) does map them all.
   3673                  * The two versions create different Unicode sets.
   3674                  */
   3675                 filter=UCNV_SET_FILTER_2022_CN;
   3676             } else if(cnvData->locale[0]=='j' && i==JISX208) {
   3677                 /*
   3678                  * Only add code points that map to Shift-JIS codes
   3679                  * corresponding to JIS X 0208.
   3680                  */
   3681                 filter=UCNV_SET_FILTER_SJIS;
   3682             } else if(i==KSC5601) {
   3683                 /*
   3684                  * Some of the KSC 5601 tables (convrtrs.txt has this aliases on multiple tables)
   3685                  * are broader than GR94.
   3686                  */
   3687                 filter=UCNV_SET_FILTER_GR94DBCS;
   3688             } else {
   3689                 filter=UCNV_SET_FILTER_NONE;
   3690             }
   3691             ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, filter, pErrorCode);
   3692         }
   3693     }
   3694 
   3695     /*
   3696      * ISO 2022 converters must not convert SO/SI/ESC despite what
   3697      * sub-converters do by themselves.
   3698      * Remove these characters from the set.
   3699      */
   3700     sa->remove(sa->set, 0x0e);
   3701     sa->remove(sa->set, 0x0f);
   3702     sa->remove(sa->set, 0x1b);
   3703 
   3704     /* ISO 2022 converters do not convert C1 controls either */
   3705     sa->removeRange(sa->set, 0x80, 0x9f);
   3706 }
   3707 
   3708 static const UConverterImpl _ISO2022Impl={
            /*
             * Implementation table of the generic "ISO_2022" converter; it is
             * referenced from _ISO2022Data.
             * The four conversion functions are installed only when
             * U_ENABLE_GENERIC_ISO_2022 is defined; otherwise those slots are NULL.
             * NOTE(review): the slot roles noted on the right are inferred from the
             * names of the installed functions -- confirm against the UConverterImpl
             * declaration.
             */
   3709     UCNV_ISO_2022, /* converter type */
   3710 
   3711     NULL, /* two slots not used by this converter */
   3712     NULL,
   3713 
   3714     _ISO2022Open, /* open / close / reset, shared with the JP, KR and CN tables */
   3715     _ISO2022Close,
   3716     _ISO2022Reset,
   3717 
   3718 #ifdef U_ENABLE_GENERIC_ISO_2022
   3719     T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC, /* same function in both toUnicode slots */
   3720     T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
   3721     ucnv_fromUnicode_UTF8, /* fromUnicode, presumably without and with offsets */
   3722     ucnv_fromUnicode_UTF8_OFFSETS_LOGIC,
   3723 #else
   3724     NULL, /* generic converter disabled: no conversion functions */
   3725     NULL,
   3726     NULL,
   3727     NULL,
   3728 #endif
   3729     NULL,
   3730 
   3731     NULL,
   3732     _ISO2022getName,
   3733     _ISO_2022_WriteSub, /* writes the substitution character */
   3734     _ISO_2022_SafeClone, /* clones converter plus extra data into one block */
   3735     _ISO_2022_GetUnicodeSet /* reports the set of convertible code points */
   3736 };
   3737 static const UConverterStaticData _ISO2022StaticData={
            /*
             * Static description of the generic "ISO_2022" converter; _ISO2022Data
             * points to it.
             * NOTE(review): the field roles noted on the right are inferred from the
             * values and the pre-existing comments -- confirm against the
             * UConverterStaticData declaration.
             */
   3738     sizeof(UConverterStaticData), /* size of this structure */
   3739     "ISO_2022", /* converter name */
   3740     2022, /* presumably the code page number */
   3741     UCNV_IBM, /* presumably the platform */
   3742     UCNV_ISO_2022, /* converter type, the same constant that starts _ISO2022Impl */
   3743     1, /* presumably the minimum number of bytes per UChar */
   3744     3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */
   3745     { 0x1a, 0, 0, 0 }, /* presumably the default substitution bytes ... */
   3746     1, /* ... and how many of them are used */
   3747     FALSE,
   3748     FALSE,
   3749     0,
   3750     0,
   3751     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   3752 };
   3753 const UConverterSharedData _ISO2022Data={
            /*
             * Shared data of the generic "ISO_2022" converter: ties the static
             * description to the implementation table.
             * Unlike the JP/KR/CN shared data this object is not declared static;
             * _ISO_2022_GetUnicodeSet() compares cnv->sharedData against its address
             * when U_ENABLE_GENERIC_ISO_2022 is defined.
             */
   3754     sizeof(UConverterSharedData), /* size of this structure */
   3755     ~((uint32_t) 0), /* all bits set; NOTE(review): presumably a reference counter marking never-freed data -- confirm */
   3756     NULL,
   3757     NULL,
   3758     &_ISO2022StaticData, /* static converter description */
   3759     FALSE,
   3760     &_ISO2022Impl, /* implementation function table */
   3761     0
   3762 };
   3763 
   3764 /*************JP****************/
   3765 static const UConverterImpl _ISO2022JPImpl={
            /*
             * Implementation table of the ISO-2022-JP converter; it is referenced
             * from _ISO2022JPData.
             * NOTE(review): the slot roles noted on the right are inferred from the
             * names of the installed functions -- confirm against the UConverterImpl
             * declaration.
             */
   3766     UCNV_ISO_2022, /* converter type */
   3767 
   3768     NULL, /* two slots not used by this converter */
   3769     NULL,
   3770 
   3771     _ISO2022Open, /* open / close / reset, shared by all ISO 2022 tables */
   3772     _ISO2022Close,
   3773     _ISO2022Reset,
   3774 
   3775     UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC, /* same function in both toUnicode slots */
   3776     UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,
   3777     UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC, /* same function in both fromUnicode slots */
   3778     UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,
   3779     NULL,
   3780 
   3781     NULL,
   3782     _ISO2022getName,
   3783     _ISO_2022_WriteSub, /* writes the substitution character */
   3784     _ISO_2022_SafeClone, /* clones converter plus extra data into one block */
   3785     _ISO_2022_GetUnicodeSet /* reports the set of convertible code points */
   3786 };
   3787 static const UConverterStaticData _ISO2022JPStaticData={
            /*
             * Static description of the ISO-2022-JP converter; _ISO2022JPData
             * points to it.
             * NOTE(review): the field roles noted on the right are inferred from the
             * values and the pre-existing comments -- confirm against the
             * UConverterStaticData declaration.
             */
   3788     sizeof(UConverterStaticData), /* size of this structure */
   3789     "ISO_2022_JP", /* converter name */
   3790     0, /* presumably the code page number (none given) */
   3791     UCNV_IBM, /* presumably the platform */
   3792     UCNV_ISO_2022, /* converter type, the same constant that starts _ISO2022JPImpl */
   3793     1, /* presumably the minimum number of bytes per UChar */
   3794     6, /* max 6 bytes per UChar: 4-byte escape sequence + DBCS */
   3795     { 0x1a, 0, 0, 0 }, /* presumably the default substitution bytes ... */
   3796     1, /* ... and how many of them are used */
   3797     FALSE,
   3798     FALSE,
   3799     0,
   3800     0,
   3801     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   3802 };
   3803 static const UConverterSharedData _ISO2022JPData={
            /*
             * Shared data of the ISO-2022-JP converter: ties the static description
             * to the implementation table.
             */
   3804     sizeof(UConverterSharedData), /* size of this structure */
   3805     ~((uint32_t) 0), /* all bits set; NOTE(review): presumably a reference counter marking never-freed data -- confirm */
   3806     NULL,
   3807     NULL,
   3808     &_ISO2022JPStaticData, /* static converter description */
   3809     FALSE,
   3810     &_ISO2022JPImpl, /* implementation function table */
   3811     0
   3812 };
   3813 
   3814 /************* KR ***************/
   3815 static const UConverterImpl _ISO2022KRImpl={
            /*
             * Implementation table of the ISO-2022-KR converter; it is referenced
             * from _ISO2022KRData.
             * NOTE(review): the slot roles noted on the right are inferred from the
             * names of the installed functions -- confirm against the UConverterImpl
             * declaration.
             */
   3816     UCNV_ISO_2022, /* converter type */
   3817 
   3818     NULL, /* two slots not used by this converter */
   3819     NULL,
   3820 
   3821     _ISO2022Open, /* open / close / reset, shared by all ISO 2022 tables */
   3822     _ISO2022Close,
   3823     _ISO2022Reset,
   3824 
   3825     UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC, /* same function in both toUnicode slots */
   3826     UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,
   3827     UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC, /* same function in both fromUnicode slots */
   3828     UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,
   3829     NULL,
   3830 
   3831     NULL,
   3832     _ISO2022getName,
   3833     _ISO_2022_WriteSub, /* writes the substitution character */
   3834     _ISO_2022_SafeClone, /* clones converter plus extra data into one block */
   3835     _ISO_2022_GetUnicodeSet /* reports the set of convertible code points */
   3836 };
   3837 static const UConverterStaticData _ISO2022KRStaticData={
            /*
             * Static description of the ISO-2022-KR converter; _ISO2022KRData
             * points to it.
             * NOTE(review): the field roles noted on the right are inferred from the
             * values and the pre-existing comments -- confirm against the
             * UConverterStaticData declaration.
             */
   3838     sizeof(UConverterStaticData), /* size of this structure */
   3839     "ISO_2022_KR", /* converter name */
   3840     0, /* presumably the code page number (none given) */
   3841     UCNV_IBM, /* presumably the platform */
   3842     UCNV_ISO_2022, /* converter type, the same constant that starts _ISO2022KRImpl */
   3843     1, /* presumably the minimum number of bytes per UChar */
   3844     3, /* max 3 bytes per UChar: SO+DBCS */
   3845     { 0x1a, 0, 0, 0 }, /* presumably the default substitution bytes ... */
   3846     1, /* ... and how many of them are used */
   3847     FALSE,
   3848     FALSE,
   3849     0,
   3850     0,
   3851     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   3852 };
   3853 static const UConverterSharedData _ISO2022KRData={
            /*
             * Shared data of the ISO-2022-KR converter: ties the static description
             * to the implementation table.
             */
   3854     sizeof(UConverterSharedData), /* size of this structure */
   3855     ~((uint32_t) 0), /* all bits set; NOTE(review): presumably a reference counter marking never-freed data -- confirm */
   3856     NULL,
   3857     NULL,
   3858     &_ISO2022KRStaticData, /* static converter description */
   3859     FALSE,
   3860     &_ISO2022KRImpl, /* implementation function table */
   3861     0
   3862 };
   3863 
   3864 /*************** CN ***************/
   3865 static const UConverterImpl _ISO2022CNImpl={
            /*
             * Implementation table of the ISO-2022-CN converter; it is referenced
             * from _ISO2022CNData.
             * NOTE(review): the slot roles noted on the right are inferred from the
             * names of the installed functions -- confirm against the UConverterImpl
             * declaration.
             */
   3866 
   3867     UCNV_ISO_2022, /* converter type */
   3868 
   3869     NULL, /* two slots not used by this converter */
   3870     NULL,
   3871 
   3872     _ISO2022Open, /* open / close / reset, shared by all ISO 2022 tables */
   3873     _ISO2022Close,
   3874     _ISO2022Reset,
   3875 
   3876     UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC, /* same function in both toUnicode slots */
   3877     UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,
   3878     UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC, /* same function in both fromUnicode slots */
   3879     UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,
   3880     NULL,
   3881 
   3882     NULL,
   3883     _ISO2022getName,
   3884     _ISO_2022_WriteSub, /* writes the substitution character */
   3885     _ISO_2022_SafeClone, /* clones converter plus extra data into one block */
   3886     _ISO_2022_GetUnicodeSet /* reports the set of convertible code points */
   3887 };
   3888 static const UConverterStaticData _ISO2022CNStaticData={
            /*
             * Static description of the ISO-2022-CN converter; _ISO2022CNData
             * points to it.
             * NOTE(review): the field roles noted on the right are inferred from the
             * values and the pre-existing comments -- confirm against the
             * UConverterStaticData declaration.
             */
   3889     sizeof(UConverterStaticData), /* size of this structure */
   3890     "ISO_2022_CN", /* converter name */
   3891     0, /* presumably the code page number (none given) */
   3892     UCNV_IBM, /* presumably the platform */
   3893     UCNV_ISO_2022, /* converter type, the same constant that starts _ISO2022CNImpl */
   3894     1, /* presumably the minimum number of bytes per UChar */
   3895     8, /* max 8 bytes per UChar: 4-byte CNS designator + 2 bytes for SS2/SS3 + DBCS */
   3896     { 0x1a, 0, 0, 0 }, /* presumably the default substitution bytes ... */
   3897     1, /* ... and how many of them are used */
   3898     FALSE,
   3899     FALSE,
   3900     0,
   3901     0,
   3902     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   3903 };
   3904 static const UConverterSharedData _ISO2022CNData={
            /*
             * Shared data of the ISO-2022-CN converter: ties the static description
             * to the implementation table.
             */
   3905     sizeof(UConverterSharedData), /* size of this structure */
   3906     ~((uint32_t) 0), /* all bits set; NOTE(review): presumably a reference counter marking never-freed data -- confirm */
   3907     NULL,
   3908     NULL,
   3909     &_ISO2022CNStaticData, /* static converter description */
   3910     FALSE,
   3911     &_ISO2022CNImpl, /* implementation function table */
   3912     0
   3913 };
   3914 
   3915 
   3916 
   3917 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
   3918