Home | History | Annotate | Download | only in common
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 2000-2014, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 *   file name:  ucnv2022.cpp
      7 *   encoding:   US-ASCII
      8 *   tab size:   8 (not used)
      9 *   indentation:4
     10 *
     11 *   created on: 2000feb03
     12 *   created by: Markus W. Scherer
     13 *
     14 *   Change history:
     15 *
     16 *   06/29/2000  helena  Major rewrite of the callback APIs.
     17 *   08/08/2000  Ram     Included support for ISO-2022-JP-2
     18 *                       Changed implementation of toUnicode
     19 *                       function
     20 *   08/21/2000  Ram     Added support for ISO-2022-KR
     21 *   08/29/2000  Ram     Separated implementation of EBCDIC to
     22 *                       ucnvebdc.c
     23 *   09/20/2000  Ram     Added support for ISO-2022-CN
     24 *                       Added implementations for getNextUChar()
     25 *                       for specific 2022 country variants.
     26 *   10/31/2000  Ram     Implemented offsets logic functions
     27 */
     28 
     29 #include "unicode/utypes.h"
     30 
     31 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
     32 
     33 #include "unicode/ucnv.h"
     34 #include "unicode/uset.h"
     35 #include "unicode/ucnv_err.h"
     36 #include "unicode/ucnv_cb.h"
     37 #include "unicode/utf16.h"
     38 #include "ucnv_imp.h"
     39 #include "ucnv_bld.h"
     40 #include "ucnv_cnv.h"
     41 #include "ucnvmbcs.h"
     42 #include "cstring.h"
     43 #include "cmemory.h"
     44 #include "uassert.h"
     45 /* Element count of a true array (sizeof of the array divided by sizeof of its first element), cast to int32_t. */
     46 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     47 
     48 #ifdef U_ENABLE_GENERIC_ISO_2022
     49 /*
     50  * I am disabling the generic ISO-2022 converter after proposing to do so on
     51  * the icu mailing list two days ago.
     52  *
     53  * Reasons:
     54  * 1. It does not fully support the ISO-2022/ECMA-35 specification with all of
     55  *    its designation sequences, single shifts with return to the previous state,
     56  *    switch-with-no-return to UTF-16BE or similar, etc.
     57  *    This is unlike the language-specific variants like ISO-2022-JP which
     58  *    require a much smaller repertoire of ISO-2022 features.
     59  *    These variants continue to be supported.
     60  * 2. I believe that no one is really using the generic ISO-2022 converter
     61  *    but rather always one of the language-specific variants.
     62  *    Note that ICU's generic ISO-2022 converter has always output one escape
     63  *    sequence followed by UTF-8 for the whole stream.
     64  * 3. Switching between subcharsets is extremely slow, because each time
     65  *    the previous converter is closed and a new one opened,
     66  *    without any kind of caching, least-recently-used list, etc.
     67  * 4. The code is currently buggy, and given the above it does not seem
     68  *    reasonable to spend the time on maintenance.
     69  * 5. ISO-2022 subcharsets should normally be used with 7-bit byte encodings.
     70  *    This means, for example, that when ISO-8859-7 is designated, the following
     71  *    ISO-2022 bytes 00..7f should be interpreted as ISO-8859-7 bytes 80..ff.
     72  *    The ICU ISO-2022 converter does not handle this - and has no information
     73  *    about which subconverter would have to be shifted vs. which is designed
     74  *    for 7-bit ISO-2022.
     75  *
     76  * Markus Scherer 2003-dec-03
     77  */
     78 #endif
     79 /* The byte 0x0F (bit position 0xf is SI in the IS_2022_CONTROL mask) as a NUL-terminated string. */
     80 static const char SHIFT_IN_STR[]  = "\x0F";
     81 // static const char SHIFT_OUT_STR[] = "\x0E";
     82 /* ASCII code points of the control and whitespace characters named by these macros. */
     83 #define CR      0x0D
     84 #define LF      0x0A
     85 #define H_TAB   0x09
     86 #define V_TAB   0x0B
     87 #define SPACE   0x20
     88 /* Bounds U+FF61 and U+FF9F of the code point range this file calls HWKANA (halfwidth Katakana); presumably inclusive - confirm at the use sites. */
     89 enum {
     90     HWKANA_START=0xff61,
     91     HWKANA_END=0xff9f
     92 };
     93 
     94 /*
     95  * 94-character sets with native byte values A1..FE are encoded in ISO 2022
     96  * as bytes 21..7E. (Subtract 0x80.)
     97  * 96-character sets with native byte values A0..FF are encoded in ISO 2022
     98  * as bytes 20..7F. (Subtract 0x80.)
     99  * Do not encode C1 control codes with native bytes 80..9F
    100  * as bytes 00..1F (C0 control codes).
    101  */
    102 enum {
    103     GR94_START=0xa1,    /* first native byte value of a 94-character set */
    104     GR94_END=0xfe,      /* last native byte value of a 94-character set */
    105     GR96_START=0xa0,    /* first native byte value of a 96-character set */
    106     GR96_END=0xff       /* last native byte value of a 96-character set */
    107 };
    108 
    109 /*
    110  * ISO 2022 control codes must not be converted from Unicode
    111  * because they would mess up the byte stream.
    112  * The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b
    113  * corresponding to SO, SI, and ESC. The macro expands c twice, so do not pass an argument with side effects.
    114  */
    115 #define IS_2022_CONTROL(c) (((c)<0x20) && (((uint32_t)1<<(c))&0x0800c000)!=0)
    116 
    117 /* Charset and shift-state identifiers for the ISO-2022-JP and -CN implementations; the JP and CN constants share the numeric values 1..3. */
    118 typedef enum  {
    119         /* shared values */
    120         INVALID_STATE=-1,
    121         ASCII = 0,
    122 
    123         SS2_STATE=0x10,
    124         SS3_STATE,        /* 0x11 */
    125 
    126         /* JP (ISO8859_7, JISX208, JISX212, GB2312 and KSC5601 are also used as myConverterArray[] indexes in _ISO2022Open) */
    127         ISO8859_1 = 1 ,
    128         ISO8859_7 = 2 ,
    129         JISX201  = 3,
    130         JISX208 = 4,
    131         JISX212 = 5,
    132         GB2312  =6,
    133         KSC5601 =7,
    134         HWKANA_7BIT=8,    /* Halfwidth Katakana 7 bit */
    135 
    136         /* CN */
    137         /* the first few enum constants must keep their values because they correspond to myConverterArray[] */
    138         GB2312_1=1,
    139         ISO_IR_165=2,
    140         CNS_11643=3,
    141 
    142         /*
    143          * these are used in StateEnum and ISO2022State variables,
    144          * but CNS_11643 must be used to index into myConverterArray[]
    145          */
    146         CNS_11643_0=0x20,
    147         CNS_11643_1,
    148         CNS_11643_2,
    149         CNS_11643_3,
    150         CNS_11643_4,
    151         CNS_11643_5,
    152         CNS_11643_6,
    153         CNS_11643_7       /* 0x27 */
    154 } StateEnum;
    155 
    156 /* Is the StateEnum charset value one of JISX208, JISX212, GB2312 or KSC5601 (values 4..7), i.e. a DBCS charset? Expands cs twice. */
    157 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)
    158 /* Charset mask: a value with only bit number cs set; used to build and to test the entries of jpCharsetMasks[]. */
    159 #define CSM(cs) ((uint16_t)1<<(cs))
    160 
    161 /*
    162  * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence
    163  * to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x
    164  *
    165  * Note: The converter uses some leniency:
    166  * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in
    167  *   all versions, not just JIS7 and JIS8.
    168  * - ICU does not distinguish between different versions of JIS X 0208.
    169  */
    170 enum { MAX_JA_VERSION=4 };  /* last valid index of jpCharsetMasks[]; _ISO2022Open resets larger version numbers to 0 */
    171 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
    172     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),  /* version 0 */
    173     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),  /* version 1: adds JISX212 */
    174     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),  /* version 2: adds GB2312, KSC5601, ISO8859_1, ISO8859_7 */
    175     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),  /* version 3: same mask as version 2 */
    176     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)  /* version 4: same mask as version 2 */
    177 };
    178 /* Charset type categories; this is the type of UConverterDataISO2022.currentType, which _ISO2022Open initializes to ASCII1. */
    179 typedef enum {
    180         ASCII1=0,
    181         LATIN1,
    182         SBCS,
    183         DBCS,
    184         MBCS,
    185         HWKANA
    186 }Cnv2022Type;
    187 /* ISO 2022 designation and shift state; UConverterDataISO2022 holds one instance for toUnicode and one for fromUnicode. */
    188 typedef struct ISO2022State {
    189     int8_t cs[4];       /* charset number for SI (G0)/SO (G1)/SS2 (G2)/SS3 (G3) */
    190     int8_t g;           /* 0..3 for G0..G3 (SI/SO/SS2/SS3) */
    191     int8_t prevG;       /* g before single shift (SS2 or SS3) */
    192 } ISO2022State;
    193 /* Mask that extracts the version number (low 4 bits) from the converter options, and the element count of myConverterArray[]. */
    194 #define UCNV_OPTIONS_VERSION_MASK 0xf
    195 #define UCNV_2022_MAX_CONVERTERS 10
    196 /* Per-converter ISO-2022 data; _ISO2022Open allocates it with uprv_malloc, zero-fills it and stores it in cnv->extraInfo. */
    197 typedef struct{
    198     UConverterSharedData *myConverterArray[UCNV_2022_MAX_CONVERTERS];  /* shared data loaded by ucnv_loadSharedData(), indexed by StateEnum values */
    199     UConverter *currentConverter;  /* for locale=ko: the converter opened with ucnv_open() */
    200     Cnv2022Type currentType;
    201     ISO2022State toU2022State, fromU2022State;  /* separate state per conversion direction */
    202     uint32_t key;  /* NOTE(review): presumably the escape sequence key accumulated so far - confirm against the state-change code */
    203     uint32_t version;  /* pArgs->options & UCNV_OPTIONS_VERSION_MASK, adjusted per locale in _ISO2022Open */
    204 #ifdef U_ENABLE_GENERIC_ISO_2022
    205     UBool isFirstBuffer;
    206 #endif
    207     UBool isEmptySegment;
    208     char name[30];  /* e.g. "ISO_2022,locale=ko,version=0" */
    209     char locale[3];  /* "ja", "ko" or "cn" */
    210 }UConverterDataISO2022;
    211 
    212 /* Protos */
    213 /* ISO-2022 ----------------------------------------------------------------- */
    214 
    215 /*Forward declaration */
    216 U_CFUNC void
    217 ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs * args,
    218                       UErrorCode * err);
    219 U_CFUNC void
    220 ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs * args,
    221                                     UErrorCode * err);
    222 /* Byte value 0x1B of the ESC control character, e.g. the first byte of the escape sequence ESC $ B. */
    223 #define ESC_2022 0x1B /*ESC*/
    224 /* Match states for escape sequence lookup; escSeqStateTable_Value_2022[] stores these values as int8_t. */
    225 typedef enum
    226 {
    227         INVALID_2022 = -1, /*Doesn't correspond to a valid iso 2022 escape sequence*/
    228         VALID_NON_TERMINAL_2022 = 0, /*so far corresponds to a valid iso 2022 escape sequence*/
    229         VALID_TERMINAL_2022 = 1, /*corresponds to a valid iso 2022 escape sequence*/
    230         VALID_MAYBE_TERMINAL_2022 = 2 /*so far matches one iso 2022 escape sequence, but by adding more characters might match another escape sequence*/
    231 } UCNV_TableStates_2022;
    232 
    233 /*
    234 * The way these state transition arrays work is:
    235 * ex : ESC$B is the sequence for JISX208
    236 *      a) First Iteration: char is ESC
    237 *          i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index
    238 *             int x = normalize_esq_chars_2022[27] which is equal to 1
    239 *         ii) Search for this value in escSeqStateTable_Key_2022[]
    240 *             value of x is stored at escSeqStateTable_Key_2022[0]
    241 *        iii) Save this index as offset
    242 *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
    243 *             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
    244 *     b) Switch on this state and continue to next char
    245 *          i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index
    246 *             which is normalize_esq_chars_2022[36] == 4
    247 *         ii) x is currently 1(from above)
    248 *               x<<=5 -- x is now 32
    249 *               x+=normalize_esq_chars_2022[36]
    250 *               now x is 36
    251 *        iii) Search for this value in escSeqStateTable_Key_2022[]
    252 *             value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2
    253 *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
    254 *             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
    255 *     c) Switch on this state and continue to next char
    256 *        i)  Get the value of B from normalize_esq_chars_2022[] with int value of B as index
    257 *        ii) x is currently 36 (from above)
    258 *            x<<=5 -- x is now 1152
    259 *            x+=normalize_esq_chars_2022[66]
    260 *            now x is 1161
    261 *       iii) Search for this value in escSeqStateTable_Key_2022[]
    262 *            value of x is stored at escSeqStateTable_Key_2022[24], so offset is 24
    263 *        iv) Get state of this sequence from escSeqStateTable_Value_2022[24]
    264 *            escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022
    265 *         v) Get the converter name from escSeqStateTable_Result_2022[24] which is "JISX-208"
    266 */
    267 
    268 
    269 /*Below are the 3 arrays depicting a state transition table*/
    270 static const int8_t normalize_esq_chars_2022[256] = {
    271 /*       0      1       2       3       4      5       6        7       8       9           */
    272 /* Indexed by byte value: a nonzero entry (1..29) is the byte's code that is packed, 5 bits per byte, into an escSeqStateTable_Key_2022[] key. */
    273          0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    274         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    275         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,1      ,0      ,0
    276         ,0     ,0      ,0      ,0      ,0      ,0      ,4      ,7      ,29      ,0
    277         ,2     ,24     ,26     ,27     ,0      ,3      ,23     ,6      ,0      ,0
    278         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    279         ,0     ,0      ,0      ,0      ,5      ,8      ,9      ,10     ,11     ,12
    280         ,13    ,14     ,15     ,16     ,17     ,18     ,19     ,20     ,25     ,28
    281         ,0     ,0      ,21     ,0      ,0      ,0      ,0      ,0      ,0      ,0
    282         ,22    ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    283         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    284         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    285         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    286         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    287         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    288         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    289         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    290         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    291         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    292         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    293         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    294         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    295         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    296         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    297         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    298         ,0     ,0      ,0      ,0      ,0      ,0
    299 };
    300 
    301 #ifdef U_ENABLE_GENERIC_ISO_2022
    302 /*
    303  * When the generic ISO-2022 converter is completely removed, not just disabled
    304  * per #ifdef, then the following state table and the associated tables that are
    305  * dimensioned with MAX_STATES_2022 should be trimmed.
    306  *
    307  * Especially, VALID_MAYBE_TERMINAL_2022 will not be used any more, and all of
    308  * the associated escape sequences starting with ESC ( B should be removed.
    309  * This includes the ones with key values 1097 and all of the ones above 1000000.
    310  *
    311  * For the latter, the tables can simply be truncated.
    312  * For the former, since the tables must be kept parallel, it is probably best
    313  * to simply duplicate an adjacent table cell, parallel in all tables.
    314  *
    315  * It may make sense to restructure the tables, especially by using small search
    316  * tables for the variants instead of indexing them parallel to the table here.
    317  */
    318 #endif
    319 /* Number of entries in each of the parallel escSeqStateTable_*_2022[] tables. */
    320 #define MAX_STATES_2022 74
    321 static const int32_t escSeqStateTable_Key_2022[MAX_STATES_2022] = {
    322 /*   0           1           2           3           4           5           6           7           8           9           */
    323 /* Keys are listed in ascending order; each key packs the normalize_esq_chars_2022[] codes of an escape sequence (or prefix), 5 bits per byte. */
    324      1          ,34         ,36         ,39         ,55         ,57         ,60         ,61         ,1093       ,1096
    325     ,1097       ,1098       ,1099       ,1100       ,1101       ,1102       ,1103       ,1104       ,1105       ,1106
    326     ,1109       ,1154       ,1157       ,1160       ,1161       ,1176       ,1178       ,1179       ,1254       ,1257
    327     ,1768       ,1773       ,1957       ,35105      ,36933      ,36936      ,36937      ,36938      ,36939      ,36940
    328     ,36942      ,36943      ,36944      ,36945      ,36946      ,36947      ,36948      ,37640      ,37642      ,37644
    329     ,37646      ,37711      ,37744      ,37745      ,37746      ,37747      ,37748      ,40133      ,40136      ,40138
    330     ,40139      ,40140      ,40141      ,1123363    ,35947624   ,35947625   ,35947626   ,35947627   ,35947629   ,35947630
    331     ,35947631   ,35947635   ,35947636   ,35947638
    332 };
    333 
    334 #ifdef U_ENABLE_GENERIC_ISO_2022
    335 
    336 static const char* const escSeqStateTable_Result_2022[MAX_STATES_2022] = {
    337  /*  0                      1                        2                      3                   4                   5                        6                      7                       8                       9    */
    338 /* Converter name for the key at the same index of escSeqStateTable_Key_2022[]; NULL where no name is assigned. */
    339      NULL                   ,NULL                   ,NULL                   ,NULL               ,NULL               ,NULL                   ,NULL                   ,NULL                   ,"latin1"               ,"latin1"
    340     ,"latin1"               ,"ibm-865"              ,"ibm-865"              ,"ibm-865"          ,"ibm-865"          ,"ibm-865"              ,"ibm-865"              ,"JISX0201"             ,"JISX0201"             ,"latin1"
    341     ,"latin1"               ,NULL                   ,"JISX-208"             ,"ibm-5478"         ,"JISX-208"         ,NULL                   ,NULL                   ,NULL                   ,NULL                   ,"UTF8"
    342     ,"ISO-8859-1"           ,"ISO-8859-7"           ,"JIS-X-208"            ,NULL               ,"ibm-955"          ,"ibm-367"              ,"ibm-952"              ,"ibm-949"              ,"JISX-212"             ,"ibm-1383"
    343     ,"ibm-952"              ,"ibm-964"              ,"ibm-964"              ,"ibm-964"          ,"ibm-964"          ,"ibm-964"              ,"ibm-964"              ,"ibm-5478"         ,"ibm-949"              ,"ISO-IR-165"
    344     ,"CNS-11643-1992,1"     ,"CNS-11643-1992,2"     ,"CNS-11643-1992,3"     ,"CNS-11643-1992,4" ,"CNS-11643-1992,5" ,"CNS-11643-1992,6"     ,"CNS-11643-1992,7"     ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian"
    345     ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,NULL               ,"latin1"           ,"ibm-912"              ,"ibm-913"              ,"ibm-914"              ,"ibm-813"              ,"ibm-1089"
    346     ,"ibm-920"              ,"ibm-915"              ,"ibm-915"              ,"latin1"
    347 };
    348 
    349 #endif
    350 /* UCNV_TableStates_2022 value for the key at the same index of escSeqStateTable_Key_2022[]. */
    351 static const int8_t escSeqStateTable_Value_2022[MAX_STATES_2022] = {
    352 /*          0                           1                         2                             3                           4                           5                               6                        7                          8                           9       */
    353      VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022     ,VALID_NON_TERMINAL_2022   ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    354     ,VALID_MAYBE_TERMINAL_2022  ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    355     ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022
    356     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    357     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    358     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    359     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    360     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    361 };
    362 
    363 
    364 /* ISO-2022 variant identifiers (typedef introduced for refactoring the changeState_2022 code); the generic ISO_2022 value is compiled only with U_ENABLE_GENERIC_ISO_2022. */
    365 typedef enum{
    366 #ifdef U_ENABLE_GENERIC_ISO_2022
    367     ISO_2022=0,
    368 #endif
    369     ISO_2022_JP=1,
    370     ISO_2022_KR=2,
    371     ISO_2022_CN=3
    372 } Variant2022;
    373 
    374 /*********** ISO 2022 Converter Protos ***********/
    375 static void
    376 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode);
    377 
    378 static void
    379  _ISO2022Close(UConverter *converter);
    380 
    381 static void
    382 _ISO2022Reset(UConverter *converter, UConverterResetChoice choice);
    383 
    384 static const char*
    385 _ISO2022getName(const UConverter* cnv);
    386 
    387 static void
    388 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err);
    389 
    390 static UConverter *
    391 _ISO_2022_SafeClone(const UConverter *cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status);
    392 
    393 #ifdef U_ENABLE_GENERIC_ISO_2022
    394 static void
    395 T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, UErrorCode* err);
    396 #endif
    397 
    398 namespace {
    399 
    400 /*const UConverterSharedData _ISO2022Data;*/
    401 extern const UConverterSharedData _ISO2022JPData;
    402 extern const UConverterSharedData _ISO2022KRData;
    403 extern const UConverterSharedData _ISO2022CNData;
    404 
    405 }  // namespace
    406 
    407 /*************** Converter implementations ******************/
    408 
    409 /* Adapter for callers whose target pointer is uint8_t* instead of char*: forwards every argument to ucnv_fromUWriteBytes() and writes the advanced pointer back to *target. The purpose of this function is to get around gcc compiler warnings. */
    410 static inline void
    411 fromUWriteUInt8(UConverter *cnv,
    412                  const char *bytes, int32_t length,
    413                  uint8_t **target, const char *targetLimit,
    414                  int32_t **offsets,
    415                  int32_t sourceIndex,
    416                  UErrorCode *pErrorCode)
    417 {
    418     char *targetChars = (char *)*target;  /* char* copy of the caller's target pointer */
    419     ucnv_fromUWriteBytes(cnv, bytes, length, &targetChars, targetLimit,
    420                          offsets, sourceIndex, pErrorCode);
    421     *target = (uint8_t*)targetChars;  /* hand the pointer, as updated by the callee, back to the caller */
    422 
    423 }
    424 /* For version 1 only: zeroes toUnicodeStatus, mode and toULength of myConverterData->currentConverter; does nothing for other versions. The first parameter is unused. */
    425 static inline void
    426 setInitialStateToUnicodeKR(UConverter* /*converter*/, UConverterDataISO2022 *myConverterData){
    427     if(myConverterData->version == 1) {
    428         UConverter *cnv = myConverterData->currentConverter;
    429 
    430         cnv->toUnicodeStatus=0;     /* offset */
    431         cnv->mode=0;                /* state */
    432         cnv->toULength=0;           /* byteIndex */
    433     }
    434 }
    435 /* If converter->charErrorBuffer is empty, stores the 4-byte designator 1B 24 29 43 (ESC $ ) C) in it; for version 1 also sets fromUChar32=0 and fromUnicodeStatus=1 on myConverterData->currentConverter. */
    436 static inline void
    437 setInitialStateFromUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConverterData){
    438    /* in ISO-2022-KR the designator sequence appears only once
    439     * in a file so we append it only once
    440     */
    441     if( converter->charErrorBufferLength==0){
    442 
    443         converter->charErrorBufferLength = 4;
    444         converter->charErrorBuffer[0] = 0x1b;  /* ESC */
    445         converter->charErrorBuffer[1] = 0x24;  /* '$' */
    446         converter->charErrorBuffer[2] = 0x29;  /* ')' */
    447         converter->charErrorBuffer[3] = 0x43;  /* 'C' */
    448     }
    449     if(myConverterData->version == 1) {
    450         UConverter *cnv = myConverterData->currentConverter;
    451 
    452         cnv->fromUChar32=0;
    453         cnv->fromUnicodeStatus=1;   /* prevLength */
    454     }
    455 }
    456 
    457 static void
    458 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
    459 
    460     char myLocale[6]={' ',' ',' ',' ',' ',' '};
    461 
    462     cnv->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022));
    463     if(cnv->extraInfo != NULL) {
    464         UConverterNamePieces stackPieces;
    465         UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER;
    466         UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo;
    467         uint32_t version;
    468 
    469         stackArgs.onlyTestIsLoadable = pArgs->onlyTestIsLoadable;
    470 
    471         uprv_memset(myConverterData, 0, sizeof(UConverterDataISO2022));
    472         myConverterData->currentType = ASCII1;
    473         cnv->fromUnicodeStatus =FALSE;
    474         if(pArgs->locale){
    475             uprv_strncpy(myLocale, pArgs->locale, sizeof(myLocale));
    476         }
    477         version = pArgs->options & UCNV_OPTIONS_VERSION_MASK;
    478         myConverterData->version = version;
    479         /* Begin Google-specific change. */
    480         /* The "jk" locale ID was made up for KDDI ISO-2022-JP. */
    481         /* The "js" locale ID was made up for SoftBank ISO-2022-JP. */
    482         if((myLocale[0]=='j' &&
    483             (myLocale[1]=='a'|| myLocale[1]=='p' || myLocale[1]=='k' ||
    484              myLocale[1]=='s') &&
    485             (myLocale[2]=='_' || myLocale[2]=='\0')))
    486         {
    487             size_t len=0;
    488             /* open the required converters and cache them */
    489             if(version>MAX_JA_VERSION) {
    490                 /* prevent indexing beyond jpCharsetMasks[] */
    491                 myConverterData->version = version = 0;
    492             }
    493             if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
    494                 myConverterData->myConverterArray[ISO8859_7] =
    495                     ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode);
    496             }
    497             if (myLocale[1]=='k') {  /* Use KDDI's version. */
    498                 myConverterData->myConverterArray[JISX208]  =
    499                     ucnv_loadSharedData("kddi-jisx-208-2007", &stackPieces, &stackArgs, errorCode);
    500             } else if (myLocale[1]=='s') {  /* Use SoftBank's version. */
    501                 myConverterData->myConverterArray[JISX208]  =
    502                     ucnv_loadSharedData("softbank-jisx-208-2007", &stackPieces, &stackArgs, errorCode);
    503             } else {
    504                 /*
    505                  * Change for http://b/issue?id=937017 :
    506                  * Restore JIS X 0208 ISO-2022-JP mappings from before
    507                  * sharing the table with the Shift-JIS converter
    508                  * (CL 5963009 and http://bugs.icu-project.org/trac/ticket/5797).
    509                  * TODO(mscherer): Create and use a new, unified Google Shift-JIS
    510                  * table for both Shift-JIS and ISO-2022-JP.
    511                  */
    512                 myConverterData->myConverterArray[JISX208]  =
    513                     ucnv_loadSharedData("jisx-208", &stackPieces, &stackArgs, errorCode);
    514             }
    515             /* End Google-specific change. */
    516             if(jpCharsetMasks[version]&CSM(JISX212)) {
    517                 myConverterData->myConverterArray[JISX212] =
    518                     ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode);
    519             }
    520             if(jpCharsetMasks[version]&CSM(GB2312)) {
    521                 myConverterData->myConverterArray[GB2312] =
    522                     /* BEGIN android-changed */
    523                     ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode); /* gb_2312_80-1 */
    524                     /* END android-changed */
    525             }
    526             if(jpCharsetMasks[version]&CSM(KSC5601)) {
    527                 myConverterData->myConverterArray[KSC5601] =
    528                     ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode);
    529             }
    530 
    531             /* set the function pointers to appropriate funtions */
    532             cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);
    533             uprv_strcpy(myConverterData->locale,"ja");
    534 
    535             (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=");
    536             len = uprv_strlen(myConverterData->name);
    537             myConverterData->name[len]=(char)(myConverterData->version+(int)'0');
    538             myConverterData->name[len+1]='\0';
    539         }
    540         else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
    541             (myLocale[2]=='_' || myLocale[2]=='\0'))
    542         {
    543             const char *cnvName;
    544             if(version==1) {
    545                 cnvName="icu-internal-25546";
    546             } else {
    547                 /* BEGIN android-changed */
    548                 cnvName="ksc_5601";
    549                 /* END android-changed */
    550                 myConverterData->version=version=0;
    551             }
    552             if(pArgs->onlyTestIsLoadable) {
    553                 ucnv_canCreateConverter(cnvName, errorCode);  /* errorCode carries result */
    554                 uprv_free(cnv->extraInfo);
    555                 cnv->extraInfo=NULL;
    556                 return;
    557             } else {
    558                 myConverterData->currentConverter=ucnv_open(cnvName, errorCode);
    559                 if (U_FAILURE(*errorCode)) {
    560                     _ISO2022Close(cnv);
    561                     return;
    562                 }
    563 
    564                 if(version==1) {
    565                     (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=1");
    566                     uprv_memcpy(cnv->subChars, myConverterData->currentConverter->subChars, 4);
    567                     cnv->subCharLen = myConverterData->currentConverter->subCharLen;
    568                 }else{
    569                     (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=0");
    570                 }
    571 
    572                 /* initialize the state variables */
    573                 setInitialStateToUnicodeKR(cnv, myConverterData);
    574                 setInitialStateFromUnicodeKR(cnv, myConverterData);
    575 
    576                 /* set the function pointers to appropriate funtions */
    577                 cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData;
    578                 uprv_strcpy(myConverterData->locale,"ko");
    579             }
    580         }
    581         else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& myLocale[1]=='n'))&&
    582             (myLocale[2]=='_' || myLocale[2]=='\0'))
    583         {
    584 
    585             /* open the required converters and cache them */
    586             /* BEGIN android-changed */
    587             myConverterData->myConverterArray[GB2312_1] =
    588                 ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode);
    589             if(version==1) {
    590                 myConverterData->myConverterArray[ISO_IR_165] =
    591                     ucnv_loadSharedData("noop-iso-ir-165", &stackPieces, &stackArgs, errorCode);
    592             }
    593             myConverterData->myConverterArray[CNS_11643] =
    594                 ucnv_loadSharedData("noop-cns-11643", &stackPieces, &stackArgs, errorCode);
    595             /* END android-changed */
    596 
    597 
    598             /* set the function pointers to appropriate funtions */
    599             cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData;
    600             uprv_strcpy(myConverterData->locale,"cn");
    601 
    602             if (version==0){
    603                 myConverterData->version = 0;
    604                 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=0");
    605             }else if (version==1){
    606                 myConverterData->version = 1;
    607                 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=1");
    608             }else {
    609                 myConverterData->version = 2;
    610                 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2");
    611             }
    612         }
    613         else{
    614 #ifdef U_ENABLE_GENERIC_ISO_2022
    615             myConverterData->isFirstBuffer = TRUE;
    616 
    617             /* append the UTF-8 escape sequence */
    618             cnv->charErrorBufferLength = 3;
    619             cnv->charErrorBuffer[0] = 0x1b;
    620             cnv->charErrorBuffer[1] = 0x25;
    621             cnv->charErrorBuffer[2] = 0x42;
    622 
    623             cnv->sharedData=(UConverterSharedData*)&_ISO2022Data;
    624             /* initialize the state variables */
    625             uprv_strcpy(myConverterData->name,"ISO_2022");
    626 #else
    627             *errorCode = U_UNSUPPORTED_ERROR;
    628             return;
    629 #endif
    630         }
    631 
    632         cnv->maxBytesPerUChar=cnv->sharedData->staticData->maxBytesPerChar;
    633 
    634         if(U_FAILURE(*errorCode) || pArgs->onlyTestIsLoadable) {
    635             _ISO2022Close(cnv);
    636         }
    637     } else {
    638         *errorCode = U_MEMORY_ALLOCATION_ERROR;
    639     }
    640 }
    641 
    642 
    643 static void
    644 _ISO2022Close(UConverter *converter) {
    645     UConverterDataISO2022* myData =(UConverterDataISO2022 *) (converter->extraInfo);
    646     UConverterSharedData **array = myData->myConverterArray;
    647     int32_t i;
    648 
    649     if (converter->extraInfo != NULL) {
    650         /*close the array of converter pointers and free the memory*/
    651         for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
    652             if(array[i]!=NULL) {
    653                 ucnv_unloadSharedDataIfReady(array[i]);
    654             }
    655         }
    656 
    657         ucnv_close(myData->currentConverter);
    658 
    659         if(!converter->isExtraLocal){
    660             uprv_free (converter->extraInfo);
    661             converter->extraInfo = NULL;
    662         }
    663     }
    664 }
    665 
    666 static void
    667 _ISO2022Reset(UConverter *converter, UConverterResetChoice choice) {
    668     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) (converter->extraInfo);
    669     if(choice<=UCNV_RESET_TO_UNICODE) {
    670         uprv_memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State));
    671         myConverterData->key = 0;
    672         myConverterData->isEmptySegment = FALSE;
    673     }
    674     if(choice!=UCNV_RESET_TO_UNICODE) {
    675         uprv_memset(&myConverterData->fromU2022State, 0, sizeof(ISO2022State));
    676     }
    677 #ifdef U_ENABLE_GENERIC_ISO_2022
    678     if(myConverterData->locale[0] == 0){
    679         if(choice<=UCNV_RESET_TO_UNICODE) {
    680             myConverterData->isFirstBuffer = TRUE;
    681             myConverterData->key = 0;
    682             if (converter->mode == UCNV_SO){
    683                 ucnv_close (myConverterData->currentConverter);
    684                 myConverterData->currentConverter=NULL;
    685             }
    686             converter->mode = UCNV_SI;
    687         }
    688         if(choice!=UCNV_RESET_TO_UNICODE) {
    689             /* re-append UTF-8 escape sequence */
    690             converter->charErrorBufferLength = 3;
    691             converter->charErrorBuffer[0] = 0x1b;
    692             converter->charErrorBuffer[1] = 0x28;
    693             converter->charErrorBuffer[2] = 0x42;
    694         }
    695     }
    696     else
    697 #endif
    698     {
    699         /* reset the state variables */
    700         if(myConverterData->locale[0] == 'k'){
    701             if(choice<=UCNV_RESET_TO_UNICODE) {
    702                 setInitialStateToUnicodeKR(converter, myConverterData);
    703             }
    704             if(choice!=UCNV_RESET_TO_UNICODE) {
    705                 setInitialStateFromUnicodeKR(converter, myConverterData);
    706             }
    707         }
    708     }
    709 }
    710 
    711 static const char*
    712 _ISO2022getName(const UConverter* cnv){
    713     if(cnv->extraInfo){
    714         UConverterDataISO2022* myData= (UConverterDataISO2022*)cnv->extraInfo;
    715         return myData->name;
    716     }
    717     return NULL;
    718 }
    719 
    720 
    721 /*************** to unicode *******************/
    722 /****************************************************************************
    723  * Recognized escape sequences are
    724  * <ESC>(B  ASCII
    725  * <ESC>.A  ISO-8859-1
    726  * <ESC>.F  ISO-8859-7
    727  * <ESC>(J  JISX-201
    728  * <ESC>(I  JISX-201
    729  * <ESC>$B  JISX-208
    730  * <ESC>$@  JISX-208
    731  * <ESC>$(D JISX-212
    732  * <ESC>$A  GB2312
    733  * <ESC>$(C KSC5601
    734  */
    735 static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= {
    736 /*      0                1               2               3               4               5               6               7               8               9    */
    737     INVALID_STATE   ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,SS2_STATE      ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    738     ,ASCII          ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,JISX201        ,HWKANA_7BIT    ,JISX201        ,INVALID_STATE
    739     ,INVALID_STATE  ,INVALID_STATE  ,JISX208        ,GB2312         ,JISX208        ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    740     ,ISO8859_1      ,ISO8859_7      ,JISX208        ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,KSC5601        ,JISX212        ,INVALID_STATE
    741     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    742     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    743     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    744     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    745 };
    746 
    747 /*************** to unicode *******************/
    748 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
    749 /*      0                1               2               3               4               5               6               7               8               9    */
    750      INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,SS2_STATE      ,SS3_STATE      ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    751     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    752     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    753     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    754     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,GB2312_1       ,INVALID_STATE  ,ISO_IR_165
    755     ,CNS_11643_1    ,CNS_11643_2    ,CNS_11643_3    ,CNS_11643_4    ,CNS_11643_5    ,CNS_11643_6    ,CNS_11643_7    ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    756     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    757     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    758 };
    759 
    760 
    761 static UCNV_TableStates_2022
    762 getKey_2022(char c,int32_t* key,int32_t* offset){
    763     int32_t togo;
    764     int32_t low = 0;
    765     int32_t hi = MAX_STATES_2022;
    766     int32_t oldmid=0;
    767 
    768     togo = normalize_esq_chars_2022[(uint8_t)c];
    769     if(togo == 0) {
    770         /* not a valid character anywhere in an escape sequence */
    771         *key = 0;
    772         *offset = 0;
    773         return INVALID_2022;
    774     }
    775     togo = (*key << 5) + togo;
    776 
    777     while (hi != low)  /*binary search*/{
    778 
    779         int32_t mid = (hi+low) >> 1; /*Finds median*/
    780 
    781         if (mid == oldmid)
    782             break;
    783 
    784         if (escSeqStateTable_Key_2022[mid] > togo){
    785             hi = mid;
    786         }
    787         else if (escSeqStateTable_Key_2022[mid] < togo){
    788             low = mid;
    789         }
    790         else /*we found it*/{
    791             *key = togo;
    792             *offset = mid;
    793             return (UCNV_TableStates_2022)escSeqStateTable_Value_2022[mid];
    794         }
    795         oldmid = mid;
    796 
    797     }
    798 
    799     *key = 0;
    800     *offset = 0;
    801     return INVALID_2022;
    802 }
    803 
    804 /*runs through a state machine to determine the escape sequence - codepage correspondance
    805  */
    806 static void
    807 changeState_2022(UConverter* _this,
    808                 const char** source,
    809                 const char* sourceLimit,
    810                 Variant2022 var,
    811                 UErrorCode* err){
    812     UCNV_TableStates_2022 value;
    813     UConverterDataISO2022* myData2022 = ((UConverterDataISO2022*)_this->extraInfo);
    814     uint32_t key = myData2022->key;
    815     int32_t offset = 0;
    816     int8_t initialToULength = _this->toULength;
    817     char c;
    818 
    819     value = VALID_NON_TERMINAL_2022;
    820     while (*source < sourceLimit) {
    821         c = *(*source)++;
    822         _this->toUBytes[_this->toULength++]=(uint8_t)c;
    823         value = getKey_2022(c,(int32_t *) &key, &offset);
    824 
    825         switch (value){
    826 
    827         case VALID_NON_TERMINAL_2022 :
    828             /* continue with the loop */
    829             break;
    830 
    831         case VALID_TERMINAL_2022:
    832             key = 0;
    833             goto DONE;
    834 
    835         case INVALID_2022:
    836             goto DONE;
    837 
    838         case VALID_MAYBE_TERMINAL_2022:
    839 #ifdef U_ENABLE_GENERIC_ISO_2022
    840             /* ESC ( B is ambiguous only for ISO_2022 itself */
    841             if(var == ISO_2022) {
    842                 /* discard toUBytes[] for ESC ( B because this sequence is correct and complete */
    843                 _this->toULength = 0;
    844 
    845                 /* TODO need to indicate that ESC ( B was seen; if failure, then need to replay from source or from MBCS-style replay */
    846 
    847                 /* continue with the loop */
    848                 value = VALID_NON_TERMINAL_2022;
    849                 break;
    850             } else
    851 #endif
    852             {
    853                 /* not ISO_2022 itself, finish here */
    854                 value = VALID_TERMINAL_2022;
    855                 key = 0;
    856                 goto DONE;
    857             }
    858         }
    859     }
    860 
    861 DONE:
    862     myData2022->key = key;
    863 
    864     if (value == VALID_NON_TERMINAL_2022) {
    865         /* indicate that the escape sequence is incomplete: key!=0 */
    866         return;
    867     } else if (value == INVALID_2022 ) {
    868         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    869     } else /* value == VALID_TERMINAL_2022 */ {
    870         switch(var){
    871 #ifdef U_ENABLE_GENERIC_ISO_2022
    872         case ISO_2022:
    873         {
    874             const char *chosenConverterName = escSeqStateTable_Result_2022[offset];
    875             if(chosenConverterName == NULL) {
    876                 /* SS2 or SS3 */
    877                 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    878                 _this->toUCallbackReason = UCNV_UNASSIGNED;
    879                 return;
    880             }
    881 
    882             _this->mode = UCNV_SI;
    883             ucnv_close(myData2022->currentConverter);
    884             myData2022->currentConverter = myUConverter = ucnv_open(chosenConverterName, err);
    885             if(U_SUCCESS(*err)) {
    886                 myUConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
    887                 _this->mode = UCNV_SO;
    888             }
    889             break;
    890         }
    891 #endif
    892         case ISO_2022_JP:
    893             {
    894                 StateEnum tempState=(StateEnum)nextStateToUnicodeJP[offset];
    895                 switch(tempState) {
    896                 case INVALID_STATE:
    897                     *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    898                     break;
    899                 case SS2_STATE:
    900                     if(myData2022->toU2022State.cs[2]!=0) {
    901                         if(myData2022->toU2022State.g<2) {
    902                             myData2022->toU2022State.prevG=myData2022->toU2022State.g;
    903                         }
    904                         myData2022->toU2022State.g=2;
    905                     } else {
    906                         /* illegal to have SS2 before a matching designator */
    907                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    908                     }
    909                     break;
    910                 /* case SS3_STATE: not used in ISO-2022-JP-x */
    911                 case ISO8859_1:
    912                 case ISO8859_7:
    913                     if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
    914                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    915                     } else {
    916                         /* G2 charset for SS2 */
    917                         myData2022->toU2022State.cs[2]=(int8_t)tempState;
    918                     }
    919                     break;
    920                 default:
    921                     if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
    922                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    923                     } else {
    924                         /* G0 charset */
    925                         myData2022->toU2022State.cs[0]=(int8_t)tempState;
    926                     }
    927                     break;
    928                 }
    929             }
    930             break;
    931         case ISO_2022_CN:
    932             {
    933                 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
    934                 switch(tempState) {
    935                 case INVALID_STATE:
    936                     *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    937                     break;
    938                 case SS2_STATE:
    939                     if(myData2022->toU2022State.cs[2]!=0) {
    940                         if(myData2022->toU2022State.g<2) {
    941                             myData2022->toU2022State.prevG=myData2022->toU2022State.g;
    942                         }
    943                         myData2022->toU2022State.g=2;
    944                     } else {
    945                         /* illegal to have SS2 before a matching designator */
    946                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    947                     }
    948                     break;
    949                 case SS3_STATE:
    950                     if(myData2022->toU2022State.cs[3]!=0) {
    951                         if(myData2022->toU2022State.g<2) {
    952                             myData2022->toU2022State.prevG=myData2022->toU2022State.g;
    953                         }
    954                         myData2022->toU2022State.g=3;
    955                     } else {
    956                         /* illegal to have SS3 before a matching designator */
    957                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    958                     }
    959                     break;
    960                 case ISO_IR_165:
    961                     if(myData2022->version==0) {
    962                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    963                         break;
    964                     }
    965                     /*fall through*/
    966                 case GB2312_1:
    967                     /*fall through*/
    968                 case CNS_11643_1:
    969                     myData2022->toU2022State.cs[1]=(int8_t)tempState;
    970                     break;
    971                 case CNS_11643_2:
    972                     myData2022->toU2022State.cs[2]=(int8_t)tempState;
    973                     break;
    974                 default:
    975                     /* other CNS 11643 planes */
    976                     if(myData2022->version==0) {
    977                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    978                     } else {
    979                        myData2022->toU2022State.cs[3]=(int8_t)tempState;
    980                     }
    981                     break;
    982                 }
    983             }
    984             break;
    985         case ISO_2022_KR:
    986             if(offset==0x30){
    987                 /* nothing to be done, just accept this one escape sequence */
    988             } else {
    989                 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    990             }
    991             break;
    992 
    993         default:
    994             *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    995             break;
    996         }
    997     }
    998     if(U_SUCCESS(*err)) {
    999         _this->toULength = 0;
   1000     } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) {
   1001         if(_this->toULength>1) {
   1002             /*
   1003              * Ticket 5691: consistent illegal sequences:
   1004              * - We include at least the first byte (ESC) in the illegal sequence.
   1005              * - If any of the non-initial bytes could be the start of a character,
   1006              *   we stop the illegal sequence before the first one of those.
   1007              *   In escape sequences, all following bytes are "printable", that is,
   1008              *   unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS),
   1009              *   they are valid single/lead bytes.
   1010              *   For simplicity, we always only report the initial ESC byte as the
   1011              *   illegal sequence and back out all other bytes we looked at.
   1012              */
   1013             /* Back out some bytes. */
   1014             int8_t backOutDistance=_this->toULength-1;
   1015             int8_t bytesFromThisBuffer=_this->toULength-initialToULength;
   1016             if(backOutDistance<=bytesFromThisBuffer) {
   1017                 /* same as initialToULength<=1 */
   1018                 *source-=backOutDistance;
   1019             } else {
   1020                 /* Back out bytes from the previous buffer: Need to replay them. */
   1021                 _this->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance);
   1022                 /* same as -(initialToULength-1) */
   1023                 /* preToULength is negative! */
   1024                 uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULength);
   1025                 *source-=bytesFromThisBuffer;
   1026             }
   1027             _this->toULength=1;
   1028         }
   1029     } else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) {
   1030         _this->toUCallbackReason = UCNV_UNASSIGNED;
   1031     }
   1032 }
   1033 
   1034 /*Checks the characters of the buffer against valid 2022 escape sequences
   1035 *if the match we return a pointer to the initial start of the sequence otherwise
   1036 *we return sourceLimit
   1037 */
   1038 /*for 2022 looks ahead in the stream
   1039  *to determine the longest possible convertible
   1040  *data stream
   1041  */
   1042 static inline const char*
   1043 getEndOfBuffer_2022(const char** source,
   1044                    const char* sourceLimit,
   1045                    UBool /*flush*/){
   1046 
   1047     const char* mySource = *source;
   1048 
   1049 #ifdef U_ENABLE_GENERIC_ISO_2022
   1050     if (*source >= sourceLimit)
   1051         return sourceLimit;
   1052 
   1053     do{
   1054 
   1055         if (*mySource == ESC_2022){
   1056             int8_t i;
   1057             int32_t key = 0;
   1058             int32_t offset;
   1059             UCNV_TableStates_2022 value = VALID_NON_TERMINAL_2022;
   1060 
   1061             /* Kludge: I could not
   1062             * figure out the reason for validating an escape sequence
   1063             * twice - once here and once in changeState_2022().
   1064             * is it possible to have an ESC character in a ISO2022
   1065             * byte stream which is valid in a code page? Is it legal?
   1066             */
   1067             for (i=0;
   1068             (mySource+i < sourceLimit)&&(value == VALID_NON_TERMINAL_2022);
   1069             i++) {
   1070                 value =  getKey_2022(*(mySource+i), &key, &offset);
   1071             }
   1072             if (value > 0 || *mySource==ESC_2022)
   1073                 return mySource;
   1074 
   1075             if ((value == VALID_NON_TERMINAL_2022)&&(!flush) )
   1076                 return sourceLimit;
   1077         }
   1078     }while (++mySource < sourceLimit);
   1079 
   1080     return sourceLimit;
   1081 #else
   1082     while(mySource < sourceLimit && *mySource != ESC_2022) {
   1083         ++mySource;
   1084     }
   1085     return mySource;
   1086 #endif
   1087 }
   1088 
   1089 
   1090 /* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c
   1091  * any future change in _MBCSFromUChar32() function should be reflected here.
   1092  * @return number of bytes in *value; negative number if fallback; 0 if no mapping
   1093  */
   1094 static inline int32_t
   1095 MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData,
   1096                                          UChar32 c,
   1097                                          uint32_t* value,
   1098                                          UBool useFallback,
   1099                                          int outputType)
   1100 {
   1101     const int32_t *cx;
   1102     const uint16_t *table;
   1103     uint32_t stage2Entry;
   1104     uint32_t myValue;
   1105     int32_t length;
   1106     const uint8_t *p;
   1107     /*
   1108      * TODO(markus): Use and require new, faster MBCS conversion table structures.
   1109      * Use internal version of ucnv_open() that verifies that the new structures are available,
   1110      * else U_INTERNAL_PROGRAM_ERROR.
   1111      */
   1112     /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
   1113     if(c<0x10000 || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
   1114         table=sharedData->mbcs.fromUnicodeTable;
   1115         stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
   1116         /* get the bytes and the length for the output */
   1117         if(outputType==MBCS_OUTPUT_2){
   1118             myValue=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
   1119             if(myValue<=0xff) {
   1120                 length=1;
   1121             } else {
   1122                 length=2;
   1123             }
   1124         } else /* outputType==MBCS_OUTPUT_3 */ {
   1125             p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
   1126             myValue=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
   1127             if(myValue<=0xff) {
   1128                 length=1;
   1129             } else if(myValue<=0xffff) {
   1130                 length=2;
   1131             } else {
   1132                 length=3;
   1133             }
   1134         }
   1135         /* is this code point assigned, or do we use fallbacks? */
   1136         if((stage2Entry&(1<<(16+(c&0xf))))!=0) {
   1137             /* assigned */
   1138             *value=myValue;
   1139             return length;
   1140         } else if(FROM_U_USE_FALLBACK(useFallback, c) && myValue!=0) {
   1141             /*
   1142              * We allow a 0 byte output if the "assigned" bit is set for this entry.
   1143              * There is no way with this data structure for fallback output
   1144              * to be a zero byte.
   1145              */
   1146             *value=myValue;
   1147             return -length;
   1148         }
   1149     }
   1150 
   1151     cx=sharedData->mbcs.extIndexes;
   1152     if(cx!=NULL) {
   1153         return ucnv_extSimpleMatchFromU(cx, c, value, useFallback);
   1154     }
   1155 
   1156     /* unassigned */
   1157     return 0;
   1158 }
   1159 
   1160 /* This inline function replicates code in _MBCSSingleFromUChar32() function in ucnvmbcs.c
   1161  * any future change in _MBCSSingleFromUChar32() function should be reflected here.
   1162  * @param retval pointer to output byte
   1163  * @return 1 roundtrip byte  0 no mapping  -1 fallback byte
   1164  */
   1165 static inline int32_t
   1166 MBCS_SINGLE_FROM_UCHAR32(UConverterSharedData* sharedData,
   1167                                        UChar32 c,
   1168                                        uint32_t* retval,
   1169                                        UBool useFallback)
   1170 {
   1171     const uint16_t *table;
   1172     int32_t value;
   1173     /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
   1174     if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
   1175         return 0;
   1176     }
   1177     /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
   1178     table=sharedData->mbcs.fromUnicodeTable;
   1179     /* get the byte for the output */
   1180     value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
   1181     /* is this code point assigned, or do we use fallbacks? */
   1182     *retval=(uint32_t)(value&0xff);
   1183     if(value>=0xf00) {
   1184         return 1;  /* roundtrip */
   1185     } else if(useFallback ? value>=0x800 : value>=0xc00) {
   1186         return -1;  /* fallback taken */
   1187     } else {
   1188         return 0;  /* no mapping */
   1189     }
   1190 }
   1191 
   1192 /*
   1193  * Check that the result is a 2-byte value with each byte in the range A1..FE
   1194  * (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte
   1195  * to move it to the ISO 2022 range 21..7E.
   1196  * Return 0 if out of range.
   1197  */
   1198 static inline uint32_t
   1199 _2022FromGR94DBCS(uint32_t value) {
   1200     if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) &&
   1201         (uint8_t)(value - 0xa1) <= (0xfe - 0xa1)
   1202     ) {
   1203         return value - 0x8080;  /* shift down to 21..7e byte range */
   1204     } else {
   1205         return 0;  /* not valid for ISO 2022 */
   1206     }
   1207 }
   1208 
   1209 #if 0 /* 5691: Call sites now check for validity. They can just += 0x8080 after that. */
   1210 /*
   1211  * This method does the reverse of _2022FromGR94DBCS(). Given the 2022 code point, it returns the
   1212  * 2 byte value that is in the range A1..FE for each byte. Otherwise it returns the 2022 code point
   1213  * unchanged.
   1214  */
   1215 static inline uint32_t
   1216 _2022ToGR94DBCS(uint32_t value) {
   1217     uint32_t returnValue = value + 0x8080;
   1218     if( (uint16_t)(returnValue - 0xa1a1) <= (0xfefe - 0xa1a1) &&
   1219         (uint8_t)(returnValue - 0xa1) <= (0xfe - 0xa1)) {
   1220         return returnValue;
   1221     } else {
   1222         return value;
   1223     }
   1224 }
   1225 #endif
   1226 
   1227 #ifdef U_ENABLE_GENERIC_ISO_2022
   1228 
   1229 /**********************************************************************************
   1230 *  ISO-2022 Converter
   1231 *
   1232 *
   1233 */
   1234 
   1235 static void
   1236 T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args,
   1237                                                            UErrorCode* err){
   1238     const char* mySourceLimit, *realSourceLimit;
   1239     const char* sourceStart;
   1240     const UChar* myTargetStart;
   1241     UConverter* saveThis;
   1242     UConverterDataISO2022* myData;
   1243     int8_t length;
   1244 
   1245     saveThis = args->converter;
   1246     myData=((UConverterDataISO2022*)(saveThis->extraInfo));
   1247 
   1248     realSourceLimit = args->sourceLimit;
   1249     while (args->source < realSourceLimit) {
   1250         if(myData->key == 0) { /* are we in the middle of an escape sequence? */
   1251             /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
   1252             mySourceLimit = getEndOfBuffer_2022(&(args->source), realSourceLimit, args->flush);
   1253 
   1254             if(args->source < mySourceLimit) {
   1255                 if(myData->currentConverter==NULL) {
   1256                     myData->currentConverter = ucnv_open("ASCII",err);
   1257                     if(U_FAILURE(*err)){
   1258                         return;
   1259                     }
   1260 
   1261                     myData->currentConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
   1262                     saveThis->mode = UCNV_SO;
   1263                 }
   1264 
   1265                 /* convert to before the ESC or until the end of the buffer */
   1266                 myData->isFirstBuffer=FALSE;
   1267                 sourceStart = args->source;
   1268                 myTargetStart = args->target;
   1269                 args->converter = myData->currentConverter;
   1270                 ucnv_toUnicode(args->converter,
   1271                     &args->target,
   1272                     args->targetLimit,
   1273                     &args->source,
   1274                     mySourceLimit,
   1275                     args->offsets,
   1276                     (UBool)(args->flush && mySourceLimit == realSourceLimit),
   1277                     err);
   1278                 args->converter = saveThis;
   1279 
   1280                 if (*err == U_BUFFER_OVERFLOW_ERROR) {
   1281                     /* move the overflow buffer */
   1282                     length = saveThis->UCharErrorBufferLength = myData->currentConverter->UCharErrorBufferLength;
   1283                     myData->currentConverter->UCharErrorBufferLength = 0;
   1284                     if(length > 0) {
   1285                         uprv_memcpy(saveThis->UCharErrorBuffer,
   1286                                     myData->currentConverter->UCharErrorBuffer,
   1287                                     length*U_SIZEOF_UCHAR);
   1288                     }
   1289                     return;
   1290                 }
   1291 
   1292                 /*
   1293                  * At least one of:
   1294                  * -Error while converting
   1295                  * -Done with entire buffer
   1296                  * -Need to write offsets or update the current offset
   1297                  *  (leave that up to the code in ucnv.c)
   1298                  *
   1299                  * or else we just stopped at an ESC byte and continue with changeState_2022()
   1300                  */
   1301                 if (U_FAILURE(*err) ||
   1302                     (args->source == realSourceLimit) ||
   1303                     (args->offsets != NULL && (args->target != myTargetStart || args->source != sourceStart) ||
   1304                     (mySourceLimit < realSourceLimit && myData->currentConverter->toULength > 0))
   1305                 ) {
   1306                     /* copy partial or error input for truncated detection and error handling */
   1307                     if(U_FAILURE(*err)) {
   1308                         length = saveThis->invalidCharLength = myData->currentConverter->invalidCharLength;
   1309                         if(length > 0) {
   1310                             uprv_memcpy(saveThis->invalidCharBuffer, myData->currentConverter->invalidCharBuffer, length);
   1311                         }
   1312                     } else {
   1313                         length = saveThis->toULength = myData->currentConverter->toULength;
   1314                         if(length > 0) {
   1315                             uprv_memcpy(saveThis->toUBytes, myData->currentConverter->toUBytes, length);
   1316                             if(args->source < mySourceLimit) {
   1317                                 *err = U_TRUNCATED_CHAR_FOUND; /* truncated input before ESC */
   1318                             }
   1319                         }
   1320                     }
   1321                     return;
   1322                 }
   1323             }
   1324         }
   1325 
   1326         sourceStart = args->source;
   1327         changeState_2022(args->converter,
   1328                &(args->source),
   1329                realSourceLimit,
   1330                ISO_2022,
   1331                err);
   1332         if (U_FAILURE(*err) || (args->source != sourceStart && args->offsets != NULL)) {
   1333             /* let the ucnv.c code update its current offset */
   1334             return;
   1335         }
   1336     }
   1337 }
   1338 
   1339 #endif
   1340 
   1341 /*
   1342  * To Unicode Callback helper function
   1343  */
   1344 static void
   1345 toUnicodeCallback(UConverter *cnv,
   1346                   const uint32_t sourceChar, const uint32_t targetUniChar,
   1347                   UErrorCode* err){
   1348     if(sourceChar>0xff){
   1349         cnv->toUBytes[0] = (uint8_t)(sourceChar>>8);
   1350         cnv->toUBytes[1] = (uint8_t)sourceChar;
   1351         cnv->toULength = 2;
   1352     }
   1353     else{
   1354         cnv->toUBytes[0] =(char) sourceChar;
   1355         cnv->toULength = 1;
   1356     }
   1357 
   1358     if(targetUniChar == (missingCharMarker-1/*0xfffe*/)){
   1359         *err = U_INVALID_CHAR_FOUND;
   1360     }
   1361     else{
   1362         *err = U_ILLEGAL_CHAR_FOUND;
   1363     }
   1364 }
   1365 
   1366 /**************************************ISO-2022-JP*************************************************/
   1367 
   1368 /************************************** IMPORTANT **************************************************
   1369 * The UConverter_fromUnicode_ISO2022_JP converter does not use ucnv_fromUnicode() functions for SBCS,DBCS and
   1370 * MBCS; instead, the values are obtained directly by calling _MBCSFromUChar32().
   1371 * The converter iterates over each Unicode codepoint
   1372 * to obtain the equivalent codepoints from the codepages supported. Since the source buffer is
   1373 * processed one char at a time it would make sense to reduce the extra processing a canned converter
   1374 * would do as far as possible.
   1375 *
   1376 * If the implementation of these macros or structure of sharedData struct change in the future, make
   1377 * sure that ISO-2022 is also changed.
   1378 ***************************************************************************************************
   1379 */
   1380 
   1381 /***************************************************************************************************
   1382 * Rules for ISO-2022-jp encoding
   1383 * (i)   Escape sequences must be fully contained within a line they should not
   1384 *       span new lines or CRs
   1385 * (ii)  If the last character on a line is represented by two bytes then an ASCII or
   1386 *       JIS-Roman character escape sequence should follow before the line terminates
   1387 * (iii) If the first character on the line is represented by two bytes then a two
   1388 *       byte character escape sequence should precede it
   1389 * (iv)  If no escape sequence is encountered then the characters are ASCII
   1390 * (v)   Latin(ISO-8859-1) and Greek(ISO-8859-7) characters must be designated to G2,
   1391 *       and invoked with SS2 (ESC N).
   1392 * (vi)  If there is any G0 designation in text, there must be a switch to
   1393 *       ASCII or to JIS X 0201-Roman before a space character (but not
   1394 *       necessarily before "ESC 4/14 2/0" or "ESC N ' '") or control
   1395 *       characters such as tab or CRLF.
   1396 * (vii) Supported encodings:
   1397 *          ASCII, JISX201, JISX208, JISX212, GB2312, KSC5601, ISO-8859-1,ISO-8859-7
   1398 *
   1399 *  source : RFC-1554
   1400 *
   1401 *          JISX201, JISX208,JISX212 : new .cnv data files created
   1402 *          KSC5601 : alias to ibm-949 mapping table
   1403 *          GB2312 : alias to ibm-1386 mapping table
   1404 *          ISO-8859-1 : Algorithmic implemented as LATIN1 case
   1405 *          ISO-8859-7 : alias to ibm-9409 mapping table
   1406 */
   1407 
   1408 /* preference order of JP charsets */
   1409 static const StateEnum jpCharsetPref[]={
   1410     ASCII,
   1411     JISX201,
   1412     ISO8859_1,
   1413     ISO8859_7,
   1414     JISX208,
   1415     JISX212,
   1416     GB2312,
   1417     KSC5601,
   1418     HWKANA_7BIT
   1419 };
   1420 
   1421 /*
   1422  * The escape sequences must be in order of the enum constants like JISX201  = 3,
   1423  * not in order of jpCharsetPref[]!
   1424  */
   1425 static const char escSeqChars[][6] ={
   1426     "\x1B\x28\x42",         /* <ESC>(B  ASCII       */
   1427     "\x1B\x2E\x41",         /* <ESC>.A  ISO-8859-1  */
   1428     "\x1B\x2E\x46",         /* <ESC>.F  ISO-8859-7  */
   1429     "\x1B\x28\x4A",         /* <ESC>(J  JISX-201    */
   1430     "\x1B\x24\x42",         /* <ESC>$B  JISX-208    */
   1431     "\x1B\x24\x28\x44",     /* <ESC>$(D JISX-212    */
   1432     "\x1B\x24\x41",         /* <ESC>$A  GB2312      */
   1433     "\x1B\x24\x28\x43",     /* <ESC>$(C KSC5601     */
   1434     "\x1B\x28\x49"          /* <ESC>(I  HWKANA_7BIT */
   1435 
   1436 };
   1437 static  const int8_t escSeqCharsLen[] ={
   1438     3, /* length of <ESC>(B  ASCII       */
   1439     3, /* length of <ESC>.A  ISO-8859-1  */
   1440     3, /* length of <ESC>.F  ISO-8859-7  */
   1441     3, /* length of <ESC>(J  JISX-201    */
   1442     3, /* length of <ESC>$B  JISX-208    */
   1443     4, /* length of <ESC>$(D JISX-212    */
   1444     3, /* length of <ESC>$A  GB2312      */
   1445     4, /* length of <ESC>$(C KSC5601     */
   1446     3  /* length of <ESC>(I  HWKANA_7BIT */
   1447 };
   1448 
   1449 /*
   1450 * The iteration over various code pages works this way:
   1451 * i)   Get the currentState from myConverterData->currentState
   1452 * ii)  Check if the character is mapped to a valid character in the currentState
   1453 *      Yes ->  a) set the initIterState to currentState
   1454 *       b) remain in this state until an invalid character is found
   1455 *      No  ->  a) go to the next code page and find the character
   1456 * iii) Before changing the state increment the current state check if the current state
   1457 *      is equal to the initIterState
   1458 *      Yes ->  A character that cannot be represented in any of the supported encodings
   1459 *       break and return a U_INVALID_CHARACTER error
   1460 *      No  ->  Continue and find the character in next code page
   1461 *
   1462 *
   1463 * TODO: Implement a priority technique where the users are allowed to set the priority of code pages
   1464 */
   1465 
   1466 /* Map 00..7F to Unicode according to JIS X 0201. */
   1467 static inline uint32_t
   1468 jisx201ToU(uint32_t value) {
   1469     if(value < 0x5c) {
   1470         return value;
   1471     } else if(value == 0x5c) {
   1472         return 0xa5;
   1473     } else if(value == 0x7e) {
   1474         return 0x203e;
   1475     } else /* value <= 0x7f */ {
   1476         return value;
   1477     }
   1478 }
   1479 
   1480 /* Map Unicode to 00..7F according to JIS X 0201. Return U+FFFE if unmappable. */
   1481 static inline uint32_t
   1482 jisx201FromU(uint32_t value) {
   1483     if(value<=0x7f) {
   1484         if(value!=0x5c && value!=0x7e) {
   1485             return value;
   1486         }
   1487     } else if(value==0xa5) {
   1488         return 0x5c;
   1489     } else if(value==0x203e) {
   1490         return 0x7e;
   1491     }
   1492     return 0xfffe;
   1493 }
   1494 
   1495 /*
   1496  * Take a valid Shift-JIS byte pair, check that it is in the range corresponding
   1497  * to JIS X 0208, and convert it to a pair of 21..7E bytes.
   1498  * Return 0 if the byte pair is out of range.
   1499  */
   1500 static inline uint32_t
   1501 _2022FromSJIS(uint32_t value) {
   1502     uint8_t trail;
   1503 
   1504     if(value > 0xEFFC) {
   1505         return 0;  /* beyond JIS X 0208 */
   1506     }
   1507 
   1508     trail = (uint8_t)value;
   1509 
   1510     value &= 0xff00;  /* lead byte */
   1511     if(value <= 0x9f00) {
   1512         value -= 0x7000;
   1513     } else /* 0xe000 <= value <= 0xef00 */ {
   1514         value -= 0xb000;
   1515     }
   1516     value <<= 1;
   1517 
   1518     if(trail <= 0x9e) {
   1519         value -= 0x100;
   1520         if(trail <= 0x7e) {
   1521             value |= trail - 0x1f;
   1522         } else {
   1523             value |= trail - 0x20;
   1524         }
   1525     } else /* trail <= 0xfc */ {
   1526         value |= trail - 0x7e;
   1527     }
   1528     return value;
   1529 }
   1530 
   1531 /*
   1532  * Convert a pair of JIS X 0208 21..7E bytes to Shift-JIS.
   1533  * If either byte is outside 21..7E make sure that the result is not valid
   1534  * for Shift-JIS so that the converter catches it.
   1535  * Some invalid byte values already turn into equally invalid Shift-JIS
   1536  * byte values and need not be tested explicitly.
   1537  */
   1538 static inline void
   1539 _2022ToSJIS(uint8_t c1, uint8_t c2, char bytes[2]) {
   1540     if(c1&1) {
   1541         ++c1;
   1542         if(c2 <= 0x5f) {
   1543             c2 += 0x1f;
   1544         } else if(c2 <= 0x7e) {
   1545             c2 += 0x20;
   1546         } else {
   1547             c2 = 0;  /* invalid */
   1548         }
   1549     } else {
   1550         if((uint8_t)(c2-0x21) <= ((0x7e)-0x21)) {
   1551             c2 += 0x7e;
   1552         } else {
   1553             c2 = 0;  /* invalid */
   1554         }
   1555     }
   1556     c1 >>= 1;
   1557     if(c1 <= 0x2f) {
   1558         c1 += 0x70;
   1559     } else if(c1 <= 0x3f) {
   1560         c1 += 0xb0;
   1561     } else {
   1562         c1 = 0;  /* invalid */
   1563     }
   1564     bytes[0] = (char)c1;
   1565     bytes[1] = (char)c2;
   1566 }
   1567 
   1568 /*
   1569  * JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS)
   1570  * Katakana.
   1571  * Now that we use a Shift-JIS table for JIS X 0208 we need to hardcode these fallbacks
   1572  * because Shift-JIS roundtrips half-width Katakana to single bytes.
   1573  * These were the only fallbacks in ICU's jisx-208.ucm file.
   1574  */
   1575 static const uint16_t hwkana_fb[HWKANA_END - HWKANA_START + 1] = {
   1576     0x2123,  /* U+FF61 */
   1577     0x2156,
   1578     0x2157,
   1579     0x2122,
   1580     0x2126,
   1581     0x2572,
   1582     0x2521,
   1583     0x2523,
   1584     0x2525,
   1585     0x2527,
   1586     0x2529,
   1587     0x2563,
   1588     0x2565,
   1589     0x2567,
   1590     0x2543,
   1591     0x213C,  /* U+FF70 */
   1592     0x2522,
   1593     0x2524,
   1594     0x2526,
   1595     0x2528,
   1596     0x252A,
   1597     0x252B,
   1598     0x252D,
   1599     0x252F,
   1600     0x2531,
   1601     0x2533,
   1602     0x2535,
   1603     0x2537,
   1604     0x2539,
   1605     0x253B,
   1606     0x253D,
   1607     0x253F,  /* U+FF80 */
   1608     0x2541,
   1609     0x2544,
   1610     0x2546,
   1611     0x2548,
   1612     0x254A,
   1613     0x254B,
   1614     0x254C,
   1615     0x254D,
   1616     0x254E,
   1617     0x254F,
   1618     0x2552,
   1619     0x2555,
   1620     0x2558,
   1621     0x255B,
   1622     0x255E,
   1623     0x255F,  /* U+FF90 */
   1624     0x2560,
   1625     0x2561,
   1626     0x2562,
   1627     0x2564,
   1628     0x2566,
   1629     0x2568,
   1630     0x2569,
   1631     0x256A,
   1632     0x256B,
   1633     0x256C,
   1634     0x256D,
   1635     0x256F,
   1636     0x2573,
   1637     0x212B,
   1638     0x212C   /* U+FF9F */
   1639 };
   1640 
   1641 static void
   1642 UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err) {
   1643     UConverter *cnv = args->converter;
   1644     UConverterDataISO2022 *converterData;
   1645     ISO2022State *pFromU2022State;
   1646     uint8_t *target = (uint8_t *) args->target;
   1647     const uint8_t *targetLimit = (const uint8_t *) args->targetLimit;
   1648     const UChar* source = args->source;
   1649     const UChar* sourceLimit = args->sourceLimit;
   1650     int32_t* offsets = args->offsets;
   1651     UChar32 sourceChar;
   1652     char buffer[8];
   1653     int32_t len, outLen;
   1654     int8_t choices[10];
   1655     int32_t choiceCount;
   1656     uint32_t targetValue = 0;
   1657     UBool useFallback;
   1658 
   1659     int32_t i;
   1660     int8_t cs, g;
   1661 
   1662     /* set up the state */
   1663     converterData     = (UConverterDataISO2022*)cnv->extraInfo;
   1664     pFromU2022State   = &converterData->fromU2022State;
   1665 
   1666     choiceCount = 0;
   1667 
   1668     /* check if the last codepoint of previous buffer was a lead surrogate*/
   1669     if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
   1670         goto getTrail;
   1671     }
   1672 
   1673     while(source < sourceLimit) {
   1674         if(target < targetLimit) {
   1675 
   1676             sourceChar  = *(source++);
   1677             /*check if the char is a First surrogate*/
   1678             if(U16_IS_SURROGATE(sourceChar)) {
   1679                 if(U16_IS_SURROGATE_LEAD(sourceChar)) {
   1680 getTrail:
   1681                     /*look ahead to find the trail surrogate*/
   1682                     if(source < sourceLimit) {
   1683                         /* test the following code unit */
   1684                         UChar trail=(UChar) *source;
   1685                         if(U16_IS_TRAIL(trail)) {
   1686                             source++;
   1687                             sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
   1688                             cnv->fromUChar32=0x00;
   1689                             /* convert this supplementary code point */
   1690                             /* exit this condition tree */
   1691                         } else {
   1692                             /* this is an unmatched lead code unit (1st surrogate) */
   1693                             /* callback(illegal) */
   1694                             *err=U_ILLEGAL_CHAR_FOUND;
   1695                             cnv->fromUChar32=sourceChar;
   1696                             break;
   1697                         }
   1698                     } else {
   1699                         /* no more input */
   1700                         cnv->fromUChar32=sourceChar;
   1701                         break;
   1702                     }
   1703                 } else {
   1704                     /* this is an unmatched trail code unit (2nd surrogate) */
   1705                     /* callback(illegal) */
   1706                     *err=U_ILLEGAL_CHAR_FOUND;
   1707                     cnv->fromUChar32=sourceChar;
   1708                     break;
   1709                 }
   1710             }
   1711 
   1712             /* do not convert SO/SI/ESC */
   1713             if(IS_2022_CONTROL(sourceChar)) {
   1714                 /* callback(illegal) */
   1715                 *err=U_ILLEGAL_CHAR_FOUND;
   1716                 cnv->fromUChar32=sourceChar;
   1717                 break;
   1718             }
   1719 
   1720             /* do the conversion */
   1721 
   1722             if(choiceCount == 0) {
   1723                 uint16_t csm;
   1724 
   1725                 /*
   1726                  * The csm variable keeps track of which charsets are allowed
   1727                  * and not used yet while building the choices[].
   1728                  */
   1729                 csm = jpCharsetMasks[converterData->version];
   1730                 choiceCount = 0;
   1731 
   1732                 /* JIS7/8: try single-byte half-width Katakana before JISX208 */
   1733                 if(converterData->version == 3 || converterData->version == 4) {
   1734                     choices[choiceCount++] = (int8_t)HWKANA_7BIT;
   1735                 }
   1736                 /* Do not try single-byte half-width Katakana for other versions. */
   1737                 csm &= ~CSM(HWKANA_7BIT);
   1738 
   1739                 /* try the current G0 charset */
   1740                 choices[choiceCount++] = cs = pFromU2022State->cs[0];
   1741                 csm &= ~CSM(cs);
   1742 
   1743                 /* try the current G2 charset */
   1744                 if((cs = pFromU2022State->cs[2]) != 0) {
   1745                     choices[choiceCount++] = cs;
   1746                     csm &= ~CSM(cs);
   1747                 }
   1748 
   1749                 /* try all the other possible charsets */
   1750                 for(i = 0; i < LENGTHOF(jpCharsetPref); ++i) {
   1751                     cs = (int8_t)jpCharsetPref[i];
   1752                     if(CSM(cs) & csm) {
   1753                         choices[choiceCount++] = cs;
   1754                         csm &= ~CSM(cs);
   1755                     }
   1756                 }
   1757             }
   1758 
   1759             cs = g = 0;
   1760             /*
   1761              * len==0: no mapping found yet
   1762              * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
   1763              * len>0: found a roundtrip result, done
   1764              */
   1765             len = 0;
   1766             /*
   1767              * We will turn off useFallback after finding a fallback,
   1768              * but we still get fallbacks from PUA code points as usual.
   1769              * Therefore, we will also need to check that we don't overwrite
   1770              * an early fallback with a later one.
   1771              */
   1772             useFallback = cnv->useFallback;
   1773 
   1774             for(i = 0; i < choiceCount && len <= 0; ++i) {
   1775                 uint32_t value;
   1776                 int32_t len2;
   1777                 int8_t cs0 = choices[i];
   1778                 switch(cs0) {
   1779                 case ASCII:
   1780                     if(sourceChar <= 0x7f) {
   1781                         targetValue = (uint32_t)sourceChar;
   1782                         len = 1;
   1783                         cs = cs0;
   1784                         g = 0;
   1785                     }
   1786                     break;
   1787                 case ISO8859_1:
   1788                     if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
   1789                         targetValue = (uint32_t)sourceChar - 0x80;
   1790                         len = 1;
   1791                         cs = cs0;
   1792                         g = 2;
   1793                     }
   1794                     break;
   1795                 case HWKANA_7BIT:
   1796                     if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
   1797                         if(converterData->version==3) {
   1798                             /* JIS7: use G1 (SO) */
   1799                             /* Shift U+FF61..U+FF9F to bytes 21..5F. */
   1800                             targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21));
   1801                             len = 1;
   1802                             pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */
   1803                             g = 1;
   1804                         } else if(converterData->version==4) {
   1805                             /* JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below */
   1806                             /* Shift U+FF61..U+FF9F to bytes A1..DF. */
   1807                             targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0xa1));
   1808                             len = 1;
   1809 
   1810                             cs = pFromU2022State->cs[0];
   1811                             if(IS_JP_DBCS(cs)) {
   1812                                 /* switch from a DBCS charset to JISX201 */
   1813                                 cs = (int8_t)JISX201;
   1814                             }
   1815                             /* else stay in the current G0 charset */
   1816                             g = 0;
   1817                         }
   1818                         /* else do not use HWKANA_7BIT with other versions */
   1819                     }
   1820                     break;
   1821                 case JISX201:
   1822                     /* G0 SBCS */
   1823                     value = jisx201FromU(sourceChar);
   1824                     if(value <= 0x7f) {
   1825                         targetValue = value;
   1826                         len = 1;
   1827                         cs = cs0;
   1828                         g = 0;
   1829                         useFallback = FALSE;
   1830                     }
   1831                     break;
   1832                 case JISX208:
   1833                     /* G0 DBCS from Shift-JIS table */
   1834                     len2 = MBCS_FROM_UCHAR32_ISO2022(
   1835                                 converterData->myConverterArray[cs0],
   1836                                 sourceChar, &value,
   1837                                 useFallback, MBCS_OUTPUT_2);
   1838                     if(len2 == 2 || (len2 == -2 && len == 0)) {  /* only accept DBCS: abs(len)==2 */
   1839                         value = _2022FromSJIS(value);
   1840                         if(value != 0) {
   1841                             targetValue = value;
   1842                             len = len2;
   1843                             cs = cs0;
   1844                             g = 0;
   1845                             useFallback = FALSE;
   1846                         }
   1847                     } else if(len == 0 && useFallback &&
   1848                               (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
   1849                         targetValue = hwkana_fb[sourceChar - HWKANA_START];
   1850                         len = -2;
   1851                         cs = cs0;
   1852                         g = 0;
   1853                         useFallback = FALSE;
   1854                     }
   1855                     break;
   1856                 case ISO8859_7:
   1857                     /* G0 SBCS forced to 7-bit output */
   1858                     len2 = MBCS_SINGLE_FROM_UCHAR32(
   1859                                 converterData->myConverterArray[cs0],
   1860                                 sourceChar, &value,
   1861                                 useFallback);
   1862                     if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= value && value <= GR96_END) {
   1863                         targetValue = value - 0x80;
   1864                         len = len2;
   1865                         cs = cs0;
   1866                         g = 2;
   1867                         useFallback = FALSE;
   1868                     }
   1869                     break;
   1870                 default:
   1871                     /* G0 DBCS */
   1872                     len2 = MBCS_FROM_UCHAR32_ISO2022(
   1873                                 converterData->myConverterArray[cs0],
   1874                                 sourceChar, &value,
   1875                                 useFallback, MBCS_OUTPUT_2);
   1876                     if(len2 == 2 || (len2 == -2 && len == 0)) {  /* only accept DBCS: abs(len)==2 */
   1877                         if(cs0 == KSC5601) {
   1878                             /*
   1879                              * Check for valid bytes for the encoding scheme.
   1880                              * This is necessary because the sub-converter (windows-949)
   1881                              * has a broader encoding scheme than is valid for 2022.
   1882                              */
   1883                             value = _2022FromGR94DBCS(value);
   1884                             if(value == 0) {
   1885                                 break;
   1886                             }
   1887                         }
   1888                         targetValue = value;
   1889                         len = len2;
   1890                         cs = cs0;
   1891                         g = 0;
   1892                         useFallback = FALSE;
   1893                     }
   1894                     break;
   1895                 }
   1896             }
   1897 
   1898             if(len != 0) {
   1899                 if(len < 0) {
   1900                     len = -len;  /* fallback */
   1901                 }
   1902                 outLen = 0; /* count output bytes */
   1903 
   1904                 /* write SI if necessary (only for JIS7) */
   1905                 if(pFromU2022State->g == 1 && g == 0) {
   1906                     buffer[outLen++] = UCNV_SI;
   1907                     pFromU2022State->g = 0;
   1908                 }
   1909 
   1910                 /* write the designation sequence if necessary */
   1911                 if(cs != pFromU2022State->cs[g]) {
   1912                     int32_t escLen = escSeqCharsLen[cs];
   1913                     uprv_memcpy(buffer + outLen, escSeqChars[cs], escLen);
   1914                     outLen += escLen;
   1915                     pFromU2022State->cs[g] = cs;
   1916 
   1917                     /* invalidate the choices[] */
   1918                     choiceCount = 0;
   1919                 }
   1920 
   1921                 /* write the shift sequence if necessary */
   1922                 if(g != pFromU2022State->g) {
   1923                     switch(g) {
   1924                     /* case 0 handled before writing escapes */
   1925                     case 1:
   1926                         buffer[outLen++] = UCNV_SO;
   1927                         pFromU2022State->g = 1;
   1928                         break;
   1929                     default: /* case 2 */
   1930                         buffer[outLen++] = 0x1b;
   1931                         buffer[outLen++] = 0x4e;
   1932                         break;
   1933                     /* no case 3: no SS3 in ISO-2022-JP-x */
   1934                     }
   1935                 }
   1936 
   1937                 /* write the output bytes */
   1938                 if(len == 1) {
   1939                     buffer[outLen++] = (char)targetValue;
   1940                 } else /* len == 2 */ {
   1941                     buffer[outLen++] = (char)(targetValue >> 8);
   1942                     buffer[outLen++] = (char)targetValue;
   1943                 }
   1944             } else {
   1945                 /*
   1946                  * if we cannot find the character after checking all codepages
   1947                  * then this is an error
   1948                  */
   1949                 *err = U_INVALID_CHAR_FOUND;
   1950                 cnv->fromUChar32=sourceChar;
   1951                 break;
   1952             }
   1953 
   1954             if(sourceChar == CR || sourceChar == LF) {
   1955                 /* reset the G2 state at the end of a line (conversion got us into ASCII or JISX201 already) */
   1956                 pFromU2022State->cs[2] = 0;
   1957                 choiceCount = 0;
   1958             }
   1959 
   1960             /* output outLen>0 bytes in buffer[] */
   1961             if(outLen == 1) {
   1962                 *target++ = buffer[0];
   1963                 if(offsets) {
   1964                     *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */
   1965                 }
   1966             } else if(outLen == 2 && (target + 2) <= targetLimit) {
   1967                 *target++ = buffer[0];
   1968                 *target++ = buffer[1];
   1969                 if(offsets) {
   1970                     int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));
   1971                     *offsets++ = sourceIndex;
   1972                     *offsets++ = sourceIndex;
   1973                 }
   1974             } else {
   1975                 fromUWriteUInt8(
   1976                     cnv,
   1977                     buffer, outLen,
   1978                     &target, (const char *)targetLimit,
   1979                     &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
   1980                     err);
   1981                 if(U_FAILURE(*err)) {
   1982                     break;
   1983                 }
   1984             }
   1985         } /* end if(myTargetIndex<myTargetLength) */
   1986         else{
   1987             *err =U_BUFFER_OVERFLOW_ERROR;
   1988             break;
   1989         }
   1990 
   1991     }/* end while(mySourceIndex<mySourceLength) */
   1992 
   1993     /*
   1994      * the end of the input stream and detection of truncated input
   1995      * are handled by the framework, but for ISO-2022-JP conversion
   1996      * we need to be in ASCII mode at the very end
   1997      *
   1998      * conditions:
   1999      *   successful
   2000      *   in SO mode or not in ASCII mode
   2001      *   end of input and no truncated input
   2002      */
   2003     if( U_SUCCESS(*err) &&
   2004         (pFromU2022State->g!=0 || pFromU2022State->cs[0]!=ASCII) &&
   2005         args->flush && source>=sourceLimit && cnv->fromUChar32==0
   2006     ) {
   2007         int32_t sourceIndex;
   2008 
   2009         outLen = 0;
   2010 
   2011         if(pFromU2022State->g != 0) {
   2012             buffer[outLen++] = UCNV_SI;
   2013             pFromU2022State->g = 0;
   2014         }
   2015 
   2016         if(pFromU2022State->cs[0] != ASCII) {
   2017             int32_t escLen = escSeqCharsLen[ASCII];
   2018             uprv_memcpy(buffer + outLen, escSeqChars[ASCII], escLen);
   2019             outLen += escLen;
   2020             pFromU2022State->cs[0] = (int8_t)ASCII;
   2021         }
   2022 
   2023         /* get the source index of the last input character */
   2024         /*
   2025          * TODO this would be simpler and more reliable if we used a pair
   2026          * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
   2027          * so that we could simply use the prevSourceIndex here;
   2028          * this code gives an incorrect result for the rare case of an unmatched
   2029          * trail surrogate that is alone in the last buffer of the text stream
   2030          */
   2031         sourceIndex=(int32_t)(source-args->source);
   2032         if(sourceIndex>0) {
   2033             --sourceIndex;
   2034             if( U16_IS_TRAIL(args->source[sourceIndex]) &&
   2035                 (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
   2036             ) {
   2037                 --sourceIndex;
   2038             }
   2039         } else {
   2040             sourceIndex=-1;
   2041         }
   2042 
   2043         fromUWriteUInt8(
   2044             cnv,
   2045             buffer, outLen,
   2046             &target, (const char *)targetLimit,
   2047             &offsets, sourceIndex,
   2048             err);
   2049     }
   2050 
   2051     /*save the state and return */
   2052     args->source = source;
   2053     args->target = (char*)target;
   2054 }
   2055 
   2056 /*************** to unicode *******************/
   2057 
   2058 static void
   2059 UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
   2060                                                UErrorCode* err){
            /*
             * Converts ISO-2022-JP-family bytes from [args->source, args->sourceLimit)
             * to UTF-16 in [args->target, args->targetLimit). When args->offsets is
             * non-NULL, one source offset is stored per UChar written.
             *
             * State kept across calls (read from args->converter and its extraInfo):
             *   - myData->key != 0:          a partial escape sequence is pending
             *   - converter->toULength == 1: the lead byte of a double-byte
             *                                character is pending in toUBytes[0]
             *   - myData->toU2022State:      current charset designations (cs[])
             *                                and the active G set (g / prevG)
             *
             * On return args->source and args->target are advanced past what was
             * consumed/produced; *err is set to U_BUFFER_OVERFLOW_ERROR when the
             * target is full, or to whatever changeState_2022()/toUnicodeCallback()
             * report for bad input.
             */
   2061     char tempBuf[2];
   2062     const char *mySource = (char *) args->source;
   2063     UChar *myTarget = args->target;
   2064     const char *mySourceLimit = args->sourceLimit;
   2065     uint32_t targetUniChar = 0x0000;
   2066     uint32_t mySourceChar = 0x0000;
   2067     uint32_t tmpSourceChar = 0x0000;
   2068     UConverterDataISO2022* myData;
   2069     ISO2022State *pToU2022State;
   2070     StateEnum cs;
   2071 
   2072     myData=(UConverterDataISO2022*)(args->converter->extraInfo);
   2073     pToU2022State = &myData->toU2022State;
   2074 
   2075     if(myData->key != 0) {
   2076         /* continue with a partial escape sequence */
   2077         goto escape;
   2078     } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
   2079         /* continue with a partial double-byte character */
                /*
                 * The jump below skips the top of the loop, so the variables that
                 * the loop normally sets up (mySourceChar, cs, targetUniChar) are
                 * initialized here by hand.
                 */
   2080         mySourceChar = args->converter->toUBytes[0];
   2081         args->converter->toULength = 0;
   2082         cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
   2083         targetUniChar = missingCharMarker;
   2084         goto getTrailByte;
   2085     }
   2086 
   2087     while(mySource < mySourceLimit){
   2088 
   2089         targetUniChar =missingCharMarker;
   2090 
   2091         if(myTarget < args->targetLimit){
   2092 
   2093             mySourceChar= (unsigned char) *mySource++;
   2094 
   2095             switch(mySourceChar) {
   2096             case UCNV_SI:
   2097                 if(myData->version==3) {
   2098                     pToU2022State->g=0;
   2099                     continue;
   2100                 } else {
   2101                     /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
                            /*
                             * targetUniChar is still missingCharMarker, so after this
                             * break the byte is handed to toUnicodeCallback() below.
                             */
   2102                     myData->isEmptySegment = FALSE;	/* reset this, we have a different error */
   2103                     break;
   2104                 }
   2105 
   2106             case UCNV_SO:
   2107                 if(myData->version==3) {
   2108                     /* JIS7: switch to G1 half-width Katakana */
   2109                     pToU2022State->cs[1] = (int8_t)HWKANA_7BIT;
   2110                     pToU2022State->g=1;
   2111                     continue;
   2112                 } else {
   2113                     /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
   2114                     myData->isEmptySegment = FALSE;	/* reset this, we have a different error */
   2115                     break;
   2116                 }
   2117 
   2118             case ESC_2022:
                        /* step back so that changeState_2022() is handed the ESC byte itself */
   2119                 mySource--;
   2120 escape:
   2121                 {
                            /*
                             * Remember where we were so that, if the sequence has to be
                             * reported as illegal, toULength can be set to the number of
                             * bytes buffered before this call plus those consumed by it.
                             */
   2122                     const char * mySourceBefore = mySource;
   2123                     int8_t toULengthBefore = args->converter->toULength;
   2124 
   2125                     changeState_2022(args->converter,&(mySource),
   2126                         mySourceLimit, ISO_2022_JP,err);
   2127 
   2128                     /* If in ISO-2022-JP only and we successfully completed an escape sequence, but previous segment was empty, create an error */
   2129                     if(myData->version==0 && myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
   2130                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   2131                         args->converter->toUCallbackReason = UCNV_IRREGULAR;
   2132                         args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore));
   2133                     }
   2134                 }
   2135 
   2136                 /* invalid or illegal escape sequence */
   2137                 if(U_FAILURE(*err)){
   2138                     args->target = myTarget;
   2139                     args->source = mySource;
   2140                     myData->isEmptySegment = FALSE;	/* Reset to avoid future spurious errors */
   2141                     return;
   2142                 }
   2143                 /* If we successfully completed an escape sequence, we begin a new segment, empty so far */
   2144                 if(myData->key==0) {
   2145                     myData->isEmptySegment = TRUE;
   2146                 }
   2147                 continue;
   2148 
   2149             /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */
   2150 
   2151             case CR:
   2152                 /*falls through*/
   2153             case LF:
   2154                 /* automatically reset to single-byte mode */
                        /* G0 is left alone if it is already ASCII or JISX201; G2 is cleared and G0 becomes active */
   2155                 if((StateEnum)pToU2022State->cs[0] != ASCII && (StateEnum)pToU2022State->cs[0] != JISX201) {
   2156                     pToU2022State->cs[0] = (int8_t)ASCII;
   2157                 }
   2158                 pToU2022State->cs[2] = 0;
   2159                 pToU2022State->g = 0;
   2160                 /* falls through */
   2161             default:
   2162                 /* convert one or two bytes */
   2163                 myData->isEmptySegment = FALSE;
   2164                 cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
   2165                 if( (uint8_t)(mySourceChar - 0xa1) <= (0xdf - 0xa1) && myData->version==4 &&
   2166                     !IS_JP_DBCS(cs)
   2167                 ) {
   2168                     /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */
   2169                     targetUniChar = mySourceChar + (HWKANA_START - 0xa1);
   2170 
   2171                     /* return from a single-shift state to the previous one */
   2172                     if(pToU2022State->g >= 2) {
   2173                         pToU2022State->g=pToU2022State->prevG;
   2174                     }
   2175                 } else switch(cs) {
                        /*
                         * In every single-byte case below, a byte outside the accepted
                         * range leaves targetUniChar at missingCharMarker and is
                         * therefore routed to toUnicodeCallback() after the switch.
                         */
   2176                 case ASCII:
   2177                     if(mySourceChar <= 0x7f) {
   2178                         targetUniChar = mySourceChar;
   2179                     }
   2180                     break;
   2181                 case ISO8859_1:
   2182                     if(mySourceChar <= 0x7f) {
   2183                         targetUniChar = mySourceChar + 0x80;
   2184                     }
   2185                     /* return from a single-shift state to the previous one */
   2186                     pToU2022State->g=pToU2022State->prevG;
   2187                     break;
   2188                 case ISO8859_7:
   2189                     if(mySourceChar <= 0x7f) {
   2190                         /* convert mySourceChar+0x80 to use a normal 8-bit table */
   2191                         targetUniChar =
   2192                             _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(
   2193                                 myData->myConverterArray[cs],
   2194                                 mySourceChar + 0x80);
   2195                     }
   2196                     /* return from a single-shift state to the previous one */
   2197                     pToU2022State->g=pToU2022State->prevG;
   2198                     break;
   2199                 case JISX201:
   2200                     if(mySourceChar <= 0x7f) {
   2201                         targetUniChar = jisx201ToU(mySourceChar);
   2202                     }
   2203                     break;
   2204                 case HWKANA_7BIT:
   2205                     if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) {
   2206                         /* 7-bit halfwidth Katakana */
   2207                         targetUniChar = mySourceChar + (HWKANA_START - 0x21);
   2208                     }
   2209                     break;
   2210                 default:
   2211                     /* G0 DBCS */
   2212                     if(mySource < mySourceLimit) {
   2213                         int leadIsOk, trailIsOk;
   2214                         uint8_t trailByte;
   2215 getTrailByte:
                                /* peek at the second byte; it is consumed only in the branches below that ++mySource */
   2216                         trailByte = (uint8_t)*mySource;
   2217                         /*
   2218                          * Ticket 5691: consistent illegal sequences:
   2219                          * - We include at least the first byte in the illegal sequence.
   2220                          * - If any of the non-initial bytes could be the start of a character,
   2221                          *   we stop the illegal sequence before the first one of those.
   2222                          *
   2223                          * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
   2224                          * an ESC/SO/SI, we report only the first byte as the illegal sequence.
   2225                          * Otherwise we convert or report the pair of bytes.
   2226                          */
   2227                         leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
   2228                         trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
   2229                         if (leadIsOk && trailIsOk) {
   2230                             ++mySource;
   2231                             tmpSourceChar = (mySourceChar << 8) | trailByte;
   2232                             if(cs == JISX208) {
                                        /* tempBuf receives the byte pair produced by _2022ToSJIS() for the table lookup */
   2233                                 _2022ToSJIS((uint8_t)mySourceChar, trailByte, tempBuf);
   2234                                 mySourceChar = tmpSourceChar;
   2235                             } else {
   2236                                 /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
   2237                                 mySourceChar = tmpSourceChar;
   2238                                 if (cs == KSC5601) {
   2239                                     tmpSourceChar += 0x8080;  /* = _2022ToGR94DBCS(tmpSourceChar) */
   2240                                 }
   2241                                 tempBuf[0] = (char)(tmpSourceChar >> 8);
   2242                                 tempBuf[1] = (char)(tmpSourceChar);
   2243                             }
   2244                             targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE);
   2245                         } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
   2246                             /* report a pair of illegal bytes if the second byte is not a DBCS starter */
   2247                             ++mySource;
   2248                             /* add another bit so that the code below writes 2 bytes in case of error */
   2249                             mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
   2250                         }
   2251                     } else {
                                /* input ends after the lead byte: save it so the next call resumes at getTrailByte */
   2252                         args->converter->toUBytes[0] = (uint8_t)mySourceChar;
   2253                         args->converter->toULength = 1;
   2254                         goto endloop;
   2255                     }
   2256                 }  /* End of inner switch */
   2257                 break;
   2258             }  /* End of outer switch */
                    /*
                     * Dispatch on the lookup result:
                     *   below missingCharMarker-1 : a BMP code unit, written directly
                     *   above missingCharMarker   : a supplementary code point, written
                     *                               as a surrogate pair
                     *   otherwise                 : unmappable/illegal input, reported
                     *                               through toUnicodeCallback()
                     * Offsets point at the first byte of the character: 1 byte back for
                     * a single-byte mySourceChar (<= 0xff), otherwise 2 bytes back.
                     */
   2259             if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){
   2260                 if(args->offsets){
   2261                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2262                 }
   2263                 *(myTarget++)=(UChar)targetUniChar;
   2264             }
   2265             else if(targetUniChar > missingCharMarker){
   2266                 /* disassemble the surrogate pair and write to output*/
   2267                 targetUniChar-=0x0010000;
   2268                 *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));
   2269                 if(args->offsets){
   2270                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2271                 }
   2272                 ++myTarget;
   2273                 if(myTarget< args->targetLimit){
   2274                     *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
   2275                     if(args->offsets){
   2276                         args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2277                     }
   2278                     ++myTarget;
   2279                 }else{
                            /*
                             * No room for the trail surrogate: park it in the converter's
                             * UChar overflow buffer. *err is not set here; if input
                             * remains, the next loop iteration reports
                             * U_BUFFER_OVERFLOW_ERROR because the target is full.
                             */
   2280                     args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
   2281                                     (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
   2282                 }
   2283 
   2284             }
   2285             else{
   2286                 /* Call the callback function*/
   2287                 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
   2288                 break;
   2289             }
   2290         }
   2291         else{    /* goes with "if(myTarget < args->targetLimit)"  way up near top of function */
   2292             *err =U_BUFFER_OVERFLOW_ERROR;
   2293             break;
   2294         }
   2295     }
   2296 endloop:
            /* publish how far the source and target pointers advanced */
   2297     args->target = myTarget;
   2298     args->source = mySource;
   2299 }
   2300 
   2301 
   2302 /***************************************************************
   2303 *   Rules for ISO-2022-KR encoding
   2304 *   i) The KSC5601 designator sequence should appear only once in a file,
   2305 *      at the beginning of a line before any KSC5601 characters. This usually
   2306 *      means that it appears by itself on the first line of the file
   2307 *  ii) There are only 2 shifting sequences SO to shift into double byte mode
   2308 *      and SI to shift into single byte mode
   2309 */
   2310 static void
   2311 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){
            /*
             * Runs the from-Unicode conversion through the sub-converter stored in
             * the ISO-2022 data (myConverterData->currentConverter) by calling
             * ucnv_MBCSFromUnicodeWithOffsets() with args->converter temporarily
             * pointing at that sub-converter.
             *
             * The pending code point (fromUChar32) is copied into the sub-converter
             * before the call and back to the public converter afterwards. On
             * U_BUFFER_OVERFLOW_ERROR the sub-converter's charErrorBuffer contents
             * are moved to the public converter. args->converter is restored
             * before returning.
             */
   2312 
   2313     UConverter* saveConv = args->converter;
   2314     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022*)saveConv->extraInfo;
   2315     args->converter=myConverterData->currentConverter;
   2316 
            /* hand the pending code point to the sub-converter, convert, then take it back */
   2317     myConverterData->currentConverter->fromUChar32 = saveConv->fromUChar32;
   2318     ucnv_MBCSFromUnicodeWithOffsets(args,err);
   2319     saveConv->fromUChar32 = myConverterData->currentConverter->fromUChar32;
   2320 
   2321     if(*err == U_BUFFER_OVERFLOW_ERROR) {
                /* move overflow bytes to the public converter and empty the sub-converter's buffer */
   2322         if(myConverterData->currentConverter->charErrorBufferLength > 0) {
   2323             uprv_memcpy(
   2324                 saveConv->charErrorBuffer,
   2325                 myConverterData->currentConverter->charErrorBuffer,
   2326                 myConverterData->currentConverter->charErrorBufferLength);
   2327         }
   2328         saveConv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;
   2329         myConverterData->currentConverter->charErrorBufferLength = 0;
   2330     }
   2331     args->converter=saveConv;
   2332 }
   2333 
   2334 static void
   2335 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
            /*
             * Converts UTF-16 from [args->source, args->sourceLimit) to ISO-2022-KR
             * bytes in [args->target, args->targetLimit), storing one source offset
             * per output byte when args->offsets is non-NULL.
             *
             * For version 1 the work is delegated to
             * UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM().
             *
             * Otherwise each code unit is looked up with MBCS_FROM_UCHAR32_ISO2022().
             * Accepted results are a single byte <= 0x7f, or a byte pair with both
             * bytes in 0xa1..0xfe, which is written with 0x80 subtracted from each
             * byte. SO/SI is emitted whenever the output switches between
             * double-byte and single-byte mode. The current mode is carried across
             * calls in converter->fromUnicodeStatus.
             *
             * Errors: U_ILLEGAL_CHAR_FOUND for SO/SI/ESC in the input and for
             * unmatched surrogates, U_INVALID_CHAR_FOUND for unmappable code points,
             * U_BUFFER_OVERFLOW_ERROR when the target is full. The offending or
             * pending code point is stored in converter->fromUChar32.
             */
   2336 
   2337     const UChar *source = args->source;
   2338     const UChar *sourceLimit = args->sourceLimit;
   2339     unsigned char *target = (unsigned char *) args->target;
   2340     unsigned char *targetLimit = (unsigned char *) args->targetLimit;
   2341     int32_t* offsets = args->offsets;
   2342     uint32_t targetByteUnit = 0x0000;
   2343     UChar32 sourceChar = 0x0000;
   2344     UBool isTargetByteDBCS;
   2345     UBool oldIsTargetByteDBCS;
   2346     UConverterDataISO2022 *converterData;
   2347     UConverterSharedData* sharedData;
   2348     UBool useFallback;
   2349     int32_t length =0;
   2350 
   2351     converterData=(UConverterDataISO2022*)args->converter->extraInfo;
   2352     /* if the version is 1 then the user is requesting
   2353      * conversion with ibm-25546 pass the arguments to
   2354      * MBCS converter and return
   2355      */
   2356     if(converterData->version==1){
   2357         UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
   2358         return;
   2359     }
   2360 
   2361     /* initialize data */
   2362     sharedData = converterData->currentConverter->sharedData;
   2363     useFallback = args->converter->useFallback;
   2364     isTargetByteDBCS=(UBool)args->converter->fromUnicodeStatus;
   2365     oldIsTargetByteDBCS = isTargetByteDBCS;
   2366 
            /* NOTE(review): this repeats the assignment made three lines above; it looks redundant */
   2367     isTargetByteDBCS   = (UBool) args->converter->fromUnicodeStatus;
            /* a code point left pending by a previous call resumes at the trail-surrogate lookup */
   2368     if((sourceChar = args->converter->fromUChar32)!=0 && target <targetLimit) {
   2369         goto getTrail;
   2370     }
   2371     while(source < sourceLimit){
   2372 
   2373         targetByteUnit = missingCharMarker;
   2374 
   2375         if(target < (unsigned char*) args->targetLimit){
   2376             sourceChar = *source++;
   2377 
   2378             /* do not convert SO/SI/ESC */
   2379             if(IS_2022_CONTROL(sourceChar)) {
   2380                 /* callback(illegal) */
   2381                 *err=U_ILLEGAL_CHAR_FOUND;
   2382                 args->converter->fromUChar32=sourceChar;
   2383                 break;
   2384             }
   2385 
   2386             length = MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,&targetByteUnit,useFallback,MBCS_OUTPUT_2);
   2387             if(length < 0) {
   2388                 length = -length;  /* fallback */
   2389             }
   2390             /* only DBCS or SBCS characters are expected*/
   2391             /* DB characters with high bit set to 1 are expected */
                    /*
                     * Reject anything else: no mapping, more than two bytes, a single
                     * byte above 0x7f, or a byte pair outside 0xa1a1..0xfefe or whose
                     * low byte is outside 0xa1..0xfe.
                     */
   2392             if( length > 2 || length==0 ||
   2393                 (length == 1 && targetByteUnit > 0x7f) ||
   2394                 (length == 2 &&
   2395                     ((uint16_t)(targetByteUnit - 0xa1a1) > (0xfefe - 0xa1a1) ||
   2396                     (uint8_t)(targetByteUnit - 0xa1) > (0xfe - 0xa1)))
   2397             ) {
   2398                 targetByteUnit=missingCharMarker;
   2399             }
   2400             if (targetByteUnit != missingCharMarker){
   2401 
   2402                 oldIsTargetByteDBCS = isTargetByteDBCS;
   2403                 isTargetByteDBCS = (UBool)(targetByteUnit>0x00FF);
   2404                   /* append the shift sequence */
                          /* there is room for this one byte: target < targetLimit was checked at the top of the loop body */
   2405                 if (oldIsTargetByteDBCS != isTargetByteDBCS ){
   2406 
   2407                     if (isTargetByteDBCS)
   2408                         *target++ = UCNV_SO;
   2409                     else
   2410                         *target++ = UCNV_SI;
   2411                     if(offsets)
   2412                         *(offsets++) = (int32_t)(source - args->source-1);
   2413                 }
   2414                 /* write the targetUniChar  to target */
                        /* bytes that do not fit are appended to the converter's charErrorBuffer and overflow is flagged */
   2415                 if(targetByteUnit <= 0x00FF){
   2416                     if( target < targetLimit){
   2417                         *(target++) = (unsigned char) targetByteUnit;
   2418                         if(offsets){
   2419                             *(offsets++) = (int32_t)(source - args->source-1);
   2420                         }
   2421 
   2422                     }else{
   2423                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit);
   2424                         *err = U_BUFFER_OVERFLOW_ERROR;
   2425                     }
   2426                 }else{
                            /* double-byte: each byte is emitted with 0x80 subtracted */
   2427                     if(target < targetLimit){
   2428                         *(target++) =(unsigned char) ((targetByteUnit>>8) -0x80);
   2429                         if(offsets){
   2430                             *(offsets++) = (int32_t)(source - args->source-1);
   2431                         }
   2432                         if(target < targetLimit){
   2433                             *(target++) =(unsigned char) (targetByteUnit -0x80);
   2434                             if(offsets){
   2435                                 *(offsets++) = (int32_t)(source - args->source-1);
   2436                             }
   2437                         }else{
   2438                             args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit -0x80);
   2439                             *err = U_BUFFER_OVERFLOW_ERROR;
   2440                         }
   2441                     }else{
   2442                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) ((targetByteUnit>>8) -0x80);
   2443                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit-0x80);
   2444                         *err = U_BUFFER_OVERFLOW_ERROR;
   2445                     }
   2446                 }
   2447 
   2448             }
   2449             else{
   2450                 /* oops.. the code point is unassigned
   2451                  * set the error and reason
   2452                  */
   2453 
   2454                 /*check if the char is a First surrogate*/
   2455                 if(U16_IS_SURROGATE(sourceChar)) {
   2456                     if(U16_IS_SURROGATE_LEAD(sourceChar)) {
   2457 getTrail:
   2458                         /*look ahead to find the trail surrogate*/
   2459                         if(source <  sourceLimit) {
   2460                             /* test the following code unit */
   2461                             UChar trail=(UChar) *source;
   2462                             if(U16_IS_TRAIL(trail)) {
   2463                                 source++;
   2464                                 sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
   2465                                 *err = U_INVALID_CHAR_FOUND;
                                        /*
                                         * NOTE(review): despite the wording of the next two
                                         * comments, no conversion is attempted for the
                                         * combined code point; it is reported with
                                         * U_INVALID_CHAR_FOUND and the loop is left below.
                                         */
   2466                                 /* convert this surrogate code point */
   2467                                 /* exit this condition tree */
   2468                             } else {
   2469                                 /* this is an unmatched lead code unit (1st surrogate) */
   2470                                 /* callback(illegal) */
   2471                                 *err=U_ILLEGAL_CHAR_FOUND;
   2472                             }
   2473                         } else {
   2474                             /* no more input */
                                    /* not an error yet: the lead surrogate is kept in fromUChar32 below for the next call */
   2475                             *err = U_ZERO_ERROR;
   2476                         }
   2477                     } else {
   2478                         /* this is an unmatched trail code unit (2nd surrogate) */
   2479                         /* callback(illegal) */
   2480                         *err=U_ILLEGAL_CHAR_FOUND;
   2481                     }
   2482                 } else {
   2483                     /* callback(unassigned) for a BMP code point */
   2484                     *err = U_INVALID_CHAR_FOUND;
   2485                 }
   2486 
   2487                 args->converter->fromUChar32=sourceChar;
   2488                 break;
   2489             }
   2490         } /* end if(myTargetIndex<myTargetLength) */
   2491         else{
   2492             *err =U_BUFFER_OVERFLOW_ERROR;
   2493             break;
   2494         }
   2495 
   2496     }/* end while(mySourceIndex<mySourceLength) */
   2497 
   2498     /*
   2499      * the end of the input stream and detection of truncated input
   2500      * are handled by the framework, but for ISO-2022-KR conversion
   2501      * we need to be in ASCII mode at the very end
   2502      *
   2503      * conditions:
   2504      *   successful
   2505      *   not in ASCII mode
   2506      *   end of input and no truncated input
   2507      */
   2508     if( U_SUCCESS(*err) &&
   2509         isTargetByteDBCS &&
   2510         args->flush && source>=sourceLimit && args->converter->fromUChar32==0
   2511     ) {
   2512         int32_t sourceIndex;
   2513 
   2514         /* we are switching to ASCII */
   2515         isTargetByteDBCS=FALSE;
   2516 
   2517         /* get the source index of the last input character */
   2518         /*
   2519          * TODO this would be simpler and more reliable if we used a pair
   2520          * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
   2521          * so that we could simply use the prevSourceIndex here;
   2522          * this code gives an incorrect result for the rare case of an unmatched
   2523          * trail surrogate that is alone in the last buffer of the text stream
   2524          */
                /* step back one code unit, or two when the last character is a surrogate pair; -1 if this buffer is empty */
   2525         sourceIndex=(int32_t)(source-args->source);
   2526         if(sourceIndex>0) {
   2527             --sourceIndex;
   2528             if( U16_IS_TRAIL(args->source[sourceIndex]) &&
   2529                 (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
   2530             ) {
   2531                 --sourceIndex;
   2532             }
   2533         } else {
   2534             sourceIndex=-1;
   2535         }
   2536 
                /* emit the single closing byte from SHIFT_IN_STR, attributed to that last character */
   2537         fromUWriteUInt8(
   2538             args->converter,
   2539             SHIFT_IN_STR, 1,
   2540             &target, (const char *)targetLimit,
   2541             &offsets, sourceIndex,
   2542             err);
   2543     }
   2544 
   2545     /*save the state and return */
   2546     args->source = source;
   2547     args->target = (char*)target;
   2548     args->converter->fromUnicodeStatus = (uint32_t)isTargetByteDBCS;
   2549 }
   2550 
   2551 /************************ To Unicode ***************************************/
   2552 
   2553 static void
   2554 UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterToUnicodeArgs *args,
   2555                                                             UErrorCode* err){
            /*
             * Converts to Unicode by alternating between two steps until the input
             * is exhausted or an error occurs:
             *   1. run ucnv_MBCSToUnicodeWithOffsets() on the sub-converter
             *      (myData->currentConverter) over the input up to the limit
             *      returned by getEndOfBuffer_2022();
             *   2. let changeState_2022() process what follows that limit.
             *
             * A private copy of the arguments (subArgs) is used for step 1. Partial
             * input bytes (toUBytes/toULength), the UChar overflow buffer and the
             * source/target/offsets pointers are copied between the public
             * converter and the sub-converter around each sub-conversion.
             *
             * If myData->key != 0 on entry, a partial escape sequence is pending
             * and processing resumes directly at step 2.
             */
   2556     char const* sourceStart;
   2557     UConverterDataISO2022* myData=(UConverterDataISO2022*)(args->converter->extraInfo);
   2558 
   2559     UConverterToUnicodeArgs subArgs;
   2560     int32_t minArgsSize;
   2561 
   2562     /* set up the subconverter arguments */
            /* copy no more than the smaller of the caller's declared struct size and our own */
   2563     if(args->size<sizeof(UConverterToUnicodeArgs)) {
   2564         minArgsSize = args->size;
   2565     } else {
   2566         minArgsSize = (int32_t)sizeof(UConverterToUnicodeArgs);
   2567     }
   2568 
   2569     uprv_memcpy(&subArgs, args, minArgsSize);
   2570     subArgs.size = (uint16_t)minArgsSize;
   2571     subArgs.converter = myData->currentConverter;
   2572 
   2573     /* remember the original start of the input for offsets */
   2574     sourceStart = args->source;
   2575 
   2576     if(myData->key != 0) {
   2577         /* continue with a partial escape sequence */
   2578         goto escape;
   2579     }
   2580 
   2581     while(U_SUCCESS(*err) && args->source < args->sourceLimit) {
   2582         /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
   2583         subArgs.source = args->source;
   2584         subArgs.sourceLimit = getEndOfBuffer_2022(&(args->source), args->sourceLimit, args->flush);
   2585         if(subArgs.source != subArgs.sourceLimit) {
   2586             /*
   2587              * get the current partial byte sequence
   2588              *
   2589              * it needs to be moved between the public and the subconverter
   2590              * so that the conversion framework, which only sees the public
   2591              * converter, can handle truncated and illegal input etc.
   2592              */
   2593             if(args->converter->toULength > 0) {
   2594                 uprv_memcpy(subArgs.converter->toUBytes, args->converter->toUBytes, args->converter->toULength);
   2595             }
   2596             subArgs.converter->toULength = args->converter->toULength;
   2597 
   2598             /*
   2599              * Convert up to the end of the input, or to before the next escape character.
   2600              * Does not handle conversion extensions because the preToU[] state etc.
   2601              * is not copied.
   2602              */
   2603             ucnv_MBCSToUnicodeWithOffsets(&subArgs, err);
   2604 
   2605             if(args->offsets != NULL && sourceStart != args->source) {
   2606                 /* update offsets to base them on the actual start of the input */
                        /*
                         * Walks the UChars written by this sub-conversion (from
                         * args->target up to subArgs.target) and shifts each
                         * non-negative offset by the distance between this segment's
                         * start and the original start of the input.
                         */
   2607                 int32_t *offsets = args->offsets;
   2608                 UChar *target = args->target;
   2609                 int32_t delta = (int32_t)(args->source - sourceStart);
   2610                 while(target < subArgs.target) {
   2611                     if(*offsets >= 0) {
   2612                         *offsets += delta;
   2613                     }
   2614                     ++offsets;
   2615                     ++target;
   2616                 }
   2617             }
                    /* adopt the positions reached by the sub-converter */
   2618             args->source = subArgs.source;
   2619             args->target = subArgs.target;
   2620             args->offsets = subArgs.offsets;
   2621 
   2622             /* copy input/error/overflow buffers */
   2623             if(subArgs.converter->toULength > 0) {
   2624                 uprv_memcpy(args->converter->toUBytes, subArgs.converter->toUBytes, subArgs.converter->toULength);
   2625             }
   2626             args->converter->toULength = subArgs.converter->toULength;
   2627 
   2628             if(*err == U_BUFFER_OVERFLOW_ERROR) {
   2629                 if(subArgs.converter->UCharErrorBufferLength > 0) {
   2630                     uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer,
   2631                                 subArgs.converter->UCharErrorBufferLength);
   2632                 }
   2633                 args->converter->UCharErrorBufferLength=subArgs.converter->UCharErrorBufferLength;
   2634                 subArgs.converter->UCharErrorBufferLength = 0;
   2635             }
   2636         }
   2637 
                /* stop on any error, or when all input has been consumed */
   2638         if (U_FAILURE(*err) || (args->source == args->sourceLimit)) {
   2639             return;
   2640         }
   2641 
   2642 escape:
                /* also entered directly from the top of the function when a partial escape sequence is pending */
   2643         changeState_2022(args->converter,
   2644                &(args->source),
   2645                args->sourceLimit,
   2646                ISO_2022_KR,
   2647                err);
   2648     }
   2649 }
   2650 
        /*
         * ISO-2022-KR to Unicode conversion, with optional offsets.
         *
         * Reads bytes from args->source up to args->sourceLimit and writes UChars
         * to args->target up to args->targetLimit. When args->offsets is not NULL,
         * the entry for each output UChar is the index, relative to args->source
         * on entry, of the first source byte of its character (this is -1 for a
         * double-byte character whose lead byte arrived in the previous buffer).
         *
         * Converters with version==1 are handed to the _IBM variant. Otherwise:
         * - SI selects single-byte mode (g=0) and SO selects double-byte mode (g=1);
         *   an SI that directly follows an SO (empty segment) is reported as
         *   U_ILLEGAL_ESCAPE_SEQUENCE with reason UCNV_IRREGULAR.
         * - ESC starts an escape sequence that is processed by changeState_2022().
         * - In double-byte mode a lead/trail pair in 0x21..0x7e is looked up with
         *   0x80 added to each byte; in single-byte mode bytes up to 0x7f are
         *   looked up directly and bytes above 0x7f go to the error callback.
         *
         * State kept between calls: myData->key!=0 marks a partial escape sequence;
         * toUBytes[0] with toULength==1 holds a lead byte that still needs its
         * trail byte. On return args->source and args->target are advanced.
         * *err is set to U_BUFFER_OVERFLOW_ERROR when the target is full while
         * input remains; other error codes come from changeState_2022() and
         * toUnicodeCallback().
         */
   2651 static void
   2652 UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
   2653                                                             UErrorCode* err){
   2654     char tempBuf[2];
   2655     const char *mySource = ( char *) args->source;
   2656     UChar *myTarget = args->target;
   2657     const char *mySourceLimit = args->sourceLimit;
   2658     UChar32 targetUniChar = 0x0000;
            /*
             * Must be wider than 16 bits: the illegal-pair path below ORs in 0x10000
             * as a "two bytes" marker. A 16-bit UChar would silently drop that bit,
             * so a pair with a 0x00 lead byte would look like a single byte.
             * The ISO-2022-CN toUnicode function uses uint32_t for the same variable.
             */
   2659     uint32_t mySourceChar = 0x0000;
   2660     UConverterDataISO2022* myData;
   2661     UConverterSharedData* sharedData ;
   2662     UBool useFallback;
   2663 
   2664     myData=(UConverterDataISO2022*)(args->converter->extraInfo);
   2665     if(myData->version==1){
   2666         UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
   2667         return;
   2668     }
   2669 
   2670     /* initialize state */
   2671     sharedData = myData->currentConverter->sharedData;
   2672     useFallback = args->converter->useFallback;
   2673 
   2674     if(myData->key != 0) {
   2675         /* continue with a partial escape sequence */
   2676         goto escape;
   2677     } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
   2678         /* continue with a partial double-byte character */
   2679         mySourceChar = args->converter->toUBytes[0];
   2680         args->converter->toULength = 0;
   2681         goto getTrailByte;
   2682     }
   2683 
   2684     while(mySource< mySourceLimit){
   2685 
   2686         if(myTarget < args->targetLimit){
   2687 
   2688             mySourceChar= (unsigned char) *mySource++;
   2689 
   2690             if(mySourceChar==UCNV_SI){
   2691                 myData->toU2022State.g = 0;
   2692                 if (myData->isEmptySegment) {
   2693                     myData->isEmptySegment = FALSE;	/* we are handling it, reset to avoid future spurious errors */
   2694                     *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   2695                     args->converter->toUCallbackReason = UCNV_IRREGULAR;
   2696                     args->converter->toUBytes[0] = (uint8_t)mySourceChar;
   2697                     args->converter->toULength = 1;
   2698                     args->target = myTarget;
   2699                     args->source = mySource;
   2700                     return;
   2701                 }
   2702                 /*consume the source */
   2703                 continue;
   2704             }else if(mySourceChar==UCNV_SO){
   2705                 myData->toU2022State.g = 1;
   2706                 myData->isEmptySegment = TRUE;	/* Begin a new segment, empty so far */
   2707                 /*consume the source */
   2708                 continue;
   2709             }else if(mySourceChar==ESC_2022){
                        /* back up so that changeState_2022() sees the ESC byte itself */
   2710                 mySource--;
   2711 escape:
   2712                 myData->isEmptySegment = FALSE;	/* Any invalid ESC sequences will be detected separately, so just reset this */
   2713                 changeState_2022(args->converter,&(mySource),
   2714                                 mySourceLimit, ISO_2022_KR, err);
   2715                 if(U_FAILURE(*err)){
   2716                     args->target = myTarget;
   2717                     args->source = mySource;
   2718                     return;
   2719                 }
   2720                 continue;
   2721             }
   2722 
   2723             myData->isEmptySegment = FALSE;	/* Any invalid char errors will be detected separately, so just reset this */
   2724             if(myData->toU2022State.g == 1) {
                        /* double-byte mode: mySourceChar is the lead byte */
   2725                 if(mySource < mySourceLimit) {
   2726                     int leadIsOk, trailIsOk;
   2727                     uint8_t trailByte;
   2728 getTrailByte:
   2729                     targetUniChar = missingCharMarker;
                            /* peek at the trail byte; it is consumed only in the branches below */
   2730                     trailByte = (uint8_t)*mySource;
   2731                     /*
   2732                      * Ticket 5691: consistent illegal sequences:
   2733                      * - We include at least the first byte in the illegal sequence.
   2734                      * - If any of the non-initial bytes could be the start of a character,
   2735                      *   we stop the illegal sequence before the first one of those.
   2736                      *
   2737                      * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
   2738                      * an ESC/SO/SI, we report only the first byte as the illegal sequence.
   2739                      * Otherwise we convert or report the pair of bytes.
   2740                      */
   2741                     leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
   2742                     trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
   2743                     if (leadIsOk && trailIsOk) {
   2744                         ++mySource;
                                /* the lookup table is keyed by the bytes with their high bits set */
   2745                         tempBuf[0] = (char)(mySourceChar + 0x80);
   2746                         tempBuf[1] = (char)(trailByte + 0x80);
   2747                         targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback);
   2748                         mySourceChar = (mySourceChar << 8) | trailByte;
   2749                     } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
   2750                         /* report a pair of illegal bytes if the second byte is not a DBCS starter */
   2751                         ++mySource;
   2752                         /* add another bit so that the code below writes 2 bytes in case of error */
   2753                         mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
   2754                     }
   2755                 } else {
                            /* input ends after the lead byte: save it for the next call */
   2756                     args->converter->toUBytes[0] = (uint8_t)mySourceChar;
   2757                     args->converter->toULength = 1;
   2758                     break;
   2759                 }
   2760             }
   2761             else if(mySourceChar <= 0x7f) {
                        /* single-byte mode: look up the byte that was just consumed */
   2762                 targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, mySource - 1, 1, useFallback);
   2763             } else {
   2764                 targetUniChar = 0xffff;
   2765             }
   2766             if(targetUniChar < 0xfffe){
   2767                 if(args->offsets) {
                            /* offset of the first byte of this character: 1 or 2 bytes back from mySource */
   2768                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2769                 }
   2770                 *(myTarget++)=(UChar)targetUniChar;
   2771             }
   2772             else {
   2773                 /* Call the callback function*/
   2774                 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
   2775                 break;
   2776             }
   2777         }
   2778         else{
   2779             *err =U_BUFFER_OVERFLOW_ERROR;
   2780             break;
   2781         }
   2782     }
            /* save the positions reached so that the caller can continue from here */
   2783     args->target = myTarget;
   2784     args->source = mySource;
   2785 }
   2786 
   2787 /*************************** END ISO2022-KR *********************************/
   2788 
   2789 /*************************** ISO-2022-CN *********************************
   2790 *
   2791 * Rules for ISO-2022-CN Encoding:
   2792 * i)   The designator sequence must appear once on a line before any instance
   2793 *      of character set it designates.
   2794 * ii)  If two lines contain characters from the same character set, both lines
   2795 *      must include the designator sequence.
   2796 * iii) Once the designator sequence is known, a shift sequence must be used
   2797 *      to invoke the designated character set.
   2798 * iv)  All lines start in ASCII and end in ASCII.
   2799 * v)   Four shifting sequences are employed for this purpose:
   2800 *
   2801 *      Sequence    ASCII Eq    Charsets
   2802 *      ----------  -------    ---------
   2803 *      SI           <SI>        US-ASCII
   2804 *      SO           <SO>        CNS-11643-1992 Plane 1, GB2312, ISO-IR-165
   2805 *      SS2          <ESC>N      CNS-11643-1992 Plane 2
   2806 *      SS3          <ESC>O      CNS-11643-1992 Planes 3-7
   2807 *
   2808 * vi)
   2809 *      SOdesignator  : ESC "$" ")" finalchar_for_SO
   2810 *      SS2designator : ESC "$" "*" finalchar_for_SS2
   2811 *      SS3designator : ESC "$" "+" finalchar_for_SS3
   2812 *
   2813 *      ESC $ ) A       Indicates the bytes following SO are Chinese
   2814 *       characters as defined in GB 2312-80, until
   2815 *       another SOdesignation appears
   2816 *
   2817 *
   2818 *      ESC $ ) E       Indicates the bytes following SO are as defined
   2819 *       in ISO-IR-165 (for details, see section 2.1),
   2820 *       until another SOdesignation appears
   2821 *
   2822 *      ESC $ ) G       Indicates the bytes following SO are as defined
   2823 *       in CNS 11643-plane-1, until another
   2824 *       SOdesignation appears
   2825 *
   2826 *      ESC $ * H       Indicates the two bytes immediately following
   2827 *       SS2 is a Chinese character as defined in CNS
   2828 *       11643-plane-2, until another SS2designation
   2829 *       appears
   2830 *       (Meaning <ESC>N must precede every 2 byte
   2831 *        sequence.)
   2832 *
   2833 *      ESC $ + I       Indicates the immediate two bytes following SS3
   2834 *       is a Chinese character as defined in CNS
   2835 *       11643-plane-3, until another SS3designation
   2836 *       appears
   2837 *       (Meaning <ESC>O must precede every 2 byte
   2838 *        sequence.)
   2839 *
   2840 *      ESC $ + J       Indicates the immediate two bytes following SS3
   2841 *       is a Chinese character as defined in CNS
   2842 *       11643-plane-4, until another SS3designation
   2843 *       appears
   2844 *       (In English: <ESC>O must precede every 2 byte
   2845 *        sequence.)
   2846 *
   2847 *      ESC $ + K       Indicates the immediate two bytes following SS3
   2848 *       is a Chinese character as defined in CNS
   2849 *       11643-plane-5, until another SS3designation
   2850 *       appears
   2851 *
   2852 *      ESC $ + L       Indicates the immediate two bytes following SS3
   2853 *       is a Chinese character as defined in CNS
   2854 *       11643-plane-6, until another SS3designation
   2855 *       appears
   2856 *
   2857 *      ESC $ + M       Indicates the immediate two bytes following SS3
   2858 *       is a Chinese character as defined in CNS
   2859 *       11643-plane-7, until another SS3designation
   2860 *       appears
   2861 *
   2862 *       As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and
   2863 *       has its own designation information before any Chinese characters
   2864 *       appear
   2865 *
   2866 */
   2867 
   2868 /* The following are defined this way to make the strings truly readonly */
        /*
         * ISO-2022-CN designator escape sequences, 4 bytes each:
         * ESC (0x1B), '$' (0x24), then ')' (0x29), '*' (0x2A) or '+' (0x2B) for the
         * SO, SS2 or SS3 designation respectively, then a final byte that names
         * the character set.
         */
   2869 static const char GB_2312_80_STR[] = "\x1B\x24\x29\x41";             /* ESC $ ) A */
   2870 static const char ISO_IR_165_STR[] = "\x1B\x24\x29\x45";             /* ESC $ ) E */
   2871 static const char CNS_11643_1992_Plane_1_STR[] = "\x1B\x24\x29\x47"; /* ESC $ ) G */
   2872 static const char CNS_11643_1992_Plane_2_STR[] = "\x1B\x24\x2A\x48"; /* ESC $ * H */
   2873 static const char CNS_11643_1992_Plane_3_STR[] = "\x1B\x24\x2B\x49"; /* ESC $ + I */
   2874 static const char CNS_11643_1992_Plane_4_STR[] = "\x1B\x24\x2B\x4A"; /* ESC $ + J */
   2875 static const char CNS_11643_1992_Plane_5_STR[] = "\x1B\x24\x2B\x4B"; /* ESC $ + K */
   2876 static const char CNS_11643_1992_Plane_6_STR[] = "\x1B\x24\x2B\x4C"; /* ESC $ + L */
   2877 static const char CNS_11643_1992_Plane_7_STR[] = "\x1B\x24\x2B\x4D"; /* ESC $ + M */
   2878 
   2879 /********************** ISO2022-CN Data **************************/
        /*
         * Designator escape sequences for ISO-2022-CN, indexed by character set.
         * The fromUnicode function copies 4 bytes from an entry; for the CNS planes
         * it computes the index as CNS_11643 + (cs - CNS_11643_1).
         */
   2880 static const char* const escSeqCharsCN[10] ={
   2881         SHIFT_IN_STR,                   /* 0 ASCII */
   2882         GB_2312_80_STR,                 /* 1 GB2312_1 */
   2883         ISO_IR_165_STR,                 /* 2 ISO_IR_165 */
   2884         CNS_11643_1992_Plane_1_STR,     /* 3 CNS 11643-1992 plane 1 */
   2885         CNS_11643_1992_Plane_2_STR,     /* 4 CNS 11643-1992 plane 2 */
   2886         CNS_11643_1992_Plane_3_STR,     /* 5 CNS 11643-1992 plane 3 */
   2887         CNS_11643_1992_Plane_4_STR,     /* 6 CNS 11643-1992 plane 4 */
   2888         CNS_11643_1992_Plane_5_STR,     /* 7 CNS 11643-1992 plane 5 */
   2889         CNS_11643_1992_Plane_6_STR,     /* 8 CNS 11643-1992 plane 6 */
   2890         CNS_11643_1992_Plane_7_STR      /* 9 CNS 11643-1992 plane 7 */
   2891 };
   2892 
        /*
         * Unicode to ISO-2022-CN / ISO-2022-CN-EXT conversion, with optional offsets.
         *
         * Reads UTF-16 from args->source up to args->sourceLimit and writes bytes
         * to args->target up to args->targetLimit. When args->offsets is not NULL,
         * one source index is recorded per output byte.
         *
         * For each code point:
         * - unpaired surrogates and SO/SI/ESC are rejected with U_ILLEGAL_CHAR_FOUND;
         * - U+0000..U+007F is written as one byte, preceded by SI if the output
         *   is currently shifted out; CR and LF reset the whole output state;
         * - anything else is looked up in the candidate character sets in
         *   choices[], preferring a roundtrip mapping over a fallback. The output
         *   is an optional 4-byte designator, an optional shift (SO, or ESC N /
         *   ESC O for a single character from G2/G3), and the two character bytes;
         * - if no character set maps the code point, *err is U_INVALID_CHAR_FOUND.
         * On these errors the offending code point is left in cnv->fromUChar32.
         * A lead surrogate at the end of the input is saved there as well and
         * picked up by the next call.
         *
         * If args->flush is set and all input was consumed successfully, an SI is
         * appended when the output is not in ASCII mode.
         * args->source and args->target are updated before returning.
         */
   2893 static void
   2894 UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
   2895     UConverter *cnv = args->converter;
   2896     UConverterDataISO2022 *converterData;
   2897     ISO2022State *pFromU2022State;
   2898     uint8_t *target = (uint8_t *) args->target;
   2899     const uint8_t *targetLimit = (const uint8_t *) args->targetLimit;
   2900     const UChar* source = args->source;
   2901     const UChar* sourceLimit = args->sourceLimit;
   2902     int32_t* offsets = args->offsets;
   2903     UChar32 sourceChar;
            /* room for designator (4) + shift (up to 2) + character bytes (2) */
   2904     char buffer[8];
   2905     int32_t len;
            /* candidate character sets for non-ASCII input; valid while choiceCount>0 */
   2906     int8_t choices[3];
   2907     int32_t choiceCount;
   2908     uint32_t targetValue = 0;
   2909     UBool useFallback;
   2910 
   2911     /* set up the state */
   2912     converterData     = (UConverterDataISO2022*)cnv->extraInfo;
   2913     pFromU2022State   = &converterData->fromU2022State;
   2914 
   2915     choiceCount = 0;
   2916 
   2917     /* check if the last codepoint of previous buffer was a lead surrogate*/
   2918     if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
   2919         goto getTrail;
   2920     }
   2921 
   2922     while( source < sourceLimit){
   2923         if(target < targetLimit){
   2924 
   2925             sourceChar  = *(source++);
   2926             /*check if the char is a First surrogate*/
   2927              if(U16_IS_SURROGATE(sourceChar)) {
   2928                 if(U16_IS_SURROGATE_LEAD(sourceChar)) {
   2929 getTrail:
   2930                     /*look ahead to find the trail surrogate*/
   2931                     if(source < sourceLimit) {
   2932                         /* test the following code unit */
   2933                         UChar trail=(UChar) *source;
   2934                         if(U16_IS_TRAIL(trail)) {
   2935                             source++;
   2936                             sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
   2937                             cnv->fromUChar32=0x00;
   2938                             /* convert this supplementary code point */
   2939                             /* exit this condition tree */
   2940                         } else {
   2941                             /* this is an unmatched lead code unit (1st surrogate) */
   2942                             /* callback(illegal) */
   2943                             *err=U_ILLEGAL_CHAR_FOUND;
   2944                             cnv->fromUChar32=sourceChar;
   2945                             break;
   2946                         }
   2947                     } else {
   2948                         /* no more input */
   2949                         cnv->fromUChar32=sourceChar;
   2950                         break;
   2951                     }
   2952                 } else {
   2953                     /* this is an unmatched trail code unit (2nd surrogate) */
   2954                     /* callback(illegal) */
   2955                     *err=U_ILLEGAL_CHAR_FOUND;
   2956                     cnv->fromUChar32=sourceChar;
   2957                     break;
   2958                 }
   2959             }
   2960 
   2961             /* do the conversion */
   2962             if(sourceChar <= 0x007f ){
   2963                 /* do not convert SO/SI/ESC */
   2964                 if(IS_2022_CONTROL(sourceChar)) {
   2965                     /* callback(illegal) */
   2966                     *err=U_ILLEGAL_CHAR_FOUND;
   2967                     cnv->fromUChar32=sourceChar;
   2968                     break;
   2969                 }
   2970 
   2971                 /* US-ASCII */
   2972                 if(pFromU2022State->g == 0) {
   2973                     buffer[0] = (char)sourceChar;
   2974                     len = 1;
   2975                 } else {
                            /* shifted out: emit SI first to return to ASCII */
   2976                     buffer[0] = UCNV_SI;
   2977                     buffer[1] = (char)sourceChar;
   2978                     len = 2;
   2979                     pFromU2022State->g = 0;
   2980                     choiceCount = 0;
   2981                 }
   2982                 if(sourceChar == CR || sourceChar == LF) {
   2983                     /* reset the state at the end of a line */
   2984                     uprv_memset(pFromU2022State, 0, sizeof(ISO2022State));
   2985                     choiceCount = 0;
   2986                 }
   2987             }
   2988             else{
   2989                 /* convert U+0080..U+10ffff */
   2990                 int32_t i;
   2991                 int8_t cs, g;
   2992 
   2993                 if(choiceCount == 0) {
   2994                     /* try the current SO/G1 converter first */
   2995                     choices[0] = pFromU2022State->cs[1];
   2996 
   2997                     /* default to GB2312_1 if none is designated yet */
   2998                     if(choices[0] == 0) {
   2999                         choices[0] = GB2312_1;
   3000                     }
   3001 
   3002                     if(converterData->version == 0) {
   3003                         /* ISO-2022-CN */
   3004 
   3005                         /* try the other SO/G1 converter; a CNS_11643_1 lookup may result in any plane */
   3006                         if(choices[0] == GB2312_1) {
   3007                             choices[1] = (int8_t)CNS_11643_1;
   3008                         } else {
   3009                             choices[1] = (int8_t)GB2312_1;
   3010                         }
   3011 
   3012                         choiceCount = 2;
   3013                     } else if (converterData->version == 1) {
   3014                         /* ISO-2022-CN-EXT */
   3015 
   3016                         /* try one of the other converters */
   3017                         switch(choices[0]) {
   3018                         case GB2312_1:
   3019                             choices[1] = (int8_t)CNS_11643_1;
   3020                             choices[2] = (int8_t)ISO_IR_165;
   3021                             break;
   3022                         case ISO_IR_165:
   3023                             choices[1] = (int8_t)GB2312_1;
   3024                             choices[2] = (int8_t)CNS_11643_1;
   3025                             break;
   3026                         default: /* CNS_11643_x */
   3027                             choices[1] = (int8_t)GB2312_1;
   3028                             choices[2] = (int8_t)ISO_IR_165;
   3029                             break;
   3030                         }
   3031 
   3032                         choiceCount = 3;
   3033                     } else {
                                /*
                                 * Any other version: offer CNS 11643 first, then GB 2312.
                                 * choiceCount must be set here as well; otherwise it stays 0,
                                 * the lookup loop below never runs, and every code point above
                                 * U+007F is reported as unmappable.
                                 * Uses the same two myConverterArray[] entries as version 0;
                                 * assumes they are loaded for this version too - confirm in the
                                 * converter's open function.
                                 */
   3034                         choices[0] = (int8_t)CNS_11643_1;
   3035                         choices[1] = (int8_t)GB2312_1;
                                choiceCount = 2;
   3036                     }
   3037                 }
   3038 
   3039                 cs = g = 0;
   3040                 /*
   3041                  * len==0: no mapping found yet
   3042                  * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
   3043                  * len>0: found a roundtrip result, done
   3044                  */
   3045                 len = 0;
   3046                 /*
   3047                  * We will turn off useFallback after finding a fallback,
   3048                  * but we still get fallbacks from PUA code points as usual.
   3049                  * Therefore, we will also need to check that we don't overwrite
   3050                  * an early fallback with a later one.
   3051                  */
   3052                 useFallback = cnv->useFallback;
   3053 
   3054                 for(i = 0; i < choiceCount && len <= 0; ++i) {
   3055                     int8_t cs0 = choices[i];
   3056                     if(cs0 > 0) {
   3057                         uint32_t value;
   3058                         int32_t len2;
   3059                         if(cs0 >= CNS_11643_0) {
                                    /* all CNS planes share one table; results are 3 bytes, the first one encodes the plane */
   3060                             len2 = MBCS_FROM_UCHAR32_ISO2022(
   3061                                         converterData->myConverterArray[CNS_11643],
   3062                                         sourceChar,
   3063                                         &value,
   3064                                         useFallback,
   3065                                         MBCS_OUTPUT_3);
   3066                             if(len2 == 3 || (len2 == -3 && len == 0)) {
   3067                                 targetValue = value;
                                        /* derive the plane's charset id from the top byte (0x80 + plane) */
   3068                                 cs = (int8_t)(CNS_11643_0 + (value >> 16) - 0x80);
   3069                                 if(len2 >= 0) {
   3070                                     len = 2;
   3071                                 } else {
   3072                                     len = -2;
   3073                                     useFallback = FALSE;
   3074                                 }
   3075                                 if(cs == CNS_11643_1) {
   3076                                     g = 1;
   3077                                 } else if(cs == CNS_11643_2) {
   3078                                     g = 2;
   3079                                 } else /* plane 3..7 */ if(converterData->version == 1) {
   3080                                     g = 3;
   3081                                 } else {
   3082                                     /* ISO-2022-CN (without -EXT) does not support plane 3..7 */
   3083                                     len = 0;
   3084                                 }
   3085                             }
   3086                         } else {
   3087                             /* GB2312_1 or ISO-IR-165 */
   3088                             U_ASSERT(cs0<UCNV_2022_MAX_CONVERTERS);
   3089                             len2 = MBCS_FROM_UCHAR32_ISO2022(
   3090                                         converterData->myConverterArray[cs0],
   3091                                         sourceChar,
   3092                                         &value,
   3093                                         useFallback,
   3094                                         MBCS_OUTPUT_2);
   3095                             if(len2 == 2 || (len2 == -2 && len == 0)) {
   3096                                 targetValue = value;
   3097                                 len = len2;
   3098                                 cs = cs0;
   3099                                 g = 1;
   3100                                 useFallback = FALSE;
   3101                             }
   3102                         }
   3103                     }
   3104                 }
   3105 
   3106                 if(len != 0) {
   3107                     len = 0; /* count output bytes; it must have been abs(len) == 2 */
   3108 
   3109                     /* write the designation sequence if necessary */
   3110                     if(cs != pFromU2022State->cs[g]) {
   3111                         if(cs < CNS_11643) {
   3112                             uprv_memcpy(buffer, escSeqCharsCN[cs], 4);
   3113                         } else {
   3114                             U_ASSERT(cs >= CNS_11643_1);
   3115                             uprv_memcpy(buffer, escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)], 4);
   3116                         }
   3117                         len = 4;
   3118                         pFromU2022State->cs[g] = cs;
   3119                         if(g == 1) {
   3120                             /* changing the SO/G1 charset invalidates the choices[] */
   3121                             choiceCount = 0;
   3122                         }
   3123                     }
   3124 
   3125                     /* write the shift sequence if necessary */
   3126                     if(g != pFromU2022State->g) {
   3127                         switch(g) {
   3128                         case 1:
   3129                             buffer[len++] = UCNV_SO;
   3130 
   3131                             /* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */
   3132                             pFromU2022State->g = 1;
   3133                             break;
   3134                         case 2:
                                    /* SS2: ESC N */
   3135                             buffer[len++] = 0x1b;
   3136                             buffer[len++] = 0x4e;
   3137                             break;
   3138                         default: /* case 3 */
                                    /* SS3: ESC O */
   3139                             buffer[len++] = 0x1b;
   3140                             buffer[len++] = 0x4f;
   3141                             break;
   3142                         }
   3143                     }
   3144 
   3145                     /* write the two output bytes */
   3146                     buffer[len++] = (char)(targetValue >> 8);
   3147                     buffer[len++] = (char)targetValue;
   3148                 } else {
   3149                     /* if we cannot find the character after checking all codepages
   3150                      * then this is an error
   3151                      */
   3152                     *err = U_INVALID_CHAR_FOUND;
   3153                     cnv->fromUChar32=sourceChar;
   3154                     break;
   3155                 }
   3156             }
   3157 
   3158             /* output len>0 bytes in buffer[] */
   3159             if(len == 1) {
   3160                 *target++ = buffer[0];
   3161                 if(offsets) {
   3162                     *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */
   3163                 }
   3164             } else if(len == 2 && (target + 2) <= targetLimit) {
   3165                 *target++ = buffer[0];
   3166                 *target++ = buffer[1];
   3167                 if(offsets) {
   3168                     int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));
   3169                     *offsets++ = sourceIndex;
   3170                     *offsets++ = sourceIndex;
   3171                 }
   3172             } else {
                        /* general case, including output that may not fit into the target; may set *err */
   3173                 fromUWriteUInt8(
   3174                     cnv,
   3175                     buffer, len,
   3176                     &target, (const char *)targetLimit,
   3177                     &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
   3178                     err);
   3179                 if(U_FAILURE(*err)) {
   3180                     break;
   3181                 }
   3182             }
   3183         } /* end if(target < targetLimit) */
   3184         else{
   3185             *err =U_BUFFER_OVERFLOW_ERROR;
   3186             break;
   3187         }
   3188 
   3189     }/* end while(source < sourceLimit) */
   3190 
   3191     /*
   3192      * the end of the input stream and detection of truncated input
   3193      * are handled by the framework, but for ISO-2022-CN conversion
   3194      * we need to be in ASCII mode at the very end
   3195      *
   3196      * conditions:
   3197      *   successful
   3198      *   not in ASCII mode
   3199      *   end of input and no truncated input
   3200      */
   3201     if( U_SUCCESS(*err) &&
   3202         pFromU2022State->g!=0 &&
   3203         args->flush && source>=sourceLimit && cnv->fromUChar32==0
   3204     ) {
   3205         int32_t sourceIndex;
   3206 
   3207         /* we are switching to ASCII */
   3208         pFromU2022State->g=0;
   3209 
   3210         /* get the source index of the last input character */
   3211         /*
   3212          * TODO this would be simpler and more reliable if we used a pair
   3213          * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
   3214          * so that we could simply use the prevSourceIndex here;
   3215          * this code gives an incorrect result for the rare case of an unmatched
   3216          * trail surrogate that is alone in the last buffer of the text stream
   3217          */
   3218         sourceIndex=(int32_t)(source-args->source);
   3219         if(sourceIndex>0) {
   3220             --sourceIndex;
   3221             if( U16_IS_TRAIL(args->source[sourceIndex]) &&
   3222                 (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
   3223             ) {
   3224                 --sourceIndex;
   3225             }
   3226         } else {
                    /* nothing was consumed in this call: there is no source index to report */
   3227             sourceIndex=-1;
   3228         }
   3229 
   3230         fromUWriteUInt8(
   3231             cnv,
   3232             SHIFT_IN_STR, 1,
   3233             &target, (const char *)targetLimit,
   3234             &offsets, sourceIndex,
   3235             err);
   3236     }
   3237 
   3238     /*save the state and return */
   3239     args->source = source;
   3240     args->target = (char*)target;
   3241 }
   3242 
   3243 
/*
 * Converts ISO-2022-CN bytes from args->source into UTF-16 code units at
 * args->target and, when args->offsets is non-NULL, records for every UChar
 * written the source index of the byte sequence it came from.
 *
 * The function is resumable across calls through state kept on the converter:
 * - myData->key != 0: a partial escape sequence is pending; processing
 *   resumes at the "escape" label, i.e. inside changeState_2022().
 * - toULength == 1: the lead byte of a double-byte character was stored in
 *   toUBytes[0] when the previous buffer ended; processing resumes at the
 *   "getTrailByte" label once a source byte and target space are available.
 *
 * On every exit args->source and args->target are updated to the positions
 * reached. Errors are reported through *err:
 * - U_ILLEGAL_ESCAPE_SEQUENCE (reason UCNV_IRREGULAR) for an SO segment that
 *   is closed by SI or followed by an escape sequence without any character
 *   in between;
 * - U_BUFFER_OVERFLOW_ERROR when the target is full while source bytes remain;
 * - whatever changeState_2022() or toUnicodeCallback() set.
 */
static void
UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
                                               UErrorCode* err){
    char tempBuf[3];
    const char *mySource = (char *) args->source;
    UChar *myTarget = args->target;
    const char *mySourceLimit = args->sourceLimit;
    uint32_t targetUniChar = 0x0000;
    uint32_t mySourceChar = 0x0000;
    UConverterDataISO2022* myData;
    ISO2022State *pToU2022State;

    myData=(UConverterDataISO2022*)(args->converter->extraInfo);
    pToU2022State = &myData->toU2022State;

    if(myData->key != 0) {
        /* continue with a partial escape sequence */
        goto escape;
    } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
        /*
         * continue with a partial double-byte character:
         * the lead byte was saved in toUBytes[0] by the previous call;
         * this is only done when a trail byte can be read and a result written
         */
        mySourceChar = args->converter->toUBytes[0];
        args->converter->toULength = 0;
        targetUniChar = missingCharMarker;
        goto getTrailByte;
    }

    while(mySource < mySourceLimit){

        targetUniChar =missingCharMarker;

        if(myTarget < args->targetLimit){

            mySourceChar= (unsigned char) *mySource++;

            switch(mySourceChar){
            case UCNV_SI:
                /* shift in: select G0 */
                pToU2022State->g=0;
                if (myData->isEmptySegment) {
                    myData->isEmptySegment = FALSE;	/* we are handling it, reset to avoid future spurious errors */
                    *err = U_ILLEGAL_ESCAPE_SEQUENCE;
                    args->converter->toUCallbackReason = UCNV_IRREGULAR;
                    args->converter->toUBytes[0] = mySourceChar;
                    args->converter->toULength = 1;
                    args->target = myTarget;
                    args->source = mySource;
                    return;
                }
                continue;

            case UCNV_SO:
                /* shift out: select G1, but only if a charset has been designated for it */
                if(pToU2022State->cs[1] != 0) {
                    pToU2022State->g=1;
                    myData->isEmptySegment = TRUE;	/* Begin a new segment, empty so far */
                    continue;
                } else {
                    /* illegal to have SO before a matching designator */
                    myData->isEmptySegment = FALSE;	/* Handling a different error, reset this to avoid future spurious errs */
                    /* targetUniChar is still missingCharMarker, so the code after the switch calls the callback */
                    break;
                }

            case ESC_2022:
                /* un-read the ESC so that changeState_2022() sees the whole sequence */
                mySource--;
escape:
                {
                    /* remember where we were so that the length of the escape sequence can be computed below */
                    const char * mySourceBefore = mySource;
                    int8_t toULengthBefore = args->converter->toULength;

                    changeState_2022(args->converter,&(mySource),
                        mySourceLimit, ISO_2022_CN,err);

                    /* After SO there must be at least one character before a designator (designator error handled separately) */
                    if(myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
                        *err = U_ILLEGAL_ESCAPE_SEQUENCE;
                        args->converter->toUCallbackReason = UCNV_IRREGULAR;
                        /* bytes pending before this call plus bytes consumed by changeState_2022() in this call */
                        args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore));
                    }
                }

                /* invalid or illegal escape sequence */
                if(U_FAILURE(*err)){
                    args->target = myTarget;
                    args->source = mySource;
                    myData->isEmptySegment = FALSE;	/* Reset to avoid future spurious errors */
                    return;
                }
                continue;

            /* ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */

            case CR:
                /*falls through*/
            case LF:
                /* a line end clears the whole toUnicode ISO 2022 state (designations and shift state) */
                uprv_memset(pToU2022State, 0, sizeof(ISO2022State));
                /* falls through */
            default:
                /* convert one or two bytes */
                myData->isEmptySegment = FALSE;
                if(pToU2022State->g != 0) {
                    /* a non-G0 set is active: characters are double-byte */
                    if(mySource < mySourceLimit) {
                        UConverterSharedData *cnv;
                        StateEnum tempState;
                        int32_t tempBufLen;
                        int leadIsOk, trailIsOk;
                        uint8_t trailByte;
getTrailByte:
                        trailByte = (uint8_t)*mySource;
                        /*
                         * Ticket 5691: consistent illegal sequences:
                         * - We include at least the first byte in the illegal sequence.
                         * - If any of the non-initial bytes could be the start of a character,
                         *   we stop the illegal sequence before the first one of those.
                         *
                         * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
                         * an ESC/SO/SI, we report only the first byte as the illegal sequence.
                         * Otherwise we convert or report the pair of bytes.
                         */
                        /* range checks 0x21..0x7e done with one comparison each via uint8_t wrap-around */
                        leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
                        trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
                        if (leadIsOk && trailIsOk) {
                            ++mySource;
                            tempState = (StateEnum)pToU2022State->cs[pToU2022State->g];
                            if(tempState >= CNS_11643_0) {
                                /*
                                 * All CNS 11643 states share the single CNS_11643 table;
                                 * the state is selected with a prefix byte 0x80+(state-CNS_11643_0).
                                 */
                                cnv = myData->myConverterArray[CNS_11643];
                                tempBuf[0] = (char) (0x80+(tempState-CNS_11643_0));
                                tempBuf[1] = (char) (mySourceChar);
                                tempBuf[2] = (char) trailByte;
                                tempBufLen = 3;

                            }else{
                                U_ASSERT(tempState<UCNV_2022_MAX_CONVERTERS);
                                cnv = myData->myConverterArray[tempState];
                                tempBuf[0] = (char) (mySourceChar);
                                tempBuf[1] = (char) trailByte;
                                tempBufLen = 2;
                            }
                            targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE);
                            /* keep both bytes: > 0xff marks a two-byte character for the offsets and the callback */
                            mySourceChar = (mySourceChar << 8) | trailByte;
                        } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
                            /* report a pair of illegal bytes if the second byte is not a DBCS starter */
                            ++mySource;
                            /* add another bit so that the code below writes 2 bytes in case of error */
                            mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
                        }
                        if(pToU2022State->g>=2) {
                            /* return from a single-shift state to the previous one */
                            pToU2022State->g=pToU2022State->prevG;
                        }
                    } else {
                        /* source exhausted after the lead byte: save it and resume at getTrailByte on the next call */
                        args->converter->toUBytes[0] = (uint8_t)mySourceChar;
                        args->converter->toULength = 1;
                        goto endloop;
                    }
                }
                else{
                    /* G0: only 7-bit bytes convert (to themselves); others keep missingCharMarker */
                    if(mySourceChar <= 0x7f) {
                        targetUniChar = (UChar) mySourceChar;
                    }
                }
                break;
            }
            if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){
                /* BMP result: one UChar */
                if(args->offsets){
                    /*
                     * offset of the first source byte of this character: back up 1 byte for a
                     * single-byte and 2 for a double-byte character (mySourceChar > 0xff);
                     * this is -1 when the lead byte was consumed by a previous call
                     */
                    args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
                }
                *(myTarget++)=(UChar)targetUniChar;
            }
            else if(targetUniChar > missingCharMarker){
                /* disassemble the surrogate pair and write to output*/
                targetUniChar-=0x0010000;
                *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));
                if(args->offsets){
                    args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
                }
                ++myTarget;
                if(myTarget< args->targetLimit){
                    *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
                    if(args->offsets){
                        args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
                    }
                    ++myTarget;
                }else{
                    /*
                     * no room for the trail surrogate: park it in the converter's UChar error buffer.
                     * NOTE(review): *err is not set on this path; U_BUFFER_OVERFLOW_ERROR is only set
                     * below if the loop runs again with source bytes remaining.
                     */
                    args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
                                    (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
                }

            }
            else{
                /* Call the callback function*/
                toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
                break;
            }
        }
        else{
            *err =U_BUFFER_OVERFLOW_ERROR;
            break;
        }
    }
endloop:
    /* publish the positions reached back to the caller */
    args->target = myTarget;
    args->source = mySource;
}
   3445 
   3446 static void
   3447 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
   3448     UConverter *cnv = args->converter;
   3449     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo;
   3450     ISO2022State *pFromU2022State=&myConverterData->fromU2022State;
   3451     char *p, *subchar;
   3452     char buffer[8];
   3453     int32_t length;
   3454 
   3455     subchar=(char *)cnv->subChars;
   3456     length=cnv->subCharLen; /* assume length==1 for most variants */
   3457 
   3458     p = buffer;
   3459     switch(myConverterData->locale[0]){
   3460     case 'j':
   3461         {
   3462             int8_t cs;
   3463 
   3464             if(pFromU2022State->g == 1) {
   3465                 /* JIS7: switch from G1 to G0 */
   3466                 pFromU2022State->g = 0;
   3467                 *p++ = UCNV_SI;
   3468             }
   3469 
   3470             cs = pFromU2022State->cs[0];
   3471             if(cs != ASCII && cs != JISX201) {
   3472                 /* not in ASCII or JIS X 0201: switch to ASCII */
   3473                 pFromU2022State->cs[0] = (int8_t)ASCII;
   3474                 *p++ = '\x1b';
   3475                 *p++ = '\x28';
   3476                 *p++ = '\x42';
   3477             }
   3478 
   3479             *p++ = subchar[0];
   3480             break;
   3481         }
   3482     case 'c':
   3483         if(pFromU2022State->g != 0) {
   3484             /* not in ASCII mode: switch to ASCII */
   3485             pFromU2022State->g = 0;
   3486             *p++ = UCNV_SI;
   3487         }
   3488         *p++ = subchar[0];
   3489         break;
   3490     case 'k':
   3491         if(myConverterData->version == 0) {
   3492             if(length == 1) {
   3493                 if((UBool)args->converter->fromUnicodeStatus) {
   3494                     /* in DBCS mode: switch to SBCS */
   3495                     args->converter->fromUnicodeStatus = 0;
   3496                     *p++ = UCNV_SI;
   3497                 }
   3498                 *p++ = subchar[0];
   3499             } else /* length == 2*/ {
   3500                 if(!(UBool)args->converter->fromUnicodeStatus) {
   3501                     /* in SBCS mode: switch to DBCS */
   3502                     args->converter->fromUnicodeStatus = 1;
   3503                     *p++ = UCNV_SO;
   3504                 }
   3505                 *p++ = subchar[0];
   3506                 *p++ = subchar[1];
   3507             }
   3508             break;
   3509         } else {
   3510             /* save the subconverter's substitution string */
   3511             uint8_t *currentSubChars = myConverterData->currentConverter->subChars;
   3512             int8_t currentSubCharLen = myConverterData->currentConverter->subCharLen;
   3513 
   3514             /* set our substitution string into the subconverter */
   3515             myConverterData->currentConverter->subChars = (uint8_t *)subchar;
   3516             myConverterData->currentConverter->subCharLen = (int8_t)length;
   3517 
   3518             /* let the subconverter write the subchar, set/retrieve fromUChar32 state */
   3519             args->converter = myConverterData->currentConverter;
   3520             myConverterData->currentConverter->fromUChar32 = cnv->fromUChar32;
   3521             ucnv_cbFromUWriteSub(args, 0, err);
   3522             cnv->fromUChar32 = myConverterData->currentConverter->fromUChar32;
   3523             args->converter = cnv;
   3524 
   3525             /* restore the subconverter's substitution string */
   3526             myConverterData->currentConverter->subChars = currentSubChars;
   3527             myConverterData->currentConverter->subCharLen = currentSubCharLen;
   3528 
   3529             if(*err == U_BUFFER_OVERFLOW_ERROR) {
   3530                 if(myConverterData->currentConverter->charErrorBufferLength > 0) {
   3531                     uprv_memcpy(
   3532                         cnv->charErrorBuffer,
   3533                         myConverterData->currentConverter->charErrorBuffer,
   3534                         myConverterData->currentConverter->charErrorBufferLength);
   3535                 }
   3536                 cnv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;
   3537                 myConverterData->currentConverter->charErrorBufferLength = 0;
   3538             }
   3539             return;
   3540         }
   3541     default:
   3542         /* not expected */
   3543         break;
   3544     }
   3545     ucnv_cbFromUWriteBytes(args,
   3546                            buffer, (int32_t)(p - buffer),
   3547                            offsetIndex, err);
   3548 }
   3549 
/*
 * Structure for cloning an ISO 2022 converter into a single memory block.
 * ucnv_safeClone() of the converter will align the entire cloneStruct,
 * and then ucnv_safeClone() of the sub-converter may additionally align
 * currentConverter inside the cloneStruct, for which we need the deadSpace
 * after currentConverter.
 * This is because UAlignedMemory may be larger than the actually
 * necessary alignment size for the platform.
 * The other cloneStruct fields will not be moved around,
 * and are aligned properly with cloneStruct's alignment.
 *
 * The field order is significant: _ISO_2022_SafeClone() hands
 * &currentConverter to ucnv_safeClone() with a capacity of
 * sizeof(UConverter)+sizeof(UAlignedMemory), so deadSpace must directly
 * follow currentConverter.
 */
struct cloneStruct
{
    UConverter cnv;               /* the cloned ISO 2022 converter; its address is what _ISO_2022_SafeClone() returns */
    UConverter currentConverter;  /* storage for the clone of the sub-converter (mydata.currentConverter) */
    UAlignedMemory deadSpace;     /* slack so the sub-converter clone can be re-aligned within this block */
    UConverterDataISO2022 mydata; /* copy of the original's extraInfo; cnv.extraInfo points here */
};
   3568 
   3569 
   3570 static UConverter *
   3571 _ISO_2022_SafeClone(
   3572             const UConverter *cnv,
   3573             void *stackBuffer,
   3574             int32_t *pBufferSize,
   3575             UErrorCode *status)
   3576 {
   3577     struct cloneStruct * localClone;
   3578     UConverterDataISO2022 *cnvData;
   3579     int32_t i, size;
   3580 
   3581     if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */
   3582         *pBufferSize = (int32_t)sizeof(struct cloneStruct);
   3583         return NULL;
   3584     }
   3585 
   3586     cnvData = (UConverterDataISO2022 *)cnv->extraInfo;
   3587     localClone = (struct cloneStruct *)stackBuffer;
   3588 
   3589     /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
   3590 
   3591     uprv_memcpy(&localClone->mydata, cnvData, sizeof(UConverterDataISO2022));
   3592     localClone->cnv.extraInfo = &localClone->mydata; /* set pointer to extra data */
   3593     localClone->cnv.isExtraLocal = TRUE;
   3594 
   3595     /* share the subconverters */
   3596 
   3597     if(cnvData->currentConverter != NULL) {
   3598         size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */
   3599         localClone->mydata.currentConverter =
   3600             ucnv_safeClone(cnvData->currentConverter,
   3601                             &localClone->currentConverter,
   3602                             &size, status);
   3603         if(U_FAILURE(*status)) {
   3604             return NULL;
   3605         }
   3606     }
   3607 
   3608     for(i=0; i<UCNV_2022_MAX_CONVERTERS; ++i) {
   3609         if(cnvData->myConverterArray[i] != NULL) {
   3610             ucnv_incrementRefCount(cnvData->myConverterArray[i]);
   3611         }
   3612     }
   3613 
   3614     return &localClone->cnv;
   3615 }
   3616 
   3617 static void
   3618 _ISO_2022_GetUnicodeSet(const UConverter *cnv,
   3619                     const USetAdder *sa,
   3620                     UConverterUnicodeSet which,
   3621                     UErrorCode *pErrorCode)
   3622 {
   3623     int32_t i;
   3624     UConverterDataISO2022* cnvData;
   3625 
   3626     if (U_FAILURE(*pErrorCode)) {
   3627         return;
   3628     }
   3629 #ifdef U_ENABLE_GENERIC_ISO_2022
   3630     if (cnv->sharedData == &_ISO2022Data) {
   3631         /* We use UTF-8 in this case */
   3632         sa->addRange(sa->set, 0, 0xd7FF);
   3633         sa->addRange(sa->set, 0xE000, 0x10FFFF);
   3634         return;
   3635     }
   3636 #endif
   3637 
   3638     cnvData = (UConverterDataISO2022*)cnv->extraInfo;
   3639 
   3640     /* open a set and initialize it with code points that are algorithmically round-tripped */
   3641     switch(cnvData->locale[0]){
   3642     case 'j':
   3643         /* include JIS X 0201 which is hardcoded */
   3644         sa->add(sa->set, 0xa5);
   3645         sa->add(sa->set, 0x203e);
   3646         if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
   3647             /* include Latin-1 for some variants of JP */
   3648             sa->addRange(sa->set, 0, 0xff);
   3649         } else {
   3650             /* include ASCII for JP */
   3651             sa->addRange(sa->set, 0, 0x7f);
   3652         }
   3653         if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
   3654             /*
   3655              * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0
   3656              * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8)
   3657              * use half-width Katakana.
   3658              * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode)
   3659              * half-width Katakana via the ESC ( I sequence.
   3660              * However, we only emit (fromUnicode) half-width Katakana according to the
   3661              * definition of each variant.
   3662              *
   3663              * When including fallbacks,
   3664              * we need to include half-width Katakana Unicode code points for all JP variants because
   3665              * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana).
   3666              */
   3667             /* include half-width Katakana for JP */
   3668             sa->addRange(sa->set, HWKANA_START, HWKANA_END);
   3669         }
   3670         break;
   3671     case 'c':
   3672     case 'z':
   3673         /* include ASCII for CN */
   3674         sa->addRange(sa->set, 0, 0x7f);
   3675         break;
   3676     case 'k':
   3677         /* there is only one converter for KR, and it is not in the myConverterArray[] */
   3678         cnvData->currentConverter->sharedData->impl->getUnicodeSet(
   3679                 cnvData->currentConverter, sa, which, pErrorCode);
   3680         /* the loop over myConverterArray[] will simply not find another converter */
   3681         break;
   3682     default:
   3683         break;
   3684     }
   3685 
   3686 #if 0  /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */
   3687             if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
   3688                 cnvData->version==0 && i==CNS_11643
   3689             ) {
   3690                 /* special handling for non-EXT ISO-2022-CN: add only code points for CNS planes 1 and 2 */
   3691                 ucnv_MBCSGetUnicodeSetForBytes(
   3692                         cnvData->myConverterArray[i],
   3693                         sa, UCNV_ROUNDTRIP_SET,
   3694                         0, 0x81, 0x82,
   3695                         pErrorCode);
   3696             }
   3697 #endif
   3698 
   3699     for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
   3700         UConverterSetFilter filter;
   3701         if(cnvData->myConverterArray[i]!=NULL) {
   3702             if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
   3703                 cnvData->version==0 && i==CNS_11643
   3704             ) {
   3705                 /*
   3706                  * Version-specific for CN:
   3707                  * CN version 0 does not map CNS planes 3..7 although
   3708                  * they are all available in the CNS conversion table;
   3709                  * CN version 1 (-EXT) does map them all.
   3710                  * The two versions create different Unicode sets.
   3711                  */
   3712                 filter=UCNV_SET_FILTER_2022_CN;
   3713             } else if(cnvData->locale[0]=='j' && i==JISX208) {
   3714                 /*
   3715                  * Only add code points that map to Shift-JIS codes
   3716                  * corresponding to JIS X 0208.
   3717                  */
   3718                 filter=UCNV_SET_FILTER_SJIS;
   3719             } else if(i==KSC5601) {
   3720                 /*
   3721                  * Some of the KSC 5601 tables (convrtrs.txt has this aliases on multiple tables)
   3722                  * are broader than GR94.
   3723                  */
   3724                 filter=UCNV_SET_FILTER_GR94DBCS;
   3725             } else {
   3726                 filter=UCNV_SET_FILTER_NONE;
   3727             }
   3728             ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, filter, pErrorCode);
   3729         }
   3730     }
   3731 
   3732     /*
   3733      * ISO 2022 converters must not convert SO/SI/ESC despite what
   3734      * sub-converters do by themselves.
   3735      * Remove these characters from the set.
   3736      */
   3737     sa->remove(sa->set, 0x0e);
   3738     sa->remove(sa->set, 0x0f);
   3739     sa->remove(sa->set, 0x1b);
   3740 
   3741     /* ISO 2022 converters do not convert C1 controls either */
   3742     sa->removeRange(sa->set, 0x80, 0x9f);
   3743 }
   3744 
/*
 * Function table for the generic ISO-2022 converter.
 * The initializer is positional. The slot labels below follow the
 * UConverterImpl declaration, which is not visible in this part of the
 * file -- confirm against it before adding or reordering entries.
 * The conversion functions are only supplied when U_ENABLE_GENERIC_ISO_2022
 * is defined; otherwise those four slots are NULL.
 */
static const UConverterImpl _ISO2022Impl={
    UCNV_ISO_2022, /* type */

    NULL, /* load */
    NULL, /* unload */

    _ISO2022Open,  /* open */
    _ISO2022Close, /* close */
    _ISO2022Reset, /* reset */

#ifdef U_ENABLE_GENERIC_ISO_2022
    T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC, /* toUnicode */
    T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC, /* toUnicode with offsets */
    ucnv_fromUnicode_UTF8,                         /* fromUnicode */
    ucnv_fromUnicode_UTF8_OFFSETS_LOGIC,           /* fromUnicode with offsets */
#else
    NULL, /* toUnicode */
    NULL, /* toUnicode with offsets */
    NULL, /* fromUnicode */
    NULL, /* fromUnicode with offsets */
#endif
    NULL, /* getNextUChar */

    NULL,                    /* getStarters */
    _ISO2022getName,         /* getName */
    _ISO_2022_WriteSub,      /* writeSub */
    _ISO_2022_SafeClone,     /* safeClone */
    _ISO_2022_GetUnicodeSet, /* getUnicodeSet */

    NULL, /* toUTF8 */
    NULL  /* fromUTF8 */
};
/*
 * Static properties of the generic ISO-2022 converter.
 * Positional initializer; the labels follow the UConverterStaticData
 * declaration, which is not visible here -- confirm before editing.
 */
static const UConverterStaticData _ISO2022StaticData={
    sizeof(UConverterStaticData), /* structSize */
    "ISO_2022",    /* name */
    2022,          /* codepage */
    UCNV_IBM,      /* platform */
    UCNV_ISO_2022, /* conversionType */
    1, /* minBytesPerChar */
    3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */
    { 0x1a, 0, 0, 0 }, /* subChar */
    1,     /* subCharLen */
    FALSE, /* hasToUnicodeFallback */
    FALSE, /* hasFromUnicodeFallback */
    0, /* unicodeMask */
    0, /* subChar1 */
    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
};
/*
 * Shared data object for the generic ISO-2022 converter; ties together
 * _ISO2022StaticData and _ISO2022Impl.
 * Positional initializer; the labels follow the UConverterSharedData
 * declaration, which is not visible here -- confirm before editing.
 */
const UConverterSharedData _ISO2022Data={
    sizeof(UConverterSharedData), /* structSize */
    ~((uint32_t) 0), /* referenceCounter: all bits set; presumably marks static, never-released data -- confirm */
    NULL, /* dataMemory */
    NULL, /* table */
    &_ISO2022StaticData, /* staticData */
    FALSE, /* sharedDataCached */
    &_ISO2022Impl, /* impl */
    0, UCNV_MBCS_TABLE_INITIALIZER /* toUnicodeStatus, mbcs */
};
   3803 
   3804 /*************JP****************/
/*
 * Function table for the ISO-2022-JP converter variants.
 * The initializer is positional. The slot labels below follow the
 * UConverterImpl declaration, which is not visible in this part of the
 * file -- confirm against it before adding or reordering entries.
 * The same offsets-aware function serves both the plain and the
 * with-offsets slot in each direction.
 */
static const UConverterImpl _ISO2022JPImpl={
    UCNV_ISO_2022, /* type */

    NULL, /* load */
    NULL, /* unload */

    _ISO2022Open,  /* open */
    _ISO2022Close, /* close */
    _ISO2022Reset, /* reset */

    UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,   /* toUnicode */
    UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,   /* toUnicode with offsets */
    UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC, /* fromUnicode */
    UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC, /* fromUnicode with offsets */
    NULL, /* getNextUChar */

    NULL,                    /* getStarters */
    _ISO2022getName,         /* getName */
    _ISO_2022_WriteSub,      /* writeSub */
    _ISO_2022_SafeClone,     /* safeClone */
    _ISO_2022_GetUnicodeSet, /* getUnicodeSet */

    NULL, /* toUTF8 */
    NULL  /* fromUTF8 */
};
/*
 * Static properties of the ISO-2022-JP converter.
 * Positional initializer; the labels follow the UConverterStaticData
 * declaration, which is not visible here -- confirm before editing.
 */
static const UConverterStaticData _ISO2022JPStaticData={
    sizeof(UConverterStaticData), /* structSize */
    "ISO_2022_JP", /* name */
    0,             /* codepage */
    UCNV_IBM,      /* platform */
    UCNV_ISO_2022, /* conversionType */
    1, /* minBytesPerChar */
    6, /* max 6 bytes per UChar: 4-byte escape sequence + DBCS */
    { 0x1a, 0, 0, 0 }, /* subChar */
    1,     /* subCharLen */
    FALSE, /* hasToUnicodeFallback */
    FALSE, /* hasFromUnicodeFallback */
    0, /* unicodeMask */
    0, /* subChar1 */
    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
};
   3846 
namespace {

/*
 * Shared data object for the ISO-2022-JP converter; ties together
 * _ISO2022JPStaticData and _ISO2022JPImpl. Defined in an unnamed namespace.
 * Positional initializer; the labels follow the UConverterSharedData
 * declaration, which is not visible here -- confirm before editing.
 */
const UConverterSharedData _ISO2022JPData={
    sizeof(UConverterSharedData), /* structSize */
    ~((uint32_t) 0), /* referenceCounter: all bits set; presumably marks static, never-released data -- confirm */
    NULL, /* dataMemory */
    NULL, /* table */
    &_ISO2022JPStaticData, /* staticData */
    FALSE, /* sharedDataCached */
    &_ISO2022JPImpl, /* impl */
    0, UCNV_MBCS_TABLE_INITIALIZER /* toUnicodeStatus, mbcs */
};

}  // namespace
   3861 
   3862 /************* KR ***************/
/*
 * Function table for the ISO-2022-KR converter variants.
 * The initializer is positional. The slot labels below follow the
 * UConverterImpl declaration, which is not visible in this part of the
 * file -- confirm against it before adding or reordering entries.
 * The same offsets-aware function serves both the plain and the
 * with-offsets slot in each direction.
 */
static const UConverterImpl _ISO2022KRImpl={
    UCNV_ISO_2022, /* type */

    NULL, /* load */
    NULL, /* unload */

    _ISO2022Open,  /* open */
    _ISO2022Close, /* close */
    _ISO2022Reset, /* reset */

    UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,   /* toUnicode */
    UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,   /* toUnicode with offsets */
    UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC, /* fromUnicode */
    UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC, /* fromUnicode with offsets */
    NULL, /* getNextUChar */

    NULL,                    /* getStarters */
    _ISO2022getName,         /* getName */
    _ISO_2022_WriteSub,      /* writeSub */
    _ISO_2022_SafeClone,     /* safeClone */
    _ISO_2022_GetUnicodeSet, /* getUnicodeSet */

    NULL, /* toUTF8 */
    NULL  /* fromUTF8 */
};
/*
 * Static properties of the ISO-2022-KR converter.
 * Positional initializer; the labels follow the UConverterStaticData
 * declaration, which is not visible here -- confirm before editing.
 */
static const UConverterStaticData _ISO2022KRStaticData={
    sizeof(UConverterStaticData), /* structSize */
    "ISO_2022_KR", /* name */
    0,             /* codepage */
    UCNV_IBM,      /* platform */
    UCNV_ISO_2022, /* conversionType */
    1, /* minBytesPerChar */
    3, /* max 3 bytes per UChar: SO+DBCS */
    { 0x1a, 0, 0, 0 }, /* subChar */
    1,     /* subCharLen */
    FALSE, /* hasToUnicodeFallback */
    FALSE, /* hasFromUnicodeFallback */
    0, /* unicodeMask */
    0, /* subChar1 */
    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
};
   3904 
namespace {

/*
 * Shared data object for the ISO-2022-KR converter; ties together
 * _ISO2022KRStaticData and _ISO2022KRImpl. Defined in an unnamed namespace.
 * Positional initializer; the labels follow the UConverterSharedData
 * declaration, which is not visible here -- confirm before editing.
 */
const UConverterSharedData _ISO2022KRData={
    sizeof(UConverterSharedData), /* structSize */
    ~((uint32_t) 0), /* referenceCounter: all bits set; presumably marks static, never-released data -- confirm */
    NULL, /* dataMemory */
    NULL, /* table */
    &_ISO2022KRStaticData, /* staticData */
    FALSE, /* sharedDataCached */
    &_ISO2022KRImpl, /* impl */
    0, UCNV_MBCS_TABLE_INITIALIZER /* toUnicodeStatus, mbcs */
};

}  // namespace
   3919 
   3920 /*************** CN ***************/
/*
 * Function table for the ISO-2022-CN converter variants.
 * The initializer is positional. The slot labels below follow the
 * UConverterImpl declaration, which is not visible in this part of the
 * file -- confirm against it before adding or reordering entries.
 * The same offsets-aware function serves both the plain and the
 * with-offsets slot in each direction.
 */
static const UConverterImpl _ISO2022CNImpl={

    UCNV_ISO_2022, /* type */

    NULL, /* load */
    NULL, /* unload */

    _ISO2022Open,  /* open */
    _ISO2022Close, /* close */
    _ISO2022Reset, /* reset */

    UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,   /* toUnicode */
    UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,   /* toUnicode with offsets */
    UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC, /* fromUnicode */
    UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC, /* fromUnicode with offsets */
    NULL, /* getNextUChar */

    NULL,                    /* getStarters */
    _ISO2022getName,         /* getName */
    _ISO_2022_WriteSub,      /* writeSub */
    _ISO_2022_SafeClone,     /* safeClone */
    _ISO_2022_GetUnicodeSet, /* getUnicodeSet */

    NULL, /* toUTF8 */
    NULL  /* fromUTF8 */
};
/*
 * Static properties of the ISO-2022-CN converter.
 * Positional initializer; the labels follow the UConverterStaticData
 * declaration, which is not visible here -- confirm before editing.
 */
static const UConverterStaticData _ISO2022CNStaticData={
    sizeof(UConverterStaticData), /* structSize */
    "ISO_2022_CN", /* name */
    0,             /* codepage */
    UCNV_IBM,      /* platform */
    UCNV_ISO_2022, /* conversionType */
    1, /* minBytesPerChar */
    8, /* max 8 bytes per UChar: 4-byte CNS designator + 2 bytes for SS2/SS3 + DBCS */
    { 0x1a, 0, 0, 0 }, /* subChar */
    1,     /* subCharLen */
    FALSE, /* hasToUnicodeFallback */
    FALSE, /* hasFromUnicodeFallback */
    0, /* unicodeMask */
    0, /* subChar1 */
    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
};
   3963 
namespace {

/*
 * Shared data object for the ISO-2022-CN converter; ties together
 * _ISO2022CNStaticData and _ISO2022CNImpl. Defined in an unnamed namespace.
 * Positional initializer; the labels follow the UConverterSharedData
 * declaration, which is not visible here -- confirm before editing.
 */
const UConverterSharedData _ISO2022CNData={
    sizeof(UConverterSharedData), /* structSize */
    ~((uint32_t) 0), /* referenceCounter: all bits set; presumably marks static, never-released data -- confirm */
    NULL, /* dataMemory */
    NULL, /* table */
    &_ISO2022CNStaticData, /* staticData */
    FALSE, /* sharedDataCached */
    &_ISO2022CNImpl, /* impl */
    0, UCNV_MBCS_TABLE_INITIALIZER /* toUnicodeStatus, mbcs */
};

}  // namespace
   3978 
   3979 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
   3980