Home | History | Annotate | Download | only in common
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 2000-2012, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 *   file name:  ucnv2022.cpp
      7 *   encoding:   US-ASCII
      8 *   tab size:   8 (not used)
      9 *   indentation:4
     10 *
     11 *   created on: 2000feb03
     12 *   created by: Markus W. Scherer
     13 *
     14 *   Change history:
     15 *
     16 *   06/29/2000  helena  Major rewrite of the callback APIs.
     17 *   08/08/2000  Ram     Included support for ISO-2022-JP-2
     18 *                       Changed implementation of toUnicode
     19 *                       function
     20 *   08/21/2000  Ram     Added support for ISO-2022-KR
     21 *   08/29/2000  Ram     Separated implementation of EBCDIC to
     22 *                       ucnvebdc.c
     23 *   09/20/2000  Ram     Added support for ISO-2022-CN
     24 *                       Added implementations for getNextUChar()
     25 *                       for specific 2022 country variants.
     26 *   10/31/2000  Ram     Implemented offsets logic functions
     27 */
     28 
     29 #include "unicode/utypes.h"
     30 
     31 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
     32 
     33 #include "unicode/ucnv.h"
     34 #include "unicode/uset.h"
     35 #include "unicode/ucnv_err.h"
     36 #include "unicode/ucnv_cb.h"
     37 #include "unicode/utf16.h"
     38 #include "ucnv_imp.h"
     39 #include "ucnv_bld.h"
     40 #include "ucnv_cnv.h"
     41 #include "ucnvmbcs.h"
     42 #include "cstring.h"
     43 #include "cmemory.h"
     44 #include "uassert.h"
     45 
     46 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     47 
     48 #ifdef U_ENABLE_GENERIC_ISO_2022
     49 /*
     50  * I am disabling the generic ISO-2022 converter after proposing to do so on
     51  * the icu mailing list two days ago.
     52  *
     53  * Reasons:
     54  * 1. It does not fully support the ISO-2022/ECMA-35 specification with all of
     55  *    its designation sequences, single shifts with return to the previous state,
     56  *    switch-with-no-return to UTF-16BE or similar, etc.
     57  *    This is unlike the language-specific variants like ISO-2022-JP which
     58  *    require a much smaller repertoire of ISO-2022 features.
     59  *    These variants continue to be supported.
     60  * 2. I believe that no one is really using the generic ISO-2022 converter
     61  *    but rather always one of the language-specific variants.
     62  *    Note that ICU's generic ISO-2022 converter has always output one escape
     63  *    sequence followed by UTF-8 for the whole stream.
     64  * 3. Switching between subcharsets is extremely slow, because each time
     65  *    the previous converter is closed and a new one opened,
     66  *    without any kind of caching, least-recently-used list, etc.
     67  * 4. The code is currently buggy, and given the above it does not seem
     68  *    reasonable to spend the time on maintenance.
     69  * 5. ISO-2022 subcharsets should normally be used with 7-bit byte encodings.
     70  *    This means, for example, that when ISO-8859-7 is designated, the following
     71  *    ISO-2022 bytes 00..7f should be interpreted as ISO-8859-7 bytes 80..ff.
     72  *    The ICU ISO-2022 converter does not handle this - and has no information
     73  *    about which subconverter would have to be shifted vs. which is designed
     74  *    for 7-bit ISO-2022.
     75  *
     76  * Markus Scherer 2003-dec-03
     77  */
     78 #endif
     79 
/* The SI (Shift In) control byte 0x0F as a NUL-terminated one-byte string. */
static const char SHIFT_IN_STR[]  = "\x0F";
// static const char SHIFT_OUT_STR[] = "\x0E";

/* Code points of the ASCII control/whitespace characters named here. */
#define CR      0x0D
#define LF      0x0A
#define H_TAB   0x09
#define V_TAB   0x0B
#define SPACE   0x20

/*
 * Inclusive code point range U+FF61..U+FF9F.
 * NOTE(review): by name this is the halfwidth Katakana range that pairs with
 * the HWKANA_7BIT charset; confirm against the users of these constants.
 */
enum {
    HWKANA_START=0xff61,
    HWKANA_END=0xff9f
};
     93 
     94 /*
     95  * 94-character sets with native byte values A1..FE are encoded in ISO 2022
     96  * as bytes 21..7E. (Subtract 0x80.)
     97  * 96-character sets with native byte values A0..FF are encoded in ISO 2022
     98  * as bytes 20..7F. (Subtract 0x80.)
     99  * Do not encode C1 control codes with native bytes 80..9F
    100  * as bytes 00..1F (C0 control codes).
    101  */
    102 enum {
    103     GR94_START=0xa1,
    104     GR94_END=0xfe,
    105     GR96_START=0xa0,
    106     GR96_END=0xff
    107 };
    108 
    109 /*
    110  * ISO 2022 control codes must not be converted from Unicode
    111  * because they would mess up the byte stream.
    112  * The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b
    113  * corresponding to SO, SI, and ESC.
    114  */
    115 #define IS_2022_CONTROL(c) (((c)<0x20) && (((uint32_t)1<<(c))&0x0800c000)!=0)
    116 
    117 /* for ISO-2022-JP and -CN implementations */
    118 typedef enum  {
    119         /* shared values */
    120         INVALID_STATE=-1,
    121         ASCII = 0,
    122 
    123         SS2_STATE=0x10,
    124         SS3_STATE,
    125 
    126         /* JP */
    127         ISO8859_1 = 1 ,
    128         ISO8859_7 = 2 ,
    129         JISX201  = 3,
    130         JISX208 = 4,
    131         JISX212 = 5,
    132         GB2312  =6,
    133         KSC5601 =7,
    134         HWKANA_7BIT=8,    /* Halfwidth Katakana 7 bit */
    135 
    136         /* CN */
    137         /* the first few enum constants must keep their values because they correspond to myConverterArray[] */
    138         GB2312_1=1,
    139         ISO_IR_165=2,
    140         CNS_11643=3,
    141 
    142         /*
    143          * these are used in StateEnum and ISO2022State variables,
    144          * but CNS_11643 must be used to index into myConverterArray[]
    145          */
    146         CNS_11643_0=0x20,
    147         CNS_11643_1,
    148         CNS_11643_2,
    149         CNS_11643_3,
    150         CNS_11643_4,
    151         CNS_11643_5,
    152         CNS_11643_6,
    153         CNS_11643_7
    154 } StateEnum;
    155 
    156 /* is the StateEnum charset value for a DBCS charset? */
    157 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)
    158 
    159 #define CSM(cs) ((uint16_t)1<<(cs))
    160 
    161 /*
    162  * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence
    163  * to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x
    164  *
    165  * Note: The converter uses some leniency:
    166  * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in
    167  *   all versions, not just JIS7 and JIS8.
    168  * - ICU does not distinguish between different versions of JIS X 0208.
    169  */
    170 enum { MAX_JA_VERSION=4 };
    171 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
    172     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
    173     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
    174     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
    175     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
    176     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)
    177 };
    178 
    179 typedef enum {
    180         ASCII1=0,
    181         LATIN1,
    182         SBCS,
    183         DBCS,
    184         MBCS,
    185         HWKANA
    186 }Cnv2022Type;
    187 
/*
 * Designation and shift state for one conversion direction.
 * UConverterDataISO2022 holds one instance for toUnicode and one for
 * fromUnicode.
 */
typedef struct ISO2022State {
    int8_t cs[4];       /* charset number for SI (G0)/SO (G1)/SS2 (G2)/SS3 (G3) */
    int8_t g;           /* 0..3 for G0..G3 (SI/SO/SS2/SS3) */
    int8_t prevG;       /* g before single shift (SS2 or SS3) */
} ISO2022State;
    193 
/* Mask applied to the open options to extract the variant version number. */
#define UCNV_OPTIONS_VERSION_MASK 0xf
/* Capacity of UConverterDataISO2022.myConverterArray[]. */
#define UCNV_2022_MAX_CONVERTERS 10

/*
 * Per-converter ISO-2022 state. _ISO2022Open() allocates one instance,
 * zero-fills it and stores it in cnv->extraInfo.
 */
typedef struct{
    /* shared data of the sub-converters, indexed by StateEnum charset values */
    UConverterSharedData *myConverterArray[UCNV_2022_MAX_CONVERTERS];
    /* sub-converter opened with ucnv_open() by the ISO-2022-KR branch of _ISO2022Open() */
    UConverter *currentConverter;
    /* initialized to ASCII1 by _ISO2022Open() */
    Cnv2022Type currentType;
    /* designation/shift state, one per conversion direction */
    ISO2022State toU2022State, fromU2022State;
    /* NOTE(review): presumably the running escape-sequence key matched against
       escSeqStateTable_Key_2022[] - confirm against the code that updates it */
    uint32_t key;
    /* variant version taken from the open options (options & UCNV_OPTIONS_VERSION_MASK) */
    uint32_t version;
#ifdef U_ENABLE_GENERIC_ISO_2022
    UBool isFirstBuffer;
#endif
    UBool isEmptySegment;
    /* converter name, e.g. "ISO_2022,locale=ja,version=0"; the longest name
       written by _ISO2022Open() is 28 characters plus the terminating NUL */
    char name[30];
    /* two-letter locale written by _ISO2022Open(): "ja", "ko" or "cn" */
    char locale[3];
}UConverterDataISO2022;
    211 
    212 /* Protos */
    213 /* ISO-2022 ----------------------------------------------------------------- */
    214 
/*
 * Forward declarations of the UTF-8 fromUnicode functions (without and with
 * offsets). They are only declared here; their definitions are not in the
 * visible part of this file.
 */
U_CFUNC void
ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs * args,
                      UErrorCode * err);
U_CFUNC void
ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs * args,
                                    UErrorCode * err);
    222 
    223 #define ESC_2022 0x1B /*ESC*/
    224 
    225 typedef enum
    226 {
    227         INVALID_2022 = -1, /*Doesn't correspond to a valid iso 2022 escape sequence*/
    228         VALID_NON_TERMINAL_2022 = 0, /*so far corresponds to a valid iso 2022 escape sequence*/
    229         VALID_TERMINAL_2022 = 1, /*corresponds to a valid iso 2022 escape sequence*/
    230         VALID_MAYBE_TERMINAL_2022 = 2 /*so far matches one iso 2022 escape sequence, but by adding more characters might match another escape sequence*/
    231 } UCNV_TableStates_2022;
    232 
    233 /*
    234 * The way these state transition arrays work is:
    235 * ex : ESC$B is the sequence for JISX208
    236 *      a) First Iteration: char is ESC
    237 *          i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index
    238 *             int x = normalize_esq_chars_2022[27] which is equal to 1
    239 *         ii) Search for this value in escSeqStateTable_Key_2022[]
    240 *             value of x is stored at escSeqStateTable_Key_2022[0]
    241 *        iii) Save this index as offset
    242 *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
    243 *             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
    244 *     b) Switch on this state and continue to next char
    245 *          i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index
    246 *             which is normalize_esq_chars_2022[36] == 4
    247 *         ii) x is currently 1(from above)
    248 *               x<<=5 -- x is now 32
    249 *               x+=normalize_esq_chars_2022[36]
    250 *               now x is 36
    251 *        iii) Search for this value in escSeqStateTable_Key_2022[]
    252 *             value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2
    253 *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
    254 *             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
    255 *     c) Switch on this state and continue to next char
    256 *        i)  Get the value of B from normalize_esq_chars_2022[] with int value of B as index
    257 *        ii) x is currently 36 (from above)
    258 *            x<<=5 -- x is now 1152
    259 *            x+=normalize_esq_chars_2022[66]
    260 *            now x is 1161
    261 *       iii) Search for this value in escSeqStateTable_Key_2022[]
     262 *            value of x is stored at escSeqStateTable_Key_2022[24], so offset is 24
     263 *        iv) Get state of this sequence from escSeqStateTable_Value_2022[24]
     264 *            escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022
     265 *         v) Get the converter name from escSeqStateTable_Result_2022[24] which is JISX-208
    266 */
    267 
    268 
    269 /*Below are the 3 arrays depicting a state transition table*/
    270 static const int8_t normalize_esq_chars_2022[256] = {
    271 /*       0      1       2       3       4      5       6        7       8       9           */
    272 
    273          0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    274         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    275         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,1      ,0      ,0
    276         ,0     ,0      ,0      ,0      ,0      ,0      ,4      ,7      ,29      ,0
    277         ,2     ,24     ,26     ,27     ,0      ,3      ,23     ,6      ,0      ,0
    278         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    279         ,0     ,0      ,0      ,0      ,5      ,8      ,9      ,10     ,11     ,12
    280         ,13    ,14     ,15     ,16     ,17     ,18     ,19     ,20     ,25     ,28
    281         ,0     ,0      ,21     ,0      ,0      ,0      ,0      ,0      ,0      ,0
    282         ,22    ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    283         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    284         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    285         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    286         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    287         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    288         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    289         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    290         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    291         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    292         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    293         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    294         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    295         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    296         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    297         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    298         ,0     ,0      ,0      ,0      ,0      ,0
    299 };
    300 
    301 #ifdef U_ENABLE_GENERIC_ISO_2022
    302 /*
    303  * When the generic ISO-2022 converter is completely removed, not just disabled
    304  * per #ifdef, then the following state table and the associated tables that are
    305  * dimensioned with MAX_STATES_2022 should be trimmed.
    306  *
    307  * Especially, VALID_MAYBE_TERMINAL_2022 will not be used any more, and all of
    308  * the associated escape sequences starting with ESC ( B should be removed.
    309  * This includes the ones with key values 1097 and all of the ones above 1000000.
    310  *
    311  * For the latter, the tables can simply be truncated.
    312  * For the former, since the tables must be kept parallel, it is probably best
    313  * to simply duplicate an adjacent table cell, parallel in all tables.
    314  *
    315  * It may make sense to restructure the tables, especially by using small search
    316  * tables for the variants instead of indexing them parallel to the table here.
    317  */
    318 #endif
    319 
    320 #define MAX_STATES_2022 74
    321 static const int32_t escSeqStateTable_Key_2022[MAX_STATES_2022] = {
    322 /*   0           1           2           3           4           5           6           7           8           9           */
    323 
    324      1          ,34         ,36         ,39         ,55         ,57         ,60         ,61         ,1093       ,1096
    325     ,1097       ,1098       ,1099       ,1100       ,1101       ,1102       ,1103       ,1104       ,1105       ,1106
    326     ,1109       ,1154       ,1157       ,1160       ,1161       ,1176       ,1178       ,1179       ,1254       ,1257
    327     ,1768       ,1773       ,1957       ,35105      ,36933      ,36936      ,36937      ,36938      ,36939      ,36940
    328     ,36942      ,36943      ,36944      ,36945      ,36946      ,36947      ,36948      ,37640      ,37642      ,37644
    329     ,37646      ,37711      ,37744      ,37745      ,37746      ,37747      ,37748      ,40133      ,40136      ,40138
    330     ,40139      ,40140      ,40141      ,1123363    ,35947624   ,35947625   ,35947626   ,35947627   ,35947629   ,35947630
    331     ,35947631   ,35947635   ,35947636   ,35947638
    332 };
    333 
    334 #ifdef U_ENABLE_GENERIC_ISO_2022
    335 
    336 static const char* const escSeqStateTable_Result_2022[MAX_STATES_2022] = {
    337  /*  0                      1                        2                      3                   4                   5                        6                      7                       8                       9    */
    338 
    339      NULL                   ,NULL                   ,NULL                   ,NULL               ,NULL               ,NULL                   ,NULL                   ,NULL                   ,"latin1"               ,"latin1"
    340     ,"latin1"               ,"ibm-865"              ,"ibm-865"              ,"ibm-865"          ,"ibm-865"          ,"ibm-865"              ,"ibm-865"              ,"JISX0201"             ,"JISX0201"             ,"latin1"
    341     ,"latin1"               ,NULL                   ,"JISX-208"             ,"ibm-5478"         ,"JISX-208"         ,NULL                   ,NULL                   ,NULL                   ,NULL                   ,"UTF8"
    342     ,"ISO-8859-1"           ,"ISO-8859-7"           ,"JIS-X-208"            ,NULL               ,"ibm-955"          ,"ibm-367"              ,"ibm-952"              ,"ibm-949"              ,"JISX-212"             ,"ibm-1383"
    343     ,"ibm-952"              ,"ibm-964"              ,"ibm-964"              ,"ibm-964"          ,"ibm-964"          ,"ibm-964"              ,"ibm-964"              ,"ibm-5478"         ,"ibm-949"              ,"ISO-IR-165"
    344     ,"CNS-11643-1992,1"     ,"CNS-11643-1992,2"     ,"CNS-11643-1992,3"     ,"CNS-11643-1992,4" ,"CNS-11643-1992,5" ,"CNS-11643-1992,6"     ,"CNS-11643-1992,7"     ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian"
    345     ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,NULL               ,"latin1"           ,"ibm-912"              ,"ibm-913"              ,"ibm-914"              ,"ibm-813"              ,"ibm-1089"
    346     ,"ibm-920"              ,"ibm-915"              ,"ibm-915"              ,"latin1"
    347 };
    348 
    349 #endif
    350 
    351 static const int8_t escSeqStateTable_Value_2022[MAX_STATES_2022] = {
    352 /*          0                           1                         2                             3                           4                           5                               6                        7                          8                           9       */
    353      VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022     ,VALID_NON_TERMINAL_2022   ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    354     ,VALID_MAYBE_TERMINAL_2022  ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    355     ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022
    356     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    357     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    358     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    359     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    360     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    361 };
    362 
    363 
/*
 * Type def for refactoring changeState_2022 code.
 * Names the ISO-2022 variant being handled; the generic ISO_2022 value only
 * exists when U_ENABLE_GENERIC_ISO_2022 is defined, and the language-specific
 * values keep the same numbers either way.
 */
typedef enum{
#ifdef U_ENABLE_GENERIC_ISO_2022
    ISO_2022=0,
#endif
    ISO_2022_JP=1,
    ISO_2022_KR=2,
    ISO_2022_CN=3
} Variant2022;
    373 
/*********** ISO 2022 Converter Protos ***********/
/*
 * Forward declarations of the file-local converter functions. Only
 * _ISO2022Open() is defined in the visible part of this file.
 */

/* Allocates cnv->extraInfo and sets up the JP, KR or CN variant chosen by pArgs->locale. */
static void
_ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode);

/* Called by _ISO2022Open() when opening the KR sub-converter fails. */
static void
 _ISO2022Close(UConverter *converter);

static void
_ISO2022Reset(UConverter *converter, UConverterResetChoice choice);

static const char*
_ISO2022getName(const UConverter* cnv);

static void
_ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err);

static UConverter *
_ISO_2022_SafeClone(const UConverter *cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status);

/* Only declared when the generic ISO-2022 converter is enabled. */
#ifdef U_ENABLE_GENERIC_ISO_2022
static void
T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, UErrorCode* err);
#endif
    397 
namespace {

/*
 * Shared-data objects of the three language variants, declared here so that
 * _ISO2022Open() can point cnv->sharedData at the one matching the locale.
 * Their definitions are not in the visible part of this file.
 */
/*const UConverterSharedData _ISO2022Data;*/
extern const UConverterSharedData _ISO2022JPData;
extern const UConverterSharedData _ISO2022KRData;
extern const UConverterSharedData _ISO2022CNData;

}  // namespace
    406 
    407 /*************** Converter implementations ******************/
    408 
    409 /* The purpose of this function is to get around gcc compiler warnings. */
    410 static inline void
    411 fromUWriteUInt8(UConverter *cnv,
    412                  const char *bytes, int32_t length,
    413                  uint8_t **target, const char *targetLimit,
    414                  int32_t **offsets,
    415                  int32_t sourceIndex,
    416                  UErrorCode *pErrorCode)
    417 {
    418     char *targetChars = (char *)*target;
    419     ucnv_fromUWriteBytes(cnv, bytes, length, &targetChars, targetLimit,
    420                          offsets, sourceIndex, pErrorCode);
    421     *target = (uint8_t*)targetChars;
    422 
    423 }
    424 
    425 static inline void
    426 setInitialStateToUnicodeKR(UConverter* /*converter*/, UConverterDataISO2022 *myConverterData){
    427     if(myConverterData->version == 1) {
    428         UConverter *cnv = myConverterData->currentConverter;
    429 
    430         cnv->toUnicodeStatus=0;     /* offset */
    431         cnv->mode=0;                /* state */
    432         cnv->toULength=0;           /* byteIndex */
    433     }
    434 }
    435 
    436 static inline void
    437 setInitialStateFromUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConverterData){
    438    /* in ISO-2022-KR the designator sequence appears only once
    439     * in a file so we append it only once
    440     */
    441     if( converter->charErrorBufferLength==0){
    442 
    443         converter->charErrorBufferLength = 4;
    444         converter->charErrorBuffer[0] = 0x1b;
    445         converter->charErrorBuffer[1] = 0x24;
    446         converter->charErrorBuffer[2] = 0x29;
    447         converter->charErrorBuffer[3] = 0x43;
    448     }
    449     if(myConverterData->version == 1) {
    450         UConverter *cnv = myConverterData->currentConverter;
    451 
    452         cnv->fromUChar32=0;
    453         cnv->fromUnicodeStatus=1;   /* prevLength */
    454     }
    455 }
    456 
    457 static void
    458 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
    459 
    460     char myLocale[6]={' ',' ',' ',' ',' ',' '};
    461 
    462     cnv->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022));
    463     if(cnv->extraInfo != NULL) {
    464         UConverterNamePieces stackPieces;
    465         UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER;
    466         UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo;
    467         uint32_t version;
    468 
    469         stackArgs.onlyTestIsLoadable = pArgs->onlyTestIsLoadable;
    470 
    471         uprv_memset(myConverterData, 0, sizeof(UConverterDataISO2022));
    472         myConverterData->currentType = ASCII1;
    473         cnv->fromUnicodeStatus =FALSE;
    474         if(pArgs->locale){
    475             uprv_strncpy(myLocale, pArgs->locale, sizeof(myLocale));
    476         }
    477         version = pArgs->options & UCNV_OPTIONS_VERSION_MASK;
    478         myConverterData->version = version;
    479         /* Begin Google-specific change. */
    480         /* The "jk" locale ID was made up for KDDI ISO-2022-JP. */
    481         /* The "js" locale ID was made up for SoftBank ISO-2022-JP. */
    482         if((myLocale[0]=='j' &&
    483             (myLocale[1]=='a'|| myLocale[1]=='p' || myLocale[1]=='k' ||
    484              myLocale[1]=='s') &&
    485             (myLocale[2]=='_' || myLocale[2]=='\0')))
    486         {
    487             size_t len=0;
    488             /* open the required converters and cache them */
    489             if(version>MAX_JA_VERSION) {
    490                 /* prevent indexing beyond jpCharsetMasks[] */
    491                 myConverterData->version = version = 0;
    492             }
    493             if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
    494                 myConverterData->myConverterArray[ISO8859_7] =
    495                     ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode);
    496             }
    497             if (myLocale[1]=='k') {  /* Use KDDI's version. */
    498                 myConverterData->myConverterArray[JISX208] =
    499                     ucnv_loadSharedData("kddi-jisx-208-2007", &stackPieces, &stackArgs, errorCode);
    500             } else if (myLocale[1]=='s') {  /* Use SoftBank's version. */
    501                 myConverterData->myConverterArray[JISX208] =
    502                     ucnv_loadSharedData("softbank-jisx-208-2007", &stackPieces, &stackArgs, errorCode);
    503             } else {
    504                 /*
    505                  * Change for http://b/issue?id=937017 :
    506                  * Restore JIS X 0208 ISO-2022-JP mappings from before
    507                  * sharing the table with the Shift-JIS converter
    508                  * (CL 5963009 and http://bugs.icu-project.org/trac/ticket/5797).
    509                  * TODO(mscherer): Create and use a new, unified Google Shift-JIS
    510                  * table for both Shift-JIS and ISO-2022-JP.
    511                  */
    512                 myConverterData->myConverterArray[JISX208]  =
    513                     ucnv_loadSharedData("jisx-208", &stackPieces, &stackArgs, errorCode);
    514             }
    515             /* End Google-specific change. */
    516             /* END android-changed */
    517 
    518             if(jpCharsetMasks[version]&CSM(JISX212)) {
    519                 myConverterData->myConverterArray[JISX212] =
    520                     ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode);
    521             }
    522             if(jpCharsetMasks[version]&CSM(GB2312)) {
    523                 myConverterData->myConverterArray[GB2312] =
    524                     /* BEGIN android-changed */
    525                     ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode); /* gb_2312_80-1 */
    526                     /* END android-changed */
    527             }
    528             if(jpCharsetMasks[version]&CSM(KSC5601)) {
    529                 myConverterData->myConverterArray[KSC5601] =
    530                     ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode);
    531             }
    532 
    533             /* set the function pointers to appropriate funtions */
    534             cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);
    535             uprv_strcpy(myConverterData->locale,"ja");
    536 
    537             (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=");
    538             len = uprv_strlen(myConverterData->name);
    539             myConverterData->name[len]=(char)(myConverterData->version+(int)'0');
    540             myConverterData->name[len+1]='\0';
    541         }
    542         else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
    543             (myLocale[2]=='_' || myLocale[2]=='\0'))
    544         {
    545             const char *cnvName;
    546             if(version==1) {
    547                 cnvName="icu-internal-25546";
    548             } else {
    549                 /* BEGIN android-changed */
    550                 cnvName="ksc_5601";
    551                 /* END android-changed */
    552                 myConverterData->version=version=0;
    553             }
    554             if(pArgs->onlyTestIsLoadable) {
    555                 ucnv_canCreateConverter(cnvName, errorCode);  /* errorCode carries result */
    556                 uprv_free(cnv->extraInfo);
    557                 cnv->extraInfo=NULL;
    558                 return;
    559             } else {
    560                 myConverterData->currentConverter=ucnv_open(cnvName, errorCode);
    561                 if (U_FAILURE(*errorCode)) {
    562                     _ISO2022Close(cnv);
    563                     return;
    564                 }
    565 
    566                 if(version==1) {
    567                     (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=1");
    568                     uprv_memcpy(cnv->subChars, myConverterData->currentConverter->subChars, 4);
    569                     cnv->subCharLen = myConverterData->currentConverter->subCharLen;
    570                 }else{
    571                     (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=0");
    572                 }
    573 
    574                 /* initialize the state variables */
    575                 setInitialStateToUnicodeKR(cnv, myConverterData);
    576                 setInitialStateFromUnicodeKR(cnv, myConverterData);
    577 
    578                 /* set the function pointers to appropriate funtions */
    579                 cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData;
    580                 uprv_strcpy(myConverterData->locale,"ko");
    581             }
    582         }
    583         else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& myLocale[1]=='n'))&&
    584             (myLocale[2]=='_' || myLocale[2]=='\0'))
    585         {
    586 
    587             /* open the required converters and cache them */
    588             /* BEGIN android-changed */
    589             myConverterData->myConverterArray[GB2312_1] =
    590                 ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode);
    591             if(version==1) {
    592                 myConverterData->myConverterArray[ISO_IR_165] =
    593                     ucnv_loadSharedData("noop-iso-ir-165", &stackPieces, &stackArgs, errorCode);
    594             }
    595             myConverterData->myConverterArray[CNS_11643] =
    596                 ucnv_loadSharedData("noop-cns-11643", &stackPieces, &stackArgs, errorCode);
    597             /* END android-changed */
    598 
    599 
    600             /* set the function pointers to appropriate funtions */
    601             cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData;
    602             uprv_strcpy(myConverterData->locale,"cn");
    603 
    604             if (version==0){
    605                 myConverterData->version = 0;
    606                 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=0");
    607             }else if (version==1){
    608                 myConverterData->version = 1;
    609                 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=1");
    610             }else {
    611                 myConverterData->version = 2;
    612                 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2");
    613             }
    614         }
    615         else{
    616 #ifdef U_ENABLE_GENERIC_ISO_2022
    617             myConverterData->isFirstBuffer = TRUE;
    618 
    619             /* append the UTF-8 escape sequence */
    620             cnv->charErrorBufferLength = 3;
    621             cnv->charErrorBuffer[0] = 0x1b;
    622             cnv->charErrorBuffer[1] = 0x25;
    623             cnv->charErrorBuffer[2] = 0x42;
    624 
    625             cnv->sharedData=(UConverterSharedData*)&_ISO2022Data;
    626             /* initialize the state variables */
    627             uprv_strcpy(myConverterData->name,"ISO_2022");
    628 #else
    629             *errorCode = U_UNSUPPORTED_ERROR;
    630             return;
    631 #endif
    632         }
    633 
    634         cnv->maxBytesPerUChar=cnv->sharedData->staticData->maxBytesPerChar;
    635 
    636         if(U_FAILURE(*errorCode) || pArgs->onlyTestIsLoadable) {
    637             _ISO2022Close(cnv);
    638         }
    639     } else {
    640         *errorCode = U_MEMORY_ALLOCATION_ERROR;
    641     }
    642 }
    643 
    644 
    645 static void
    646 _ISO2022Close(UConverter *converter) {
    647     UConverterDataISO2022* myData =(UConverterDataISO2022 *) (converter->extraInfo);
    648     UConverterSharedData **array = myData->myConverterArray;
    649     int32_t i;
    650 
    651     if (converter->extraInfo != NULL) {
    652         /*close the array of converter pointers and free the memory*/
    653         for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
    654             if(array[i]!=NULL) {
    655                 ucnv_unloadSharedDataIfReady(array[i]);
    656             }
    657         }
    658 
    659         ucnv_close(myData->currentConverter);
    660 
    661         if(!converter->isExtraLocal){
    662             uprv_free (converter->extraInfo);
    663             converter->extraInfo = NULL;
    664         }
    665     }
    666 }
    667 
    668 static void
    669 _ISO2022Reset(UConverter *converter, UConverterResetChoice choice) {
    670     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) (converter->extraInfo);
    671     if(choice<=UCNV_RESET_TO_UNICODE) {
    672         uprv_memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State));
    673         myConverterData->key = 0;
    674         myConverterData->isEmptySegment = FALSE;
    675     }
    676     if(choice!=UCNV_RESET_TO_UNICODE) {
    677         uprv_memset(&myConverterData->fromU2022State, 0, sizeof(ISO2022State));
    678     }
    679 #ifdef U_ENABLE_GENERIC_ISO_2022
    680     if(myConverterData->locale[0] == 0){
    681         if(choice<=UCNV_RESET_TO_UNICODE) {
    682             myConverterData->isFirstBuffer = TRUE;
    683             myConverterData->key = 0;
    684             if (converter->mode == UCNV_SO){
    685                 ucnv_close (myConverterData->currentConverter);
    686                 myConverterData->currentConverter=NULL;
    687             }
    688             converter->mode = UCNV_SI;
    689         }
    690         if(choice!=UCNV_RESET_TO_UNICODE) {
    691             /* re-append UTF-8 escape sequence */
    692             converter->charErrorBufferLength = 3;
    693             converter->charErrorBuffer[0] = 0x1b;
    694             converter->charErrorBuffer[1] = 0x28;
    695             converter->charErrorBuffer[2] = 0x42;
    696         }
    697     }
    698     else
    699 #endif
    700     {
    701         /* reset the state variables */
    702         if(myConverterData->locale[0] == 'k'){
    703             if(choice<=UCNV_RESET_TO_UNICODE) {
    704                 setInitialStateToUnicodeKR(converter, myConverterData);
    705             }
    706             if(choice!=UCNV_RESET_TO_UNICODE) {
    707                 setInitialStateFromUnicodeKR(converter, myConverterData);
    708             }
    709         }
    710     }
    711 }
    712 
    713 static const char*
    714 _ISO2022getName(const UConverter* cnv){
    715     if(cnv->extraInfo){
    716         UConverterDataISO2022* myData= (UConverterDataISO2022*)cnv->extraInfo;
    717         return myData->name;
    718     }
    719     return NULL;
    720 }
    721 
    722 
    723 /*************** to unicode *******************/
    724 /****************************************************************************
    725  * Recognized escape sequences are
    726  * <ESC>(B  ASCII
    727  * <ESC>.A  ISO-8859-1
    728  * <ESC>.F  ISO-8859-7
    729  * <ESC>(J  JISX-201
    730  * <ESC>(I  JISX-201
    731  * <ESC>$B  JISX-208
    732  * <ESC>$@  JISX-208
    733  * <ESC>$(D JISX-212
    734  * <ESC>$A  GB2312
    735  * <ESC>$(C KSC5601
    736  */
    737 static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= {
    738 /*      0                1               2               3               4               5               6               7               8               9    */
    739     INVALID_STATE   ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,SS2_STATE      ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    740     ,ASCII          ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,JISX201        ,HWKANA_7BIT    ,JISX201        ,INVALID_STATE
    741     ,INVALID_STATE  ,INVALID_STATE  ,JISX208        ,GB2312         ,JISX208        ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    742     ,ISO8859_1      ,ISO8859_7      ,JISX208        ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,KSC5601        ,JISX212        ,INVALID_STATE
    743     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    744     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    745     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    746     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    747 };
    748 
    749 /*************** to unicode *******************/
    750 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
    751 /*      0                1               2               3               4               5               6               7               8               9    */
    752      INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,SS2_STATE      ,SS3_STATE      ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    753     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    754     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    755     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    756     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,GB2312_1       ,INVALID_STATE  ,ISO_IR_165
    757     ,CNS_11643_1    ,CNS_11643_2    ,CNS_11643_3    ,CNS_11643_4    ,CNS_11643_5    ,CNS_11643_6    ,CNS_11643_7    ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    758     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    759     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    760 };
    761 
    762 
    763 static UCNV_TableStates_2022
    764 getKey_2022(char c,int32_t* key,int32_t* offset){
    765     int32_t togo;
    766     int32_t low = 0;
    767     int32_t hi = MAX_STATES_2022;
    768     int32_t oldmid=0;
    769 
    770     togo = normalize_esq_chars_2022[(uint8_t)c];
    771     if(togo == 0) {
    772         /* not a valid character anywhere in an escape sequence */
    773         *key = 0;
    774         *offset = 0;
    775         return INVALID_2022;
    776     }
    777     togo = (*key << 5) + togo;
    778 
    779     while (hi != low)  /*binary search*/{
    780 
    781         register int32_t mid = (hi+low) >> 1; /*Finds median*/
    782 
    783         if (mid == oldmid)
    784             break;
    785 
    786         if (escSeqStateTable_Key_2022[mid] > togo){
    787             hi = mid;
    788         }
    789         else if (escSeqStateTable_Key_2022[mid] < togo){
    790             low = mid;
    791         }
    792         else /*we found it*/{
    793             *key = togo;
    794             *offset = mid;
    795             return (UCNV_TableStates_2022)escSeqStateTable_Value_2022[mid];
    796         }
    797         oldmid = mid;
    798 
    799     }
    800 
    801     *key = 0;
    802     *offset = 0;
    803     return INVALID_2022;
    804 }
    805 
    806 /*runs through a state machine to determine the escape sequence - codepage correspondance
    807  */
    808 static void
    809 changeState_2022(UConverter* _this,
    810                 const char** source,
    811                 const char* sourceLimit,
    812                 Variant2022 var,
    813                 UErrorCode* err){
    814     UCNV_TableStates_2022 value;
    815     UConverterDataISO2022* myData2022 = ((UConverterDataISO2022*)_this->extraInfo);
    816     uint32_t key = myData2022->key;
    817     int32_t offset = 0;
    818     int8_t initialToULength = _this->toULength;
    819     char c;
    820 
    821     value = VALID_NON_TERMINAL_2022;
    822     while (*source < sourceLimit) {
    823         c = *(*source)++;
    824         _this->toUBytes[_this->toULength++]=(uint8_t)c;
    825         value = getKey_2022(c,(int32_t *) &key, &offset);
    826 
    827         switch (value){
    828 
    829         case VALID_NON_TERMINAL_2022 :
    830             /* continue with the loop */
    831             break;
    832 
    833         case VALID_TERMINAL_2022:
    834             key = 0;
    835             goto DONE;
    836 
    837         case INVALID_2022:
    838             goto DONE;
    839 
    840         case VALID_MAYBE_TERMINAL_2022:
    841 #ifdef U_ENABLE_GENERIC_ISO_2022
    842             /* ESC ( B is ambiguous only for ISO_2022 itself */
    843             if(var == ISO_2022) {
    844                 /* discard toUBytes[] for ESC ( B because this sequence is correct and complete */
    845                 _this->toULength = 0;
    846 
    847                 /* TODO need to indicate that ESC ( B was seen; if failure, then need to replay from source or from MBCS-style replay */
    848 
    849                 /* continue with the loop */
    850                 value = VALID_NON_TERMINAL_2022;
    851                 break;
    852             } else
    853 #endif
    854             {
    855                 /* not ISO_2022 itself, finish here */
    856                 value = VALID_TERMINAL_2022;
    857                 key = 0;
    858                 goto DONE;
    859             }
    860         }
    861     }
    862 
    863 DONE:
    864     myData2022->key = key;
    865 
    866     if (value == VALID_NON_TERMINAL_2022) {
    867         /* indicate that the escape sequence is incomplete: key!=0 */
    868         return;
    869     } else if (value == INVALID_2022 ) {
    870         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    871     } else /* value == VALID_TERMINAL_2022 */ {
    872         switch(var){
    873 #ifdef U_ENABLE_GENERIC_ISO_2022
    874         case ISO_2022:
    875         {
    876             const char *chosenConverterName = escSeqStateTable_Result_2022[offset];
    877             if(chosenConverterName == NULL) {
    878                 /* SS2 or SS3 */
    879                 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    880                 _this->toUCallbackReason = UCNV_UNASSIGNED;
    881                 return;
    882             }
    883 
    884             _this->mode = UCNV_SI;
    885             ucnv_close(myData2022->currentConverter);
    886             myData2022->currentConverter = myUConverter = ucnv_open(chosenConverterName, err);
    887             if(U_SUCCESS(*err)) {
    888                 myUConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
    889                 _this->mode = UCNV_SO;
    890             }
    891             break;
    892         }
    893 #endif
    894         case ISO_2022_JP:
    895             {
    896                 StateEnum tempState=(StateEnum)nextStateToUnicodeJP[offset];
    897                 switch(tempState) {
    898                 case INVALID_STATE:
    899                     *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    900                     break;
    901                 case SS2_STATE:
    902                     if(myData2022->toU2022State.cs[2]!=0) {
    903                         if(myData2022->toU2022State.g<2) {
    904                             myData2022->toU2022State.prevG=myData2022->toU2022State.g;
    905                         }
    906                         myData2022->toU2022State.g=2;
    907                     } else {
    908                         /* illegal to have SS2 before a matching designator */
    909                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    910                     }
    911                     break;
    912                 /* case SS3_STATE: not used in ISO-2022-JP-x */
    913                 case ISO8859_1:
    914                 case ISO8859_7:
    915                     if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
    916                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    917                     } else {
    918                         /* G2 charset for SS2 */
    919                         myData2022->toU2022State.cs[2]=(int8_t)tempState;
    920                     }
    921                     break;
    922                 default:
    923                     if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
    924                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    925                     } else {
    926                         /* G0 charset */
    927                         myData2022->toU2022State.cs[0]=(int8_t)tempState;
    928                     }
    929                     break;
    930                 }
    931             }
    932             break;
    933         case ISO_2022_CN:
    934             {
    935                 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
    936                 switch(tempState) {
    937                 case INVALID_STATE:
    938                     *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    939                     break;
    940                 case SS2_STATE:
    941                     if(myData2022->toU2022State.cs[2]!=0) {
    942                         if(myData2022->toU2022State.g<2) {
    943                             myData2022->toU2022State.prevG=myData2022->toU2022State.g;
    944                         }
    945                         myData2022->toU2022State.g=2;
    946                     } else {
    947                         /* illegal to have SS2 before a matching designator */
    948                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    949                     }
    950                     break;
    951                 case SS3_STATE:
    952                     if(myData2022->toU2022State.cs[3]!=0) {
    953                         if(myData2022->toU2022State.g<2) {
    954                             myData2022->toU2022State.prevG=myData2022->toU2022State.g;
    955                         }
    956                         myData2022->toU2022State.g=3;
    957                     } else {
    958                         /* illegal to have SS3 before a matching designator */
    959                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    960                     }
    961                     break;
    962                 case ISO_IR_165:
    963                     if(myData2022->version==0) {
    964                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    965                         break;
    966                     }
    967                     /*fall through*/
    968                 case GB2312_1:
    969                     /*fall through*/
    970                 case CNS_11643_1:
    971                     myData2022->toU2022State.cs[1]=(int8_t)tempState;
    972                     break;
    973                 case CNS_11643_2:
    974                     myData2022->toU2022State.cs[2]=(int8_t)tempState;
    975                     break;
    976                 default:
    977                     /* other CNS 11643 planes */
    978                     if(myData2022->version==0) {
    979                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    980                     } else {
    981                        myData2022->toU2022State.cs[3]=(int8_t)tempState;
    982                     }
    983                     break;
    984                 }
    985             }
    986             break;
    987         case ISO_2022_KR:
    988             if(offset==0x30){
    989                 /* nothing to be done, just accept this one escape sequence */
    990             } else {
    991                 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    992             }
    993             break;
    994 
    995         default:
    996             *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    997             break;
    998         }
    999     }
   1000     if(U_SUCCESS(*err)) {
   1001         _this->toULength = 0;
   1002     } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) {
   1003         if(_this->toULength>1) {
   1004             /*
   1005              * Ticket 5691: consistent illegal sequences:
   1006              * - We include at least the first byte (ESC) in the illegal sequence.
   1007              * - If any of the non-initial bytes could be the start of a character,
   1008              *   we stop the illegal sequence before the first one of those.
   1009              *   In escape sequences, all following bytes are "printable", that is,
   1010              *   unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS),
   1011              *   they are valid single/lead bytes.
   1012              *   For simplicity, we always only report the initial ESC byte as the
   1013              *   illegal sequence and back out all other bytes we looked at.
   1014              */
   1015             /* Back out some bytes. */
   1016             int8_t backOutDistance=_this->toULength-1;
   1017             int8_t bytesFromThisBuffer=_this->toULength-initialToULength;
   1018             if(backOutDistance<=bytesFromThisBuffer) {
   1019                 /* same as initialToULength<=1 */
   1020                 *source-=backOutDistance;
   1021             } else {
   1022                 /* Back out bytes from the previous buffer: Need to replay them. */
   1023                 _this->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance);
   1024                 /* same as -(initialToULength-1) */
   1025                 /* preToULength is negative! */
   1026                 uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULength);
   1027                 *source-=bytesFromThisBuffer;
   1028             }
   1029             _this->toULength=1;
   1030         }
   1031     } else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) {
   1032         _this->toUCallbackReason = UCNV_UNASSIGNED;
   1033     }
   1034 }
   1035 
   1036 /*Checks the characters of the buffer against valid 2022 escape sequences
   1037 *if the match we return a pointer to the initial start of the sequence otherwise
   1038 *we return sourceLimit
   1039 */
   1040 /*for 2022 looks ahead in the stream
   1041  *to determine the longest possible convertible
   1042  *data stream
   1043  */
   1044 static inline const char*
   1045 getEndOfBuffer_2022(const char** source,
   1046                    const char* sourceLimit,
   1047                    UBool /*flush*/){
   1048 
   1049     const char* mySource = *source;
   1050 
   1051 #ifdef U_ENABLE_GENERIC_ISO_2022
   1052     if (*source >= sourceLimit)
   1053         return sourceLimit;
   1054 
   1055     do{
   1056 
   1057         if (*mySource == ESC_2022){
   1058             int8_t i;
   1059             int32_t key = 0;
   1060             int32_t offset;
   1061             UCNV_TableStates_2022 value = VALID_NON_TERMINAL_2022;
   1062 
   1063             /* Kludge: I could not
   1064             * figure out the reason for validating an escape sequence
   1065             * twice - once here and once in changeState_2022().
   1066             * is it possible to have an ESC character in a ISO2022
   1067             * byte stream which is valid in a code page? Is it legal?
   1068             */
   1069             for (i=0;
   1070             (mySource+i < sourceLimit)&&(value == VALID_NON_TERMINAL_2022);
   1071             i++) {
   1072                 value =  getKey_2022(*(mySource+i), &key, &offset);
   1073             }
   1074             if (value > 0 || *mySource==ESC_2022)
   1075                 return mySource;
   1076 
   1077             if ((value == VALID_NON_TERMINAL_2022)&&(!flush) )
   1078                 return sourceLimit;
   1079         }
   1080     }while (++mySource < sourceLimit);
   1081 
   1082     return sourceLimit;
   1083 #else
   1084     while(mySource < sourceLimit && *mySource != ESC_2022) {
   1085         ++mySource;
   1086     }
   1087     return mySource;
   1088 #endif
   1089 }
   1090 
   1091 
   1092 /* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c
   1093  * any future change in _MBCSFromUChar32() function should be reflected here.
   1094  * @return number of bytes in *value; negative number if fallback; 0 if no mapping
   1095  */
   1096 static inline int32_t
   1097 MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData,
   1098                                          UChar32 c,
   1099                                          uint32_t* value,
   1100                                          UBool useFallback,
   1101                                          int outputType)
   1102 {
   1103     const int32_t *cx;
   1104     const uint16_t *table;
   1105     uint32_t stage2Entry;
   1106     uint32_t myValue;
   1107     int32_t length;
   1108     const uint8_t *p;
   1109     /*
   1110      * TODO(markus): Use and require new, faster MBCS conversion table structures.
   1111      * Use internal version of ucnv_open() that verifies that the new structures are available,
   1112      * else U_INTERNAL_PROGRAM_ERROR.
   1113      */
   1114     /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
   1115     if(c<0x10000 || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
   1116         table=sharedData->mbcs.fromUnicodeTable;
   1117         stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
   1118         /* get the bytes and the length for the output */
   1119         if(outputType==MBCS_OUTPUT_2){
   1120             myValue=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
   1121             if(myValue<=0xff) {
   1122                 length=1;
   1123             } else {
   1124                 length=2;
   1125             }
   1126         } else /* outputType==MBCS_OUTPUT_3 */ {
   1127             p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
   1128             myValue=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
   1129             if(myValue<=0xff) {
   1130                 length=1;
   1131             } else if(myValue<=0xffff) {
   1132                 length=2;
   1133             } else {
   1134                 length=3;
   1135             }
   1136         }
   1137         /* is this code point assigned, or do we use fallbacks? */
   1138         if((stage2Entry&(1<<(16+(c&0xf))))!=0) {
   1139             /* assigned */
   1140             *value=myValue;
   1141             return length;
   1142         } else if(FROM_U_USE_FALLBACK(useFallback, c) && myValue!=0) {
   1143             /*
   1144              * We allow a 0 byte output if the "assigned" bit is set for this entry.
   1145              * There is no way with this data structure for fallback output
   1146              * to be a zero byte.
   1147              */
   1148             *value=myValue;
   1149             return -length;
   1150         }
   1151     }
   1152 
   1153     cx=sharedData->mbcs.extIndexes;
   1154     if(cx!=NULL) {
   1155         return ucnv_extSimpleMatchFromU(cx, c, value, useFallback);
   1156     }
   1157 
   1158     /* unassigned */
   1159     return 0;
   1160 }
   1161 
   1162 /* This inline function replicates code in _MBCSSingleFromUChar32() function in ucnvmbcs.c
   1163  * any future change in _MBCSSingleFromUChar32() function should be reflected here.
   1164  * @param retval pointer to output byte
   1165  * @return 1 roundtrip byte  0 no mapping  -1 fallback byte
   1166  */
   1167 static inline int32_t
   1168 MBCS_SINGLE_FROM_UCHAR32(UConverterSharedData* sharedData,
   1169                                        UChar32 c,
   1170                                        uint32_t* retval,
   1171                                        UBool useFallback)
   1172 {
   1173     const uint16_t *table;
   1174     int32_t value;
   1175     /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
   1176     if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
   1177         return 0;
   1178     }
   1179     /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
   1180     table=sharedData->mbcs.fromUnicodeTable;
   1181     /* get the byte for the output */
   1182     value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
   1183     /* is this code point assigned, or do we use fallbacks? */
   1184     *retval=(uint32_t)(value&0xff);
   1185     if(value>=0xf00) {
   1186         return 1;  /* roundtrip */
   1187     } else if(useFallback ? value>=0x800 : value>=0xc00) {
   1188         return -1;  /* fallback taken */
   1189     } else {
   1190         return 0;  /* no mapping */
   1191     }
   1192 }
   1193 
   1194 /*
   1195  * Check that the result is a 2-byte value with each byte in the range A1..FE
   1196  * (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte
   1197  * to move it to the ISO 2022 range 21..7E.
   1198  * Return 0 if out of range.
   1199  */
   1200 static inline uint32_t
   1201 _2022FromGR94DBCS(uint32_t value) {
   1202     if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) &&
   1203         (uint8_t)(value - 0xa1) <= (0xfe - 0xa1)
   1204     ) {
   1205         return value - 0x8080;  /* shift down to 21..7e byte range */
   1206     } else {
   1207         return 0;  /* not valid for ISO 2022 */
   1208     }
   1209 }
   1210 
   1211 #if 0 /* 5691: Call sites now check for validity. They can just += 0x8080 after that. */
   1212 /*
   1213  * This method does the reverse of _2022FromGR94DBCS(). Given the 2022 code point, it returns the
   1214  * 2 byte value that is in the range A1..FE for each byte. Otherwise it returns the 2022 code point
   1215  * unchanged.
   1216  */
   1217 static inline uint32_t
   1218 _2022ToGR94DBCS(uint32_t value) {
   1219     uint32_t returnValue = value + 0x8080;
   1220     if( (uint16_t)(returnValue - 0xa1a1) <= (0xfefe - 0xa1a1) &&
   1221         (uint8_t)(returnValue - 0xa1) <= (0xfe - 0xa1)) {
   1222         return returnValue;
   1223     } else {
   1224         return value;
   1225     }
   1226 }
   1227 #endif
   1228 
   1229 #ifdef U_ENABLE_GENERIC_ISO_2022
   1230 
   1231 /**********************************************************************************
   1232 *  ISO-2022 Converter
   1233 *
   1234 *
   1235 */
   1236 
   1237 static void
   1238 T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args,
   1239                                                            UErrorCode* err){
   1240     const char* mySourceLimit, *realSourceLimit;
   1241     const char* sourceStart;
   1242     const UChar* myTargetStart;
   1243     UConverter* saveThis;
   1244     UConverterDataISO2022* myData;
   1245     int8_t length;
   1246 
   1247     saveThis = args->converter;
   1248     myData=((UConverterDataISO2022*)(saveThis->extraInfo));
   1249 
   1250     realSourceLimit = args->sourceLimit;
   1251     while (args->source < realSourceLimit) {
   1252         if(myData->key == 0) { /* are we in the middle of an escape sequence? */
   1253             /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
   1254             mySourceLimit = getEndOfBuffer_2022(&(args->source), realSourceLimit, args->flush);
   1255 
   1256             if(args->source < mySourceLimit) {
   1257                 if(myData->currentConverter==NULL) {
   1258                     myData->currentConverter = ucnv_open("ASCII",err);
   1259                     if(U_FAILURE(*err)){
   1260                         return;
   1261                     }
   1262 
   1263                     myData->currentConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
   1264                     saveThis->mode = UCNV_SO;
   1265                 }
   1266 
   1267                 /* convert to before the ESC or until the end of the buffer */
   1268                 myData->isFirstBuffer=FALSE;
   1269                 sourceStart = args->source;
   1270                 myTargetStart = args->target;
   1271                 args->converter = myData->currentConverter;
   1272                 ucnv_toUnicode(args->converter,
   1273                     &args->target,
   1274                     args->targetLimit,
   1275                     &args->source,
   1276                     mySourceLimit,
   1277                     args->offsets,
   1278                     (UBool)(args->flush && mySourceLimit == realSourceLimit),
   1279                     err);
   1280                 args->converter = saveThis;
   1281 
   1282                 if (*err == U_BUFFER_OVERFLOW_ERROR) {
   1283                     /* move the overflow buffer */
   1284                     length = saveThis->UCharErrorBufferLength = myData->currentConverter->UCharErrorBufferLength;
   1285                     myData->currentConverter->UCharErrorBufferLength = 0;
   1286                     if(length > 0) {
   1287                         uprv_memcpy(saveThis->UCharErrorBuffer,
   1288                                     myData->currentConverter->UCharErrorBuffer,
   1289                                     length*U_SIZEOF_UCHAR);
   1290                     }
   1291                     return;
   1292                 }
   1293 
   1294                 /*
   1295                  * At least one of:
   1296                  * -Error while converting
   1297                  * -Done with entire buffer
   1298                  * -Need to write offsets or update the current offset
   1299                  *  (leave that up to the code in ucnv.c)
   1300                  *
   1301                  * or else we just stopped at an ESC byte and continue with changeState_2022()
   1302                  */
   1303                 if (U_FAILURE(*err) ||
   1304                     (args->source == realSourceLimit) ||
   1305                     (args->offsets != NULL && (args->target != myTargetStart || args->source != sourceStart) ||
   1306                     (mySourceLimit < realSourceLimit && myData->currentConverter->toULength > 0))
   1307                 ) {
   1308                     /* copy partial or error input for truncated detection and error handling */
   1309                     if(U_FAILURE(*err)) {
   1310                         length = saveThis->invalidCharLength = myData->currentConverter->invalidCharLength;
   1311                         if(length > 0) {
   1312                             uprv_memcpy(saveThis->invalidCharBuffer, myData->currentConverter->invalidCharBuffer, length);
   1313                         }
   1314                     } else {
   1315                         length = saveThis->toULength = myData->currentConverter->toULength;
   1316                         if(length > 0) {
   1317                             uprv_memcpy(saveThis->toUBytes, myData->currentConverter->toUBytes, length);
   1318                             if(args->source < mySourceLimit) {
   1319                                 *err = U_TRUNCATED_CHAR_FOUND; /* truncated input before ESC */
   1320                             }
   1321                         }
   1322                     }
   1323                     return;
   1324                 }
   1325             }
   1326         }
   1327 
   1328         sourceStart = args->source;
   1329         changeState_2022(args->converter,
   1330                &(args->source),
   1331                realSourceLimit,
   1332                ISO_2022,
   1333                err);
   1334         if (U_FAILURE(*err) || (args->source != sourceStart && args->offsets != NULL)) {
   1335             /* let the ucnv.c code update its current offset */
   1336             return;
   1337         }
   1338     }
   1339 }
   1340 
   1341 #endif
   1342 
   1343 /*
   1344  * To Unicode Callback helper function
   1345  */
   1346 static void
   1347 toUnicodeCallback(UConverter *cnv,
   1348                   const uint32_t sourceChar, const uint32_t targetUniChar,
   1349                   UErrorCode* err){
   1350     if(sourceChar>0xff){
   1351         cnv->toUBytes[0] = (uint8_t)(sourceChar>>8);
   1352         cnv->toUBytes[1] = (uint8_t)sourceChar;
   1353         cnv->toULength = 2;
   1354     }
   1355     else{
   1356         cnv->toUBytes[0] =(char) sourceChar;
   1357         cnv->toULength = 1;
   1358     }
   1359 
   1360     if(targetUniChar == (missingCharMarker-1/*0xfffe*/)){
   1361         *err = U_INVALID_CHAR_FOUND;
   1362     }
   1363     else{
   1364         *err = U_ILLEGAL_CHAR_FOUND;
   1365     }
   1366 }
   1367 
   1368 /**************************************ISO-2022-JP*************************************************/
   1369 
   1370 /************************************** IMPORTANT **************************************************
   1371 * The UConverter_fromUnicode_ISO2022_JP converter does not use ucnv_fromUnicode() functions for SBCS,DBCS and
   1372 * MBCS; instead, the values are obtained directly by calling _MBCSFromUChar32().
   1373 * The converter iterates over each Unicode codepoint
   1374 * to obtain the equivalent codepoints from the codepages supported. Since the source buffer is
   1375 * processed one char at a time it would make sense to reduce the extra processing a canned converter
   1376 * would do as far as possible.
   1377 *
   1378 * If the implementation of these macros or structure of sharedData struct change in the future, make
   1379 * sure that ISO-2022 is also changed.
   1380 ***************************************************************************************************
   1381 */
   1382 
   1383 /***************************************************************************************************
   1384 * Rules for ISO-2022-jp encoding
   1385 * (i)   Escape sequences must be fully contained within a line; they should not
   1386 *       span new lines or CRs
   1387 * (ii)  If the last character on a line is represented by two bytes then an ASCII or
   1388 *       JIS-Roman character escape sequence should follow before the line terminates
   1389 * (iii) If the first character on the line is represented by two bytes then a two
   1390 *       byte character escape sequence should precede it
   1391 * (iv)  If no escape sequence is encountered then the characters are ASCII
   1392 * (v)   Latin(ISO-8859-1) and Greek(ISO-8859-7) characters must be designated to G2,
   1393 *       and invoked with SS2 (ESC N).
   1394 * (vi)  If there is any G0 designation in text, there must be a switch to
   1395 *       ASCII or to JIS X 0201-Roman before a space character (but not
   1396 *       necessarily before "ESC 4/14 2/0" or "ESC N ' '") or control
   1397 *       characters such as tab or CRLF.
   1398 * (vii) Supported encodings:
   1399 *          ASCII, JISX201, JISX208, JISX212, GB2312, KSC5601, ISO-8859-1,ISO-8859-7
   1400 *
   1401 *  source : RFC-1554
   1402 *
   1403 *          JISX201, JISX208,JISX212 : new .cnv data files created
   1404 *          KSC5601 : alias to ibm-949 mapping table
   1405 *          GB2312 : alias to ibm-1386 mapping table
   1406 *          ISO-8859-1 : Algorithmic implemented as LATIN1 case
   1407 *          ISO-8859-7 : alias to ibm-9409 mapping table
   1408 */
   1409 
   1410 /* preference order of JP charsets */
   1411 static const StateEnum jpCharsetPref[]={
   1412     ASCII,
   1413     JISX201,
   1414     ISO8859_1,
   1415     ISO8859_7,
   1416     JISX208,
   1417     JISX212,
   1418     GB2312,
   1419     KSC5601,
   1420     HWKANA_7BIT
   1421 };
   1422 
   /*
    * Designation escape sequences, one row per charset.
    * The rows must follow the order of the enum constants (for example
    * JISX201 = 3), not the order of jpCharsetPref[]!
    */
   static const char escSeqChars[][6] ={
       /* ASCII       : <ESC>(B  */ "\x1B\x28\x42",
       /* ISO-8859-1  : <ESC>.A  */ "\x1B\x2E\x41",
       /* ISO-8859-7  : <ESC>.F  */ "\x1B\x2E\x46",
       /* JISX-201    : <ESC>(J  */ "\x1B\x28\x4A",
       /* JISX-208    : <ESC>$B  */ "\x1B\x24\x42",
       /* JISX-212    : <ESC>$(D */ "\x1B\x24\x28\x44",
       /* GB2312      : <ESC>$A  */ "\x1B\x24\x41",
       /* KSC5601     : <ESC>$(C */ "\x1B\x24\x28\x43",
       /* HWKANA_7BIT : <ESC>(I  */ "\x1B\x28\x49"
   };
   /* Byte length of each designation escape sequence, in the same row order. */
   static  const int8_t escSeqCharsLen[] ={
       /* ASCII       <ESC>(B  */ 3,
       /* ISO-8859-1  <ESC>.A  */ 3,
       /* ISO-8859-7  <ESC>.F  */ 3,
       /* JISX-201    <ESC>(J  */ 3,
       /* JISX-208    <ESC>$B  */ 3,
       /* JISX-212    <ESC>$(D */ 4,
       /* GB2312      <ESC>$A  */ 3,
       /* KSC5601     <ESC>$(C */ 4,
       /* HWKANA_7BIT <ESC>(I  */ 3
   };
   1450 
   1451 /*
   1452 * The iteration over various code pages works this way:
   1453 * i)   Get the currentState from myConverterData->currentState
   1454 * ii)  Check if the character is mapped to a valid character in the currentState
   1455 *      Yes ->  a) set the initIterState to currentState
   1456 *       b) remain in this state until an invalid character is found
   1457 *      No  ->  a) go to the next code page and find the character
   1458 * iii) Before changing the state, increment the current state and check whether it
   1459 *      is equal to the initIterState
   1460 *      Yes ->  A character that cannot be represented in any of the supported encodings
   1461 *       break and return a U_INVALID_CHARACTER error
   1462 *      No  ->  Continue and find the character in next code page
   1463 *
   1464 *
   1465 * TODO: Implement a priority technique where the users are allowed to set the priority of code pages
   1466 */
   1467 
   /*
    * Map 00..7F to Unicode according to JIS X 0201:
    * 5C becomes U+00A5, 7E becomes U+203E, every other value is returned as is.
    */
   static inline uint32_t
   jisx201ToU(uint32_t value) {
       switch(value) {
       case 0x5c:
           return 0xa5;
       case 0x7e:
           return 0x203e;
       default:
           return value;
       }
   }
   1481 
   /*
    * Map Unicode to 00..7F according to JIS X 0201.
    * U+00A5 becomes 5C and U+203E becomes 7E; U+005C and U+007E themselves have
    * no mapping. Returns U+FFFE if unmappable.
    */
   static inline uint32_t
   jisx201FromU(uint32_t value) {
       switch(value) {
       case 0xa5:
           return 0x5c;
       case 0x203e:
           return 0x7e;
       case 0x5c:
       case 0x7e:
           return 0xfffe;
       default:
           return (value <= 0x7f) ? value : 0xfffe;
       }
   }
   1496 
   /*
    * Convert a valid Shift-JIS byte pair (lead byte in bits 15..8, trail byte
    * in bits 7..0) to the corresponding pair of JIS X 0208 bytes 21..7E.
    * Returns 0 if the pair lies beyond the JIS X 0208 range (above 0xEFFC).
    */
   static inline uint32_t
   _2022FromSJIS(uint32_t value) {
       uint32_t lead;
       uint8_t trail;

       if(value > 0xEFFC) {
           return 0;  /* beyond JIS X 0208 */
       }

       trail = (uint8_t)value;
       lead = value & 0xff00;

       /* lead bytes up to 9F and lead bytes E0..EF use different offsets */
       lead -= (lead <= 0x9f00) ? 0x7000 : 0xb000;
       lead <<= 1;

       if(trail > 0x9e) {
           /* trail 9F..FC */
           return lead | (uint32_t)(trail - 0x7e);
       }

       /* trail up to 9E: one row lower; the trail offset differs up to 7E and above it */
       lead -= 0x100;
       return lead | (uint32_t)(trail - ((trail <= 0x7e) ? 0x1f : 0x20));
   }
   1532 
   /*
    * Convert a pair of JIS X 0208 bytes 21..7E to Shift-JIS and store the two
    * result bytes in bytes[0] (lead) and bytes[1] (trail).
    * A byte outside 21..7E must not yield a valid Shift-JIS pair, so that the
    * converter catches it: such input is either replaced by 0 explicitly or
    * already maps to an equally invalid Shift-JIS byte value.
    */
   static inline void
   _2022ToSJIS(uint8_t c1, uint8_t c2, char bytes[2]) {
       uint8_t row;

       if((c1 & 1) != 0) {
           /* odd first byte */
           ++c1;
           if(c2 > 0x7e) {
               c2 = 0;  /* invalid */
           } else {
               c2 += (c2 <= 0x5f) ? 0x1f : 0x20;
           }
       } else if((uint8_t)(c2 - 0x21) > ((0x7e) - 0x21)) {
           c2 = 0;  /* invalid */
       } else {
           /* even first byte */
           c2 += 0x7e;
       }

       row = (uint8_t)(c1 >> 1);
       if(row > 0x3f) {
           row = 0;  /* invalid */
       } else {
           row += (row <= 0x2f) ? 0x70 : 0xb0;
       }

       bytes[0] = (char)row;
       bytes[1] = (char)c2;
   }
   1569 
   1570 /*
   1571  * JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS)
   1572  * Katakana.
   1573  * Now that we use a Shift-JIS table for JIS X 0208 we need to hardcode these fallbacks
   1574  * because Shift-JIS roundtrips half-width Katakana to single bytes.
   1575  * These were the only fallbacks in ICU's jisx-208.ucm file.
   1576  */
   1577 static const uint16_t hwkana_fb[HWKANA_END - HWKANA_START + 1] = {
   1578     0x2123,  /* U+FF61 */
   1579     0x2156,
   1580     0x2157,
   1581     0x2122,
   1582     0x2126,
   1583     0x2572,
   1584     0x2521,
   1585     0x2523,
   1586     0x2525,
   1587     0x2527,
   1588     0x2529,
   1589     0x2563,
   1590     0x2565,
   1591     0x2567,
   1592     0x2543,
   1593     0x213C,  /* U+FF70 */
   1594     0x2522,
   1595     0x2524,
   1596     0x2526,
   1597     0x2528,
   1598     0x252A,
   1599     0x252B,
   1600     0x252D,
   1601     0x252F,
   1602     0x2531,
   1603     0x2533,
   1604     0x2535,
   1605     0x2537,
   1606     0x2539,
   1607     0x253B,
   1608     0x253D,
   1609     0x253F,  /* U+FF80 */
   1610     0x2541,
   1611     0x2544,
   1612     0x2546,
   1613     0x2548,
   1614     0x254A,
   1615     0x254B,
   1616     0x254C,
   1617     0x254D,
   1618     0x254E,
   1619     0x254F,
   1620     0x2552,
   1621     0x2555,
   1622     0x2558,
   1623     0x255B,
   1624     0x255E,
   1625     0x255F,  /* U+FF90 */
   1626     0x2560,
   1627     0x2561,
   1628     0x2562,
   1629     0x2564,
   1630     0x2566,
   1631     0x2568,
   1632     0x2569,
   1633     0x256A,
   1634     0x256B,
   1635     0x256C,
   1636     0x256D,
   1637     0x256F,
   1638     0x2573,
   1639     0x212B,
   1640     0x212C   /* U+FF9F */
   1641 };
   1642 
   1643 static void
   1644 UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err) {
   1645     UConverter *cnv = args->converter;
   1646     UConverterDataISO2022 *converterData;
   1647     ISO2022State *pFromU2022State;
   1648     uint8_t *target = (uint8_t *) args->target;
   1649     const uint8_t *targetLimit = (const uint8_t *) args->targetLimit;
   1650     const UChar* source = args->source;
   1651     const UChar* sourceLimit = args->sourceLimit;
   1652     int32_t* offsets = args->offsets;
   1653     UChar32 sourceChar;
   1654     char buffer[8];
   1655     int32_t len, outLen;
   1656     int8_t choices[10];
   1657     int32_t choiceCount;
   1658     uint32_t targetValue = 0;
   1659     UBool useFallback;
   1660 
   1661     int32_t i;
   1662     int8_t cs, g;
   1663 
   1664     /* set up the state */
   1665     converterData     = (UConverterDataISO2022*)cnv->extraInfo;
   1666     pFromU2022State   = &converterData->fromU2022State;
   1667 
   1668     choiceCount = 0;
   1669 
   1670     /* check if the last codepoint of previous buffer was a lead surrogate*/
   1671     if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
   1672         goto getTrail;
   1673     }
   1674 
   1675     while(source < sourceLimit) {
   1676         if(target < targetLimit) {
   1677 
   1678             sourceChar  = *(source++);
   1679             /*check if the char is a First surrogate*/
   1680             if(U16_IS_SURROGATE(sourceChar)) {
   1681                 if(U16_IS_SURROGATE_LEAD(sourceChar)) {
   1682 getTrail:
   1683                     /*look ahead to find the trail surrogate*/
   1684                     if(source < sourceLimit) {
   1685                         /* test the following code unit */
   1686                         UChar trail=(UChar) *source;
   1687                         if(U16_IS_TRAIL(trail)) {
   1688                             source++;
   1689                             sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
   1690                             cnv->fromUChar32=0x00;
   1691                             /* convert this supplementary code point */
   1692                             /* exit this condition tree */
   1693                         } else {
   1694                             /* this is an unmatched lead code unit (1st surrogate) */
   1695                             /* callback(illegal) */
   1696                             *err=U_ILLEGAL_CHAR_FOUND;
   1697                             cnv->fromUChar32=sourceChar;
   1698                             break;
   1699                         }
   1700                     } else {
   1701                         /* no more input */
   1702                         cnv->fromUChar32=sourceChar;
   1703                         break;
   1704                     }
   1705                 } else {
   1706                     /* this is an unmatched trail code unit (2nd surrogate) */
   1707                     /* callback(illegal) */
   1708                     *err=U_ILLEGAL_CHAR_FOUND;
   1709                     cnv->fromUChar32=sourceChar;
   1710                     break;
   1711                 }
   1712             }
   1713 
   1714             /* do not convert SO/SI/ESC */
   1715             if(IS_2022_CONTROL(sourceChar)) {
   1716                 /* callback(illegal) */
   1717                 *err=U_ILLEGAL_CHAR_FOUND;
   1718                 cnv->fromUChar32=sourceChar;
   1719                 break;
   1720             }
   1721 
   1722             /* do the conversion */
   1723 
   1724             if(choiceCount == 0) {
   1725                 uint16_t csm;
   1726 
   1727                 /*
   1728                  * The csm variable keeps track of which charsets are allowed
   1729                  * and not used yet while building the choices[].
   1730                  */
   1731                 csm = jpCharsetMasks[converterData->version];
   1732                 choiceCount = 0;
   1733 
   1734                 /* JIS7/8: try single-byte half-width Katakana before JISX208 */
   1735                 if(converterData->version == 3 || converterData->version == 4) {
   1736                     choices[choiceCount++] = (int8_t)HWKANA_7BIT;
   1737                 }
   1738                 /* Do not try single-byte half-width Katakana for other versions. */
   1739                 csm &= ~CSM(HWKANA_7BIT);
   1740 
   1741                 /* try the current G0 charset */
   1742                 choices[choiceCount++] = cs = pFromU2022State->cs[0];
   1743                 csm &= ~CSM(cs);
   1744 
   1745                 /* try the current G2 charset */
   1746                 if((cs = pFromU2022State->cs[2]) != 0) {
   1747                     choices[choiceCount++] = cs;
   1748                     csm &= ~CSM(cs);
   1749                 }
   1750 
   1751                 /* try all the other possible charsets */
   1752                 for(i = 0; i < LENGTHOF(jpCharsetPref); ++i) {
   1753                     cs = (int8_t)jpCharsetPref[i];
   1754                     if(CSM(cs) & csm) {
   1755                         choices[choiceCount++] = cs;
   1756                         csm &= ~CSM(cs);
   1757                     }
   1758                 }
   1759             }
   1760 
   1761             cs = g = 0;
   1762             /*
   1763              * len==0: no mapping found yet
   1764              * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
   1765              * len>0: found a roundtrip result, done
   1766              */
   1767             len = 0;
   1768             /*
   1769              * We will turn off useFallback after finding a fallback,
   1770              * but we still get fallbacks from PUA code points as usual.
   1771              * Therefore, we will also need to check that we don't overwrite
   1772              * an early fallback with a later one.
   1773              */
   1774             useFallback = cnv->useFallback;
   1775 
   1776             for(i = 0; i < choiceCount && len <= 0; ++i) {
   1777                 uint32_t value;
   1778                 int32_t len2;
   1779                 int8_t cs0 = choices[i];
   1780                 switch(cs0) {
   1781                 case ASCII:
   1782                     if(sourceChar <= 0x7f) {
   1783                         targetValue = (uint32_t)sourceChar;
   1784                         len = 1;
   1785                         cs = cs0;
   1786                         g = 0;
   1787                     }
   1788                     break;
   1789                 case ISO8859_1:
   1790                     if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
   1791                         targetValue = (uint32_t)sourceChar - 0x80;
   1792                         len = 1;
   1793                         cs = cs0;
   1794                         g = 2;
   1795                     }
   1796                     break;
   1797                 case HWKANA_7BIT:
   1798                     if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
   1799                         if(converterData->version==3) {
   1800                             /* JIS7: use G1 (SO) */
   1801                             /* Shift U+FF61..U+FF9F to bytes 21..5F. */
   1802                             targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21));
   1803                             len = 1;
   1804                             pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */
   1805                             g = 1;
   1806                         } else if(converterData->version==4) {
   1807                             /* JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below */
   1808                             /* Shift U+FF61..U+FF9F to bytes A1..DF. */
   1809                             targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0xa1));
   1810                             len = 1;
   1811 
   1812                             cs = pFromU2022State->cs[0];
   1813                             if(IS_JP_DBCS(cs)) {
   1814                                 /* switch from a DBCS charset to JISX201 */
   1815                                 cs = (int8_t)JISX201;
   1816                             }
   1817                             /* else stay in the current G0 charset */
   1818                             g = 0;
   1819                         }
   1820                         /* else do not use HWKANA_7BIT with other versions */
   1821                     }
   1822                     break;
   1823                 case JISX201:
   1824                     /* G0 SBCS */
   1825                     value = jisx201FromU(sourceChar);
   1826                     if(value <= 0x7f) {
   1827                         targetValue = value;
   1828                         len = 1;
   1829                         cs = cs0;
   1830                         g = 0;
   1831                         useFallback = FALSE;
   1832                     }
   1833                     break;
   1834                 case JISX208:
   1835                     /* G0 DBCS from Shift-JIS table */
   1836                     len2 = MBCS_FROM_UCHAR32_ISO2022(
   1837                                 converterData->myConverterArray[cs0],
   1838                                 sourceChar, &value,
   1839                                 useFallback, MBCS_OUTPUT_2);
   1840                     if(len2 == 2 || (len2 == -2 && len == 0)) {  /* only accept DBCS: abs(len)==2 */
   1841                         value = _2022FromSJIS(value);
   1842                         if(value != 0) {
   1843                             targetValue = value;
   1844                             len = len2;
   1845                             cs = cs0;
   1846                             g = 0;
   1847                             useFallback = FALSE;
   1848                         }
   1849                     } else if(len == 0 && useFallback &&
   1850                               (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
   1851                         targetValue = hwkana_fb[sourceChar - HWKANA_START];
   1852                         len = -2;
   1853                         cs = cs0;
   1854                         g = 0;
   1855                         useFallback = FALSE;
   1856                     }
   1857                     break;
   1858                 case ISO8859_7:
   1859                     /* G0 SBCS forced to 7-bit output */
   1860                     len2 = MBCS_SINGLE_FROM_UCHAR32(
   1861                                 converterData->myConverterArray[cs0],
   1862                                 sourceChar, &value,
   1863                                 useFallback);
   1864                     if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= value && value <= GR96_END) {
   1865                         targetValue = value - 0x80;
   1866                         len = len2;
   1867                         cs = cs0;
   1868                         g = 2;
   1869                         useFallback = FALSE;
   1870                     }
   1871                     break;
   1872                 default:
   1873                     /* G0 DBCS */
   1874                     len2 = MBCS_FROM_UCHAR32_ISO2022(
   1875                                 converterData->myConverterArray[cs0],
   1876                                 sourceChar, &value,
   1877                                 useFallback, MBCS_OUTPUT_2);
   1878                     if(len2 == 2 || (len2 == -2 && len == 0)) {  /* only accept DBCS: abs(len)==2 */
   1879                         if(cs0 == KSC5601) {
   1880                             /*
   1881                              * Check for valid bytes for the encoding scheme.
   1882                              * This is necessary because the sub-converter (windows-949)
   1883                              * has a broader encoding scheme than is valid for 2022.
   1884                              */
   1885                             value = _2022FromGR94DBCS(value);
   1886                             if(value == 0) {
   1887                                 break;
   1888                             }
   1889                         }
   1890                         targetValue = value;
   1891                         len = len2;
   1892                         cs = cs0;
   1893                         g = 0;
   1894                         useFallback = FALSE;
   1895                     }
   1896                     break;
   1897                 }
   1898             }
   1899 
   1900             if(len != 0) {
   1901                 if(len < 0) {
   1902                     len = -len;  /* fallback */
   1903                 }
   1904                 outLen = 0; /* count output bytes */
   1905 
   1906                 /* write SI if necessary (only for JIS7) */
   1907                 if(pFromU2022State->g == 1 && g == 0) {
   1908                     buffer[outLen++] = UCNV_SI;
   1909                     pFromU2022State->g = 0;
   1910                 }
   1911 
   1912                 /* write the designation sequence if necessary */
   1913                 if(cs != pFromU2022State->cs[g]) {
   1914                     int32_t escLen = escSeqCharsLen[cs];
   1915                     uprv_memcpy(buffer + outLen, escSeqChars[cs], escLen);
   1916                     outLen += escLen;
   1917                     pFromU2022State->cs[g] = cs;
   1918 
   1919                     /* invalidate the choices[] */
   1920                     choiceCount = 0;
   1921                 }
   1922 
   1923                 /* write the shift sequence if necessary */
   1924                 if(g != pFromU2022State->g) {
   1925                     switch(g) {
   1926                     /* case 0 handled before writing escapes */
   1927                     case 1:
   1928                         buffer[outLen++] = UCNV_SO;
   1929                         pFromU2022State->g = 1;
   1930                         break;
   1931                     default: /* case 2 */
   1932                         buffer[outLen++] = 0x1b;
   1933                         buffer[outLen++] = 0x4e;
   1934                         break;
   1935                     /* no case 3: no SS3 in ISO-2022-JP-x */
   1936                     }
   1937                 }
   1938 
   1939                 /* write the output bytes */
   1940                 if(len == 1) {
   1941                     buffer[outLen++] = (char)targetValue;
   1942                 } else /* len == 2 */ {
   1943                     buffer[outLen++] = (char)(targetValue >> 8);
   1944                     buffer[outLen++] = (char)targetValue;
   1945                 }
   1946             } else {
   1947                 /*
   1948                  * if we cannot find the character after checking all codepages
   1949                  * then this is an error
   1950                  */
   1951                 *err = U_INVALID_CHAR_FOUND;
   1952                 cnv->fromUChar32=sourceChar;
   1953                 break;
   1954             }
   1955 
   1956             if(sourceChar == CR || sourceChar == LF) {
   1957                 /* reset the G2 state at the end of a line (conversion got us into ASCII or JISX201 already) */
   1958                 pFromU2022State->cs[2] = 0;
   1959                 choiceCount = 0;
   1960             }
   1961 
   1962             /* output outLen>0 bytes in buffer[] */
   1963             if(outLen == 1) {
   1964                 *target++ = buffer[0];
   1965                 if(offsets) {
   1966                     *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */
   1967                 }
   1968             } else if(outLen == 2 && (target + 2) <= targetLimit) {
   1969                 *target++ = buffer[0];
   1970                 *target++ = buffer[1];
   1971                 if(offsets) {
   1972                     int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));
   1973                     *offsets++ = sourceIndex;
   1974                     *offsets++ = sourceIndex;
   1975                 }
   1976             } else {
   1977                 fromUWriteUInt8(
   1978                     cnv,
   1979                     buffer, outLen,
   1980                     &target, (const char *)targetLimit,
   1981                     &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
   1982                     err);
   1983                 if(U_FAILURE(*err)) {
   1984                     break;
   1985                 }
   1986             }
   1987         } /* end if(myTargetIndex<myTargetLength) */
   1988         else{
   1989             *err =U_BUFFER_OVERFLOW_ERROR;
   1990             break;
   1991         }
   1992 
   1993     }/* end while(mySourceIndex<mySourceLength) */
   1994 
   1995     /*
   1996      * the end of the input stream and detection of truncated input
   1997      * are handled by the framework, but for ISO-2022-JP conversion
   1998      * we need to be in ASCII mode at the very end
   1999      *
   2000      * conditions:
   2001      *   successful
   2002      *   in SO mode or not in ASCII mode
   2003      *   end of input and no truncated input
   2004      */
   2005     if( U_SUCCESS(*err) &&
   2006         (pFromU2022State->g!=0 || pFromU2022State->cs[0]!=ASCII) &&
   2007         args->flush && source>=sourceLimit && cnv->fromUChar32==0
   2008     ) {
   2009         int32_t sourceIndex;
   2010 
   2011         outLen = 0;
   2012 
   2013         if(pFromU2022State->g != 0) {
   2014             buffer[outLen++] = UCNV_SI;
   2015             pFromU2022State->g = 0;
   2016         }
   2017 
   2018         if(pFromU2022State->cs[0] != ASCII) {
   2019             int32_t escLen = escSeqCharsLen[ASCII];
   2020             uprv_memcpy(buffer + outLen, escSeqChars[ASCII], escLen);
   2021             outLen += escLen;
   2022             pFromU2022State->cs[0] = (int8_t)ASCII;
   2023         }
   2024 
   2025         /* get the source index of the last input character */
   2026         /*
   2027          * TODO this would be simpler and more reliable if we used a pair
   2028          * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
   2029          * so that we could simply use the prevSourceIndex here;
   2030          * this code gives an incorrect result for the rare case of an unmatched
   2031          * trail surrogate that is alone in the last buffer of the text stream
   2032          */
   2033         sourceIndex=(int32_t)(source-args->source);
   2034         if(sourceIndex>0) {
   2035             --sourceIndex;
   2036             if( U16_IS_TRAIL(args->source[sourceIndex]) &&
   2037                 (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
   2038             ) {
   2039                 --sourceIndex;
   2040             }
   2041         } else {
   2042             sourceIndex=-1;
   2043         }
   2044 
   2045         fromUWriteUInt8(
   2046             cnv,
   2047             buffer, outLen,
   2048             &target, (const char *)targetLimit,
   2049             &offsets, sourceIndex,
   2050             err);
   2051     }
   2052 
   2053     /*save the state and return */
   2054     args->source = source;
   2055     args->target = (char*)target;
   2056 }
   2057 
   2058 /*************** to unicode *******************/
   2059 
   /*
    * ISO-2022-JP family to Unicode (UTF-16) conversion with optional offsets.
    *
    * Reads bytes from args->source and writes UChars to args->target until the
    * source is used up, the target is full (U_BUFFER_OVERFLOW_ERROR), or an
    * error is raised. Escape sequences are handed to changeState_2022(); SI/SO
    * switch between G0 and G1 for version 3 (JIS7) only and are otherwise
    * passed to toUnicodeCallback() as unconvertible bytes. All remaining bytes
    * are decoded with the charset selected by cs[g] in myData->toU2022State,
    * except for the 8-bit halfwidth katakana special case of version 4 (JIS8).
    *
    * The function can resume in the middle of a character: a pending escape
    * sequence (myData->key != 0) jumps to "escape", and a DBCS lead byte saved
    * in toUBytes[0] (toULength == 1) jumps to "getTrailByte".
    *
    * When args->offsets is non-NULL, each UChar written gets the index
    * (relative to args->source on entry) of the first byte of its source
    * character. args->source and args->target are written back on every
    * exit path.
    */
   2060 static void
   2061 UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
   2062                                                UErrorCode* err){
   2063     char tempBuf[2];
   2064     const char *mySource = (char *) args->source;
   2065     UChar *myTarget = args->target;
   2066     const char *mySourceLimit = args->sourceLimit;
   2067     uint32_t targetUniChar = 0x0000;
   2068     uint32_t mySourceChar = 0x0000;
   2069     uint32_t tmpSourceChar = 0x0000;
   2070     UConverterDataISO2022* myData;
   2071     ISO2022State *pToU2022State;
   2072     StateEnum cs;
   2073 
   2074     myData=(UConverterDataISO2022*)(args->converter->extraInfo);
   2075     pToU2022State = &myData->toU2022State;
   2076 
   2077     if(myData->key != 0) {
   2078         /* continue with a partial escape sequence */
   2079         goto escape;
   2080     } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
   2081         /* continue with a partial double-byte character */
   2082         mySourceChar = args->converter->toUBytes[0];
   2083         args->converter->toULength = 0;
   2084         cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
   2085         targetUniChar = missingCharMarker;
   2086         goto getTrailByte;
   2087     }
   2088 
   2089     while(mySource < mySourceLimit){
   2090 
            /* stays at missingCharMarker unless one of the branches below finds a mapping */
   2091         targetUniChar =missingCharMarker;
   2092 
   2093         if(myTarget < args->targetLimit){
   2094 
   2095             mySourceChar= (unsigned char) *mySource++;
   2096 
   2097             switch(mySourceChar) {
   2098             case UCNV_SI:
   2099                 if(myData->version==3) {
                        /* JIS7: go back to G0 */
   2100                     pToU2022State->g=0;
   2101                     continue;
   2102                 } else {
   2103                     /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
   2104                     myData->isEmptySegment = FALSE;	/* reset this, we have a different error */
   2105                     break;
   2106                 }
   2107 
   2108             case UCNV_SO:
   2109                 if(myData->version==3) {
   2110                     /* JIS7: switch to G1 half-width Katakana */
   2111                     pToU2022State->cs[1] = (int8_t)HWKANA_7BIT;
   2112                     pToU2022State->g=1;
   2113                     continue;
   2114                 } else {
   2115                     /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
   2116                     myData->isEmptySegment = FALSE;	/* reset this, we have a different error */
   2117                     break;
   2118                 }
   2119 
   2120             case ESC_2022:
                    /* step back onto the ESC byte so that changeState_2022() starts there */
   2121                 mySource--;
   2122 escape:
   2123                 {
                        /* remembered so that an empty-segment error can report the bytes consumed here */
   2124                     const char * mySourceBefore = mySource;
   2125                     int8_t toULengthBefore = args->converter->toULength;
   2126 
   2127                     changeState_2022(args->converter,&(mySource),
   2128                         mySourceLimit, ISO_2022_JP,err);
   2129 
   2130                     /* If in ISO-2022-JP only and we successfully completed an escape sequence, but previous segment was empty, create an error */
   2131                     if(myData->version==0 && myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
   2132                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   2133                         args->converter->toUCallbackReason = UCNV_IRREGULAR;
   2134                         args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore));
   2135                     }
   2136                 }
   2137 
   2138                 /* invalid or illegal escape sequence */
   2139                 if(U_FAILURE(*err)){
   2140                     args->target = myTarget;
   2141                     args->source = mySource;
   2142                     myData->isEmptySegment = FALSE;	/* Reset to avoid future spurious errors */
   2143                     return;
   2144                 }
   2145                 /* If we successfully completed an escape sequence, we begin a new segment, empty so far */
   2146                 if(myData->key==0) {
   2147                     myData->isEmptySegment = TRUE;
   2148                 }
   2149                 continue;
   2150 
   2151             /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */
   2152 
   2153             case CR:
   2154                 /*falls through*/
   2155             case LF:
   2156                 /* automatically reset to single-byte mode */
   2157                 if((StateEnum)pToU2022State->cs[0] != ASCII && (StateEnum)pToU2022State->cs[0] != JISX201) {
   2158                     pToU2022State->cs[0] = (int8_t)ASCII;
   2159                 }
   2160                 pToU2022State->cs[2] = 0;
   2161                 pToU2022State->g = 0;
   2162                 /* falls through */
   2163             default:
   2164                 /* convert one or two bytes */
   2165                 myData->isEmptySegment = FALSE;
   2166                 cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
   2167                 if( (uint8_t)(mySourceChar - 0xa1) <= (0xdf - 0xa1) && myData->version==4 &&
   2168                     !IS_JP_DBCS(cs)
   2169                 ) {
   2170                     /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */
   2171                     targetUniChar = mySourceChar + (HWKANA_START - 0xa1);
   2172 
   2173                     /* return from a single-shift state to the previous one */
   2174                     if(pToU2022State->g >= 2) {
   2175                         pToU2022State->g=pToU2022State->prevG;
   2176                     }
   2177                 } else switch(cs) {
   2178                 case ASCII:
   2179                     if(mySourceChar <= 0x7f) {
   2180                         targetUniChar = mySourceChar;
   2181                     }
   2182                     break;
   2183                 case ISO8859_1:
                        /* a 7-bit byte stands for the code point with the high bit set */
   2184                     if(mySourceChar <= 0x7f) {
   2185                         targetUniChar = mySourceChar + 0x80;
   2186                     }
   2187                     /* return from a single-shift state to the previous one */
   2188                     pToU2022State->g=pToU2022State->prevG;
   2189                     break;
   2190                 case ISO8859_7:
   2191                     if(mySourceChar <= 0x7f) {
   2192                         /* convert mySourceChar+0x80 to use a normal 8-bit table */
   2193                         targetUniChar =
   2194                             _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(
   2195                                 myData->myConverterArray[cs],
   2196                                 mySourceChar + 0x80);
   2197                     }
   2198                     /* return from a single-shift state to the previous one */
   2199                     pToU2022State->g=pToU2022State->prevG;
   2200                     break;
   2201                 case JISX201:
   2202                     if(mySourceChar <= 0x7f) {
   2203                         targetUniChar = jisx201ToU(mySourceChar);
   2204                     }
   2205                     break;
   2206                 case HWKANA_7BIT:
   2207                     if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) {
   2208                         /* 7-bit halfwidth Katakana */
   2209                         targetUniChar = mySourceChar + (HWKANA_START - 0x21);
   2210                     }
   2211                     break;
   2212                 default:
   2213                     /* G0 DBCS */
   2214                     if(mySource < mySourceLimit) {
   2215                         int leadIsOk, trailIsOk;
   2216                         uint8_t trailByte;
   2217 getTrailByte:
                            /* peek only; the trail byte is consumed further down once we know it belongs to this character */
   2218                         trailByte = (uint8_t)*mySource;
   2219                         /*
   2220                          * Ticket 5691: consistent illegal sequences:
   2221                          * - We include at least the first byte in the illegal sequence.
   2222                          * - If any of the non-initial bytes could be the start of a character,
   2223                          *   we stop the illegal sequence before the first one of those.
   2224                          *
   2225                          * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
   2226                          * an ESC/SO/SI, we report only the first byte as the illegal sequence.
   2227                          * Otherwise we convert or report the pair of bytes.
   2228                          */
   2229                         leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
   2230                         trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
   2231                         if (leadIsOk && trailIsOk) {
   2232                             ++mySource;
   2233                             tmpSourceChar = (mySourceChar << 8) | trailByte;
   2234                             if(cs == JISX208) {
                                    /* the JIS X 0208 table is looked up with Shift-JIS bytes */
   2235                                 _2022ToSJIS((uint8_t)mySourceChar, trailByte, tempBuf);
   2236                                 mySourceChar = tmpSourceChar;
   2237                             } else {
   2238                                 /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
   2239                                 mySourceChar = tmpSourceChar;
   2240                                 if (cs == KSC5601) {
   2241                                     tmpSourceChar += 0x8080;  /* = _2022ToGR94DBCS(tmpSourceChar) */
   2242                                 }
   2243                                 tempBuf[0] = (char)(tmpSourceChar >> 8);
   2244                                 tempBuf[1] = (char)(tmpSourceChar);
   2245                             }
   2246                             targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE);
   2247                         } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
   2248                             /* report a pair of illegal bytes if the second byte is not a DBCS starter */
   2249                             ++mySource;
   2250                             /* add another bit so that the code below writes 2 bytes in case of error */
   2251                             mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
   2252                         }
   2253                     } else {
                            /* the lead byte ends this buffer: keep it in toUBytes[] so that the next call resumes at getTrailByte */
   2254                         args->converter->toUBytes[0] = (uint8_t)mySourceChar;
   2255                         args->converter->toULength = 1;
   2256                         goto endloop;
   2257                     }
   2258                 }  /* End of inner switch */
   2259                 break;
   2260             }  /* End of outer switch */
                /* one UChar: its offset is the index of the first byte of the 1- or 2-byte source character */
   2261             if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){
   2262                 if(args->offsets){
   2263                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2264                 }
   2265                 *(myTarget++)=(UChar)targetUniChar;
   2266             }
   2267             else if(targetUniChar > missingCharMarker){
   2268                 /* disassemble the surrogate pair and write to output*/
   2269                 targetUniChar-=0x0010000;
   2270                 *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));
   2271                 if(args->offsets){
   2272                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2273                 }
   2274                 ++myTarget;
   2275                 if(myTarget< args->targetLimit){
   2276                     *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
   2277                     if(args->offsets){
   2278                         args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2279                     }
   2280                     ++myTarget;
   2281                 }else{
                        /* no room left for the trail surrogate: it is queued in UCharErrorBuffer */
   2282                     args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
   2283                                     (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
   2284                 }
   2285 
   2286             }
   2287             else{
   2288                 /* Call the callback function*/
   2289                 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
   2290                 break;
   2291             }
   2292         }
   2293         else{    /* goes with "if(myTarget < args->targetLimit)"  way up near top of function */
   2294             *err =U_BUFFER_OVERFLOW_ERROR;
   2295             break;
   2296         }
   2297     }
   2298 endloop:
        /* hand the advanced pointers back to the caller */
   2299     args->target = myTarget;
   2300     args->source = mySource;
   2301 }
   2302 
   2303 
   2304 /***************************************************************
   2305 *   Rules for ISO-2022-KR encoding
   2306 *   i) The KSC5601 designator sequence should appear only once in a file,
   2307 *      at the beginning of a line before any KSC5601 characters. This usually
   2308 *      means that it appears by itself on the first line of the file
   2309 *  ii) There are only 2 shifting sequences SO to shift into double byte mode
   2310 *      and SI to shift into single byte mode
   2311 */
   2312 static void
   2313 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){
   2314 
   2315     UConverter* saveConv = args->converter;
   2316     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022*)saveConv->extraInfo;
   2317     args->converter=myConverterData->currentConverter;
   2318 
   2319     myConverterData->currentConverter->fromUChar32 = saveConv->fromUChar32;
   2320     ucnv_MBCSFromUnicodeWithOffsets(args,err);
   2321     saveConv->fromUChar32 = myConverterData->currentConverter->fromUChar32;
   2322 
   2323     if(*err == U_BUFFER_OVERFLOW_ERROR) {
   2324         if(myConverterData->currentConverter->charErrorBufferLength > 0) {
   2325             uprv_memcpy(
   2326                 saveConv->charErrorBuffer,
   2327                 myConverterData->currentConverter->charErrorBuffer,
   2328                 myConverterData->currentConverter->charErrorBufferLength);
   2329         }
   2330         saveConv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;
   2331         myConverterData->currentConverter->charErrorBufferLength = 0;
   2332     }
   2333     args->converter=saveConv;
   2334 }
   2335 
   /*
    * Unicode (UTF-16) to ISO-2022-KR conversion with optional offsets.
    *
    * Version 1 converters hand the whole call to
    * UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(). Otherwise each
    * code unit is looked up with MBCS_FROM_UCHAR32_ISO2022(). Single bytes up
    * to 0x7f are written unchanged; double bytes whose two bytes are both in
    * a1..fe are written with 0x80 subtracted from each byte. An SO or SI byte
    * is written first whenever the output switches between single-byte and
    * double-byte mode.
    *
    * State kept in the converter between calls: the current mode in
    * fromUnicodeStatus and a pending code point in fromUChar32. When
    * args->flush is set, the input is used up and nothing is pending, a final
    * SHIFT_IN_STR is written if the output is still in double-byte mode.
    *
    * Errors: bytes that do not fit go to charErrorBuffer with
    * U_BUFFER_OVERFLOW_ERROR; ESC/SO/SI in the input and unmatched surrogates
    * give U_ILLEGAL_CHAR_FOUND; code points without an acceptable mapping
    * give U_INVALID_CHAR_FOUND. A lead surrogate at the very end of the input
    * is stored in fromUChar32 with U_ZERO_ERROR.
    */
   2336 static void
   2337 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
   2338 
   2339     const UChar *source = args->source;
   2340     const UChar *sourceLimit = args->sourceLimit;
   2341     unsigned char *target = (unsigned char *) args->target;
   2342     unsigned char *targetLimit = (unsigned char *) args->targetLimit;
   2343     int32_t* offsets = args->offsets;
   2344     uint32_t targetByteUnit = 0x0000;
   2345     UChar32 sourceChar = 0x0000;
   2346     UBool isTargetByteDBCS;
   2347     UBool oldIsTargetByteDBCS;
   2348     UConverterDataISO2022 *converterData;
   2349     UConverterSharedData* sharedData;
   2350     UBool useFallback;
   2351     int32_t length =0;
   2352 
   2353     converterData=(UConverterDataISO2022*)args->converter->extraInfo;
   2354     /* if the version is 1 then the user is requesting
   2355      * conversion with ibm-25546 pass the arguments to
   2356      * MBCS converter and return
   2357      */
   2358     if(converterData->version==1){
   2359         UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
   2360         return;
   2361     }
   2362 
   2363     /* initialize data */
   2364     sharedData = converterData->currentConverter->sharedData;
   2365     useFallback = args->converter->useFallback;
   2366     isTargetByteDBCS=(UBool)args->converter->fromUnicodeStatus;
   2367     oldIsTargetByteDBCS = isTargetByteDBCS;
   2368 
        /* NOTE(review): repeats the read of fromUnicodeStatus made a few lines above; looks redundant */
   2369     isTargetByteDBCS   = (UBool) args->converter->fromUnicodeStatus;
        /* a code point left over from the previous call resumes at the surrogate handling */
   2370     if((sourceChar = args->converter->fromUChar32)!=0 && target <targetLimit) {
   2371         goto getTrail;
   2372     }
   2373     while(source < sourceLimit){
   2374 
   2375         targetByteUnit = missingCharMarker;
   2376 
   2377         if(target < (unsigned char*) args->targetLimit){
   2378             sourceChar = *source++;
   2379 
   2380             /* do not convert SO/SI/ESC */
   2381             if(IS_2022_CONTROL(sourceChar)) {
   2382                 /* callback(illegal) */
   2383                 *err=U_ILLEGAL_CHAR_FOUND;
   2384                 args->converter->fromUChar32=sourceChar;
   2385                 break;
   2386             }
   2387 
   2388             length = MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,&targetByteUnit,useFallback,MBCS_OUTPUT_2);
   2389             if(length < 0) {
   2390                 length = -length;  /* fallback */
   2391             }
   2392             /* only DBCS or SBCS characters are expected*/
   2393             /* DB characters with high bit set to 1 are expected */
   2394             if( length > 2 || length==0 ||
   2395                 (length == 1 && targetByteUnit > 0x7f) ||
   2396                 (length == 2 &&
   2397                     ((uint16_t)(targetByteUnit - 0xa1a1) > (0xfefe - 0xa1a1) ||
   2398                     (uint8_t)(targetByteUnit - 0xa1) > (0xfe - 0xa1)))
   2399             ) {
                    /* anything else is treated like an unmappable code point */
   2400                 targetByteUnit=missingCharMarker;
   2401             }
   2402             if (targetByteUnit != missingCharMarker){
   2403 
   2404                 oldIsTargetByteDBCS = isTargetByteDBCS;
   2405                 isTargetByteDBCS = (UBool)(targetByteUnit>0x00FF);
   2406                   /* append the shift sequence */
   2407                 if (oldIsTargetByteDBCS != isTargetByteDBCS ){
   2408 
                        /* there is room for this one byte: target < targetLimit was tested at the top of the loop body */
   2409                     if (isTargetByteDBCS)
   2410                         *target++ = UCNV_SO;
   2411                     else
   2412                         *target++ = UCNV_SI;
   2413                     if(offsets)
   2414                         *(offsets++) = (int32_t)(source - args->source-1);
   2415                 }
   2416                 /* write the targetUniChar  to target */
   2417                 if(targetByteUnit <= 0x00FF){
   2418                     if( target < targetLimit){
   2419                         *(target++) = (unsigned char) targetByteUnit;
   2420                         if(offsets){
   2421                             *(offsets++) = (int32_t)(source - args->source-1);
   2422                         }
   2423 
   2424                     }else{
   2425                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit);
   2426                         *err = U_BUFFER_OVERFLOW_ERROR;
   2427                     }
   2428                 }else{
                        /* double byte: 0x80 is subtracted from each byte before it is written or queued */
   2429                     if(target < targetLimit){
   2430                         *(target++) =(unsigned char) ((targetByteUnit>>8) -0x80);
   2431                         if(offsets){
   2432                             *(offsets++) = (int32_t)(source - args->source-1);
   2433                         }
   2434                         if(target < targetLimit){
   2435                             *(target++) =(unsigned char) (targetByteUnit -0x80);
   2436                             if(offsets){
   2437                                 *(offsets++) = (int32_t)(source - args->source-1);
   2438                             }
   2439                         }else{
   2440                             args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit -0x80);
   2441                             *err = U_BUFFER_OVERFLOW_ERROR;
   2442                         }
   2443                     }else{
   2444                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) ((targetByteUnit>>8) -0x80);
   2445                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit-0x80);
   2446                         *err = U_BUFFER_OVERFLOW_ERROR;
   2447                     }
   2448                 }
   2449 
   2450             }
   2451             else{
   2452                 /* oops.. the code point is unassigned
   2453                  * set the error and reason
   2454                  */
   2455 
   2456                 /*check if the char is a First surrogate*/
   2457                 if(U16_IS_SURROGATE(sourceChar)) {
   2458                     if(U16_IS_SURROGATE_LEAD(sourceChar)) {
   2459 getTrail:
   2460                         /*look ahead to find the trail surrogate*/
   2461                         if(source <  sourceLimit) {
   2462                             /* test the following code unit */
   2463                             UChar trail=(UChar) *source;
   2464                             if(U16_IS_TRAIL(trail)) {
   2465                                 source++;
   2466                                 sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
   2467                                 *err = U_INVALID_CHAR_FOUND;
   2468                                 /* convert this surrogate code point */
   2469                                 /* exit this condition tree */
   2470                             } else {
   2471                                 /* this is an unmatched lead code unit (1st surrogate) */
   2472                                 /* callback(illegal) */
   2473                                 *err=U_ILLEGAL_CHAR_FOUND;
   2474                             }
   2475                         } else {
   2476                             /* no more input */
                                /* not an error yet: the lead surrogate is stored in fromUChar32 below */
   2477                             *err = U_ZERO_ERROR;
   2478                         }
   2479                     } else {
   2480                         /* this is an unmatched trail code unit (2nd surrogate) */
   2481                         /* callback(illegal) */
   2482                         *err=U_ILLEGAL_CHAR_FOUND;
   2483                     }
   2484                 } else {
   2485                     /* callback(unassigned) for a BMP code point */
   2486                     *err = U_INVALID_CHAR_FOUND;
   2487                 }
   2488 
                    /* every path through the surrogate/unassigned handling stores the code point and leaves the loop */
   2489                 args->converter->fromUChar32=sourceChar;
   2490                 break;
   2491             }
   2492         } /* end if(myTargetIndex<myTargetLength) */
   2493         else{
   2494             *err =U_BUFFER_OVERFLOW_ERROR;
   2495             break;
   2496         }
   2497 
   2498     }/* end while(mySourceIndex<mySourceLength) */
   2499 
   2500     /*
   2501      * the end of the input stream and detection of truncated input
   2502      * are handled by the framework, but for ISO-2022-KR conversion
   2503      * we need to be in ASCII mode at the very end
   2504      *
   2505      * conditions:
   2506      *   successful
   2507      *   not in ASCII mode
   2508      *   end of input and no truncated input
   2509      */
   2510     if( U_SUCCESS(*err) &&
   2511         isTargetByteDBCS &&
   2512         args->flush && source>=sourceLimit && args->converter->fromUChar32==0
   2513     ) {
   2514         int32_t sourceIndex;
   2515 
   2516         /* we are switching to ASCII */
   2517         isTargetByteDBCS=FALSE;
   2518 
   2519         /* get the source index of the last input character */
   2520         /*
   2521          * TODO this would be simpler and more reliable if we used a pair
   2522          * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
   2523          * so that we could simply use the prevSourceIndex here;
   2524          * this code gives an incorrect result for the rare case of an unmatched
   2525          * trail surrogate that is alone in the last buffer of the text stream
   2526          */
   2527         sourceIndex=(int32_t)(source-args->source);
   2528         if(sourceIndex>0) {
   2529             --sourceIndex;
   2530             if( U16_IS_TRAIL(args->source[sourceIndex]) &&
   2531                 (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
   2532             ) {
   2533                 --sourceIndex;
   2534             }
   2535         } else {
   2536             sourceIndex=-1;
   2537         }
   2538 
   2539         fromUWriteUInt8(
   2540             args->converter,
   2541             SHIFT_IN_STR, 1,
   2542             &target, (const char *)targetLimit,
   2543             &offsets, sourceIndex,
   2544             err);
   2545     }
   2546 
   2547     /*save the state and return */
   2548     args->source = source;
   2549     args->target = (char*)target;
   2550     args->converter->fromUnicodeStatus = (uint32_t)isTargetByteDBCS;
   2551 }
   2552 
   2553 /************************ To Unicode ***************************************/
   2554 
   2555 static void
   2556 UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterToUnicodeArgs *args,
   2557                                                             UErrorCode* err){
   2558     char const* sourceStart;
   2559     UConverterDataISO2022* myData=(UConverterDataISO2022*)(args->converter->extraInfo);
   2560 
   2561     UConverterToUnicodeArgs subArgs;
   2562     int32_t minArgsSize;
   2563 
   2564     /* set up the subconverter arguments */
   2565     if(args->size<sizeof(UConverterToUnicodeArgs)) {
   2566         minArgsSize = args->size;
   2567     } else {
   2568         minArgsSize = (int32_t)sizeof(UConverterToUnicodeArgs);
   2569     }
   2570 
   2571     uprv_memcpy(&subArgs, args, minArgsSize);
   2572     subArgs.size = (uint16_t)minArgsSize;
   2573     subArgs.converter = myData->currentConverter;
   2574 
   2575     /* remember the original start of the input for offsets */
   2576     sourceStart = args->source;
   2577 
   2578     if(myData->key != 0) {
   2579         /* continue with a partial escape sequence */
   2580         goto escape;
   2581     }
   2582 
   2583     while(U_SUCCESS(*err) && args->source < args->sourceLimit) {
   2584         /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
   2585         subArgs.source = args->source;
   2586         subArgs.sourceLimit = getEndOfBuffer_2022(&(args->source), args->sourceLimit, args->flush);
   2587         if(subArgs.source != subArgs.sourceLimit) {
   2588             /*
   2589              * get the current partial byte sequence
   2590              *
   2591              * it needs to be moved between the public and the subconverter
   2592              * so that the conversion framework, which only sees the public
   2593              * converter, can handle truncated and illegal input etc.
   2594              */
   2595             if(args->converter->toULength > 0) {
   2596                 uprv_memcpy(subArgs.converter->toUBytes, args->converter->toUBytes, args->converter->toULength);
   2597             }
   2598             subArgs.converter->toULength = args->converter->toULength;
   2599 
   2600             /*
   2601              * Convert up to the end of the input, or to before the next escape character.
   2602              * Does not handle conversion extensions because the preToU[] state etc.
   2603              * is not copied.
   2604              */
   2605             ucnv_MBCSToUnicodeWithOffsets(&subArgs, err);
   2606 
   2607             if(args->offsets != NULL && sourceStart != args->source) {
   2608                 /* update offsets to base them on the actual start of the input */
   2609                 int32_t *offsets = args->offsets;
   2610                 UChar *target = args->target;
   2611                 int32_t delta = (int32_t)(args->source - sourceStart);
   2612                 while(target < subArgs.target) {
   2613                     if(*offsets >= 0) {
   2614                         *offsets += delta;
   2615                     }
   2616                     ++offsets;
   2617                     ++target;
   2618                 }
   2619             }
   2620             args->source = subArgs.source;
   2621             args->target = subArgs.target;
   2622             args->offsets = subArgs.offsets;
   2623 
   2624             /* copy input/error/overflow buffers */
   2625             if(subArgs.converter->toULength > 0) {
   2626                 uprv_memcpy(args->converter->toUBytes, subArgs.converter->toUBytes, subArgs.converter->toULength);
   2627             }
   2628             args->converter->toULength = subArgs.converter->toULength;
   2629 
   2630             if(*err == U_BUFFER_OVERFLOW_ERROR) {
   2631                 if(subArgs.converter->UCharErrorBufferLength > 0) {
   2632                     uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer,
   2633                                 subArgs.converter->UCharErrorBufferLength);
   2634                 }
   2635                 args->converter->UCharErrorBufferLength=subArgs.converter->UCharErrorBufferLength;
   2636                 subArgs.converter->UCharErrorBufferLength = 0;
   2637             }
   2638         }
   2639 
   2640         if (U_FAILURE(*err) || (args->source == args->sourceLimit)) {
   2641             return;
   2642         }
   2643 
   2644 escape:
   2645         changeState_2022(args->converter,
   2646                &(args->source),
   2647                args->sourceLimit,
   2648                ISO_2022_KR,
   2649                err);
   2650     }
   2651 }
   2652 
   /*
    * Converts ISO-2022-KR bytes from args->source into UTF-16 code units at
    * args->target, recording per-code-unit source offsets when args->offsets
    * is non-NULL.
    *
    * Behavior visible in this function:
    *  - Converters with myData->version==1 are delegated to the _IBM variant.
    *  - SI sets toU2022State.g to 0 (single-byte), SO sets it to 1 (double-byte).
    *    An SI seen while isEmptySegment is still TRUE (nothing between SO and SI)
    *    is reported as U_ILLEGAL_ESCAPE_SEQUENCE with reason UCNV_IRREGULAR.
    *  - ESC is handed to changeState_2022(); a pending partial escape sequence
    *    (myData->key != 0) resumes there on entry.
    *  - A double-byte lead byte that ends the input is saved in toUBytes[0]
    *    (toULength==1) and resumed at getTrailByte on the next call.
    *  - U_BUFFER_OVERFLOW_ERROR is set when the target is full and input remains.
    *
    * On every exit args->source and args->target reflect what was consumed and
    * produced. Errors are returned through *err.
    */
   2653 static void
   2654 UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
   2655                                                             UErrorCode* err){
   2656     char tempBuf[2];
   2657     const char *mySource = ( char *) args->source;
   2658     UChar *myTarget = args->target;
   2659     const char *mySourceLimit = args->sourceLimit;
   2660     UChar32 targetUniChar = 0x0000;
            /*
             * Must be wider than 16 bits: the illegal-pair path below ORs in 0x10000
             * as a "two bytes" marker. With a 16-bit UChar that bit was truncated, so
             * a pair whose lead byte is 0x00 collapsed to a value <= 0xff and no
             * longer looked like two bytes to the error reporting code.
             */
   2661     uint32_t mySourceChar = 0x0000;
   2662     UConverterDataISO2022* myData;
   2663     UConverterSharedData* sharedData ;
   2664     UBool useFallback;
   2665 
   2666     myData=(UConverterDataISO2022*)(args->converter->extraInfo);
   2667     if(myData->version==1){
   2668         UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
   2669         return;
   2670     }
   2671 
   2672     /* initialize state */
   2673     sharedData = myData->currentConverter->sharedData;
   2674     useFallback = args->converter->useFallback;
   2675 
   2676     if(myData->key != 0) {
   2677         /* continue with a partial escape sequence */
   2678         goto escape;
   2679     } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
   2680         /* continue with a partial double-byte character */
   2681         mySourceChar = args->converter->toUBytes[0];
   2682         args->converter->toULength = 0;
   2683         goto getTrailByte;
   2684     }
   2685 
   2686     while(mySource< mySourceLimit){
   2687 
   2688         if(myTarget < args->targetLimit){
   2689 
   2690             mySourceChar= (unsigned char) *mySource++;
   2691 
   2692             if(mySourceChar==UCNV_SI){
   2693                 myData->toU2022State.g = 0;
   2694                 if (myData->isEmptySegment) {
   2695                     myData->isEmptySegment = FALSE;	/* we are handling it, reset to avoid future spurious errors */
   2696                     *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   2697                     args->converter->toUCallbackReason = UCNV_IRREGULAR;
   2698                     args->converter->toUBytes[0] = (uint8_t)mySourceChar;
   2699                     args->converter->toULength = 1;
   2700                     args->target = myTarget;
   2701                     args->source = mySource;
   2702                     return;
   2703                 }
   2704                 /*consume the source */
   2705                 continue;
   2706             }else if(mySourceChar==UCNV_SO){
   2707                 myData->toU2022State.g = 1;
   2708                 myData->isEmptySegment = TRUE;	/* Begin a new segment, empty so far */
   2709                 /*consume the source */
   2710                 continue;
   2711             }else if(mySourceChar==ESC_2022){
                        /* un-read the ESC so that changeState_2022() sees the whole sequence */
   2712                 mySource--;
   2713 escape:
   2714                 myData->isEmptySegment = FALSE;	/* Any invalid ESC sequences will be detected separately, so just reset this */
   2715                 changeState_2022(args->converter,&(mySource),
   2716                                 mySourceLimit, ISO_2022_KR, err);
   2717                 if(U_FAILURE(*err)){
   2718                     args->target = myTarget;
   2719                     args->source = mySource;
   2720                     return;
   2721                 }
   2722                 continue;
   2723             }
   2724 
   2725             myData->isEmptySegment = FALSE;	/* Any invalid char errors will be detected separately, so just reset this */
   2726             if(myData->toU2022State.g == 1) {
   2727                 if(mySource < mySourceLimit) {
   2728                     int leadIsOk, trailIsOk;
   2729                     uint8_t trailByte;
   2730 getTrailByte:
   2731                     targetUniChar = missingCharMarker;
   2732                     trailByte = (uint8_t)*mySource;
   2733                     /*
   2734                      * Ticket 5691: consistent illegal sequences:
   2735                      * - We include at least the first byte in the illegal sequence.
   2736                      * - If any of the non-initial bytes could be the start of a character,
   2737                      *   we stop the illegal sequence before the first one of those.
   2738                      *
   2739                      * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
   2740                      * an ESC/SO/SI, we report only the first byte as the illegal sequence.
   2741                      * Otherwise we convert or report the pair of bytes.
   2742                      */
   2743                     leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
   2744                     trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
   2745                     if (leadIsOk && trailIsOk) {
   2746                         ++mySource;
                                /* the lookup is done with both bytes offset by 0x80 */
   2747                         tempBuf[0] = (char)(mySourceChar + 0x80);
   2748                         tempBuf[1] = (char)(trailByte + 0x80);
   2749                         targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback);
   2750                         mySourceChar = (mySourceChar << 8) | trailByte;
   2751                     } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
   2752                         /* report a pair of illegal bytes if the second byte is not a DBCS starter */
   2753                         ++mySource;
   2754                         /* add another bit so that the code below writes 2 bytes in case of error */
   2755                         mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
   2756                     }
   2757                 } else {
                            /* lead byte is the last input byte: save it and resume on the next call */
   2758                     args->converter->toUBytes[0] = (uint8_t)mySourceChar;
   2759                     args->converter->toULength = 1;
   2760                     break;
   2761                 }
   2762             }
   2763             else if(mySourceChar <= 0x7f) {
   2764                 targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, mySource - 1, 1, useFallback);
   2765             } else {
   2766                 targetUniChar = 0xffff;
   2767             }
   2768             if(targetUniChar < 0xfffe){
   2769                 if(args->offsets) {
                            /* offset of the character's first byte: 1 byte back for single-byte, 2 for a pair */
   2770                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2771                 }
   2772                 *(myTarget++)=(UChar)targetUniChar;
   2773             }
   2774             else {
   2775                 /* Call the callback function*/
   2776                 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
   2777                 break;
   2778             }
   2779         }
   2780         else{
   2781             *err =U_BUFFER_OVERFLOW_ERROR;
   2782             break;
   2783         }
   2784     }
   2785     args->target = myTarget;
   2786     args->source = mySource;
   2787 }
   2788 
   2789 /*************************** END ISO2022-KR *********************************/
   2790 
   2791 /*************************** ISO-2022-CN *********************************
   2792 *
   2793 * Rules for ISO-2022-CN Encoding:
   2794 * i)   The designator sequence must appear once on a line before any instance
   2795 *      of character set it designates.
   2796 * ii)  If two lines contain characters from the same character set, both lines
   2797 *      must include the designator sequence.
   2798 * iii) Once the designator sequence is known, a shifting sequence has to be found
   2799 *      to invoke the designated character set
   2800 * iv)  All lines start in ASCII and end in ASCII.
   2801 * v)   Four shifting sequences are employed for this purpose:
   2802 *
   2803 *      Sequence    ASCII Eq    Charsets
   2804 *      ----------  -------    ---------
   2805 *      SI           <SI>        US-ASCII
   2806 *      SO           <SO>        CNS-11643-1992 Plane 1, GB2312, ISO-IR-165
   2807 *      SS2          <ESC>N      CNS-11643-1992 Plane 2
   2808 *      SS3          <ESC>O      CNS-11643-1992 Planes 3-7
   2809 *
   2810 * vi)
   2811 *      SOdesignator  : ESC "$" ")" finalchar_for_SO
   2812 *      SS2designator : ESC "$" "*" finalchar_for_SS2
   2813 *      SS3designator : ESC "$" "+" finalchar_for_SS3
   2814 *
   2815 *      ESC $ ) A       Indicates the bytes following SO are Chinese
   2816 *       characters as defined in GB 2312-80, until
   2817 *       another SOdesignation appears
   2818 *
   2819 *
   2820 *      ESC $ ) E       Indicates the bytes following SO are as defined
   2821 *       in ISO-IR-165 (for details, see section 2.1),
   2822 *       until another SOdesignation appears
   2823 *
   2824 *      ESC $ ) G       Indicates the bytes following SO are as defined
   2825 *       in CNS 11643-plane-1, until another
   2826 *       SOdesignation appears
   2827 *
   2828 *      ESC $ * H       Indicates the two bytes immediately following
   2829 *       SS2 is a Chinese character as defined in CNS
   2830 *       11643-plane-2, until another SS2designation
   2831 *       appears
   2832 *       (Meaning <ESC>N must precede every 2 byte
   2833 *        sequence.)
   2834 *
   2835 *      ESC $ + I       Indicates the immediate two bytes following SS3
   2836 *       is a Chinese character as defined in CNS
   2837 *       11643-plane-3, until another SS3designation
   2838 *       appears
   2839 *       (Meaning <ESC>O must precede every 2 byte
   2840 *        sequence.)
   2841 *
   2842 *      ESC $ + J       Indicates the immediate two bytes following SS3
   2843 *       is a Chinese character as defined in CNS
   2844 *       11643-plane-4, until another SS3designation
   2845 *       appears
   2846 *       (In English: <ESC>O must precede every 2 byte
   2847 *        sequence.)
   2848 *
   2849 *      ESC $ + K       Indicates the immediate two bytes following SS3
   2850 *       is a Chinese character as defined in CNS
   2851 *       11643-plane-5, until another SS3designation
   2852 *       appears
   2853 *
   2854 *      ESC $ + L       Indicates the immediate two bytes following SS3
   2855 *       is a Chinese character as defined in CNS
   2856 *       11643-plane-6, until another SS3designation
   2857 *       appears
   2858 *
   2859 *      ESC $ + M       Indicates the immediate two bytes following SS3
   2860 *       is a Chinese character as defined in CNS
   2861 *       11643-plane-7, until another SS3designation
   2862 *       appears
   2863 *
   2864 *       As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and
   2865 *       has its own designation information before any Chinese characters
   2866 *       appear
   2867 *
   2868 */
   2869 
   2870 /* The following are defined this way to make the strings truly readonly */
        /*
         * Each string is a 4-byte ISO-2022-CN designator sequence; the ASCII
         * spelling of the bytes is given next to each definition.
         */
   2871 static const char GB_2312_80_STR[] = "\x1B\x24\x29\x41";             /* ESC $ ) A */
   2872 static const char ISO_IR_165_STR[] = "\x1B\x24\x29\x45";             /* ESC $ ) E */
   2873 static const char CNS_11643_1992_Plane_1_STR[] = "\x1B\x24\x29\x47"; /* ESC $ ) G */
   2874 static const char CNS_11643_1992_Plane_2_STR[] = "\x1B\x24\x2A\x48"; /* ESC $ * H */
   2875 static const char CNS_11643_1992_Plane_3_STR[] = "\x1B\x24\x2B\x49"; /* ESC $ + I */
   2876 static const char CNS_11643_1992_Plane_4_STR[] = "\x1B\x24\x2B\x4A"; /* ESC $ + J */
   2877 static const char CNS_11643_1992_Plane_5_STR[] = "\x1B\x24\x2B\x4B"; /* ESC $ + K */
   2878 static const char CNS_11643_1992_Plane_6_STR[] = "\x1B\x24\x2B\x4C"; /* ESC $ + L */
   2879 static const char CNS_11643_1992_Plane_7_STR[] = "\x1B\x24\x2B\x4D"; /* ESC $ + M */
   2880 
   2881 /********************** ISO2022-CN Data **************************/
        /*
         * Designator sequences by table slot. The fromUnicode code in this file
         * reads slot cs directly for cs < CNS_11643, and slot
         * CNS_11643 + (cs - CNS_11643_1) for the CNS 11643 planes.
         */
   2882 static const char* const escSeqCharsCN[10] ={
   2883         SHIFT_IN_STR,                   /* 0 ASCII */
   2884         GB_2312_80_STR,                 /* 1 GB2312_1 */
   2885         ISO_IR_165_STR,                 /* 2 ISO_IR_165 */
   2886         CNS_11643_1992_Plane_1_STR,     /* 3 CNS 11643 plane 1 */
   2887         CNS_11643_1992_Plane_2_STR,     /* 4 CNS 11643 plane 2 */
   2888         CNS_11643_1992_Plane_3_STR,     /* 5 CNS 11643 plane 3 */
   2889         CNS_11643_1992_Plane_4_STR,     /* 6 CNS 11643 plane 4 */
   2890         CNS_11643_1992_Plane_5_STR,     /* 7 CNS 11643 plane 5 */
   2891         CNS_11643_1992_Plane_6_STR,     /* 8 CNS 11643 plane 6 */
   2892         CNS_11643_1992_Plane_7_STR      /* 9 CNS 11643 plane 7 */
   2893 };
   2894 
   /*
    * Converts UTF-16 text from args->source into ISO-2022-CN bytes at
    * args->target, recording source offsets when args->offsets is non-NULL.
    *
    * Per code point:
    *  - Surrogates: a lead surrogate is combined with a following trail
    *    surrogate. A lead surrogate at the end of the input is parked in
    *    cnv->fromUChar32 and resumed at getTrail on the next call. Unpaired
    *    surrogates set U_ILLEGAL_CHAR_FOUND.
    *  - U+0000..U+007F: SO/SI/ESC (IS_2022_CONTROL) set U_ILLEGAL_CHAR_FOUND.
    *    Other values are written as one byte, preceded by SI when the current
    *    state is not g==0. CR and LF clear the whole fromUnicode state.
    *  - Above U+007F: up to three candidate charsets (choices[]) are tried.
    *    A roundtrip mapping wins over a fallback mapping. A designator and/or
    *    shift sequence is emitted when the state requires it, followed by the
    *    two character bytes. No mapping at all sets U_INVALID_CHAR_FOUND.
    *
    * When args->flush is set, all input was consumed without error and the
    * state is not g==0, a final SI is written.
    *
    * On exit args->source and args->target are updated; errors are returned
    * through *err, with the offending code point left in cnv->fromUChar32.
    */
   2895 static void
   2896 UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
   2897     UConverter *cnv = args->converter;
   2898     UConverterDataISO2022 *converterData;
   2899     ISO2022State *pFromU2022State;
   2900     uint8_t *target = (uint8_t *) args->target;
   2901     const uint8_t *targetLimit = (const uint8_t *) args->targetLimit;
   2902     const UChar* source = args->source;
   2903     const UChar* sourceLimit = args->sourceLimit;
   2904     int32_t* offsets = args->offsets;
   2905     UChar32 sourceChar;
            /* holds at most: 4-byte designator + 2-byte shift + 2 character bytes */
   2906     char buffer[8];
   2907     int32_t len;
   2908     int8_t choices[3];
   2909     int32_t choiceCount;
   2910     uint32_t targetValue = 0;
   2911     UBool useFallback;
   2912 
   2913     /* set up the state */
   2914     converterData     = (UConverterDataISO2022*)cnv->extraInfo;
   2915     pFromU2022State   = &converterData->fromU2022State;
   2916 
            /* choiceCount==0 means choices[] must be (re)built before the next lookup */
   2917     choiceCount = 0;
   2918 
   2919     /* check if the last codepoint of previous buffer was a lead surrogate*/
   2920     if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
   2921         goto getTrail;
   2922     }
   2923 
   2924     while( source < sourceLimit){
   2925         if(target < targetLimit){
   2926 
   2927             sourceChar  = *(source++);
   2928             /*check if the char is a First surrogate*/
   2929              if(U16_IS_SURROGATE(sourceChar)) {
   2930                 if(U16_IS_SURROGATE_LEAD(sourceChar)) {
   2931 getTrail:
   2932                     /*look ahead to find the trail surrogate*/
   2933                     if(source < sourceLimit) {
   2934                         /* test the following code unit */
   2935                         UChar trail=(UChar) *source;
   2936                         if(U16_IS_TRAIL(trail)) {
   2937                             source++;
   2938                             sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
   2939                             cnv->fromUChar32=0x00;
   2940                             /* convert this supplementary code point */
   2941                             /* exit this condition tree */
   2942                         } else {
   2943                             /* this is an unmatched lead code unit (1st surrogate) */
   2944                             /* callback(illegal) */
   2945                             *err=U_ILLEGAL_CHAR_FOUND;
   2946                             cnv->fromUChar32=sourceChar;
   2947                             break;
   2948                         }
   2949                     } else {
   2950                         /* no more input */
   2951                         cnv->fromUChar32=sourceChar;
   2952                         break;
   2953                     }
   2954                 } else {
   2955                     /* this is an unmatched trail code unit (2nd surrogate) */
   2956                     /* callback(illegal) */
   2957                     *err=U_ILLEGAL_CHAR_FOUND;
   2958                     cnv->fromUChar32=sourceChar;
   2959                     break;
   2960                 }
   2961             }
   2962 
   2963             /* do the conversion */
   2964             if(sourceChar <= 0x007f ){
   2965                 /* do not convert SO/SI/ESC */
   2966                 if(IS_2022_CONTROL(sourceChar)) {
   2967                     /* callback(illegal) */
   2968                     *err=U_ILLEGAL_CHAR_FOUND;
   2969                     cnv->fromUChar32=sourceChar;
   2970                     break;
   2971                 }
   2972 
   2973                 /* US-ASCII */
   2974                 if(pFromU2022State->g == 0) {
   2975                     buffer[0] = (char)sourceChar;
   2976                     len = 1;
   2977                 } else {
                            /* not in g==0: emit SI first, then the ASCII byte */
   2978                     buffer[0] = UCNV_SI;
   2979                     buffer[1] = (char)sourceChar;
   2980                     len = 2;
   2981                     pFromU2022State->g = 0;
   2982                     choiceCount = 0;
   2983                 }
   2984                 if(sourceChar == CR || sourceChar == LF) {
   2985                     /* reset the state at the end of a line */
   2986                     uprv_memset(pFromU2022State, 0, sizeof(ISO2022State));
   2987                     choiceCount = 0;
   2988                 }
   2989             }
   2990             else{
   2991                 /* convert U+0080..U+10ffff */
   2992                 int32_t i;
   2993                 int8_t cs, g;
   2994 
   2995                 if(choiceCount == 0) {
   2996                     /* try the current SO/G1 converter first */
   2997                     choices[0] = pFromU2022State->cs[1];
   2998 
   2999                     /* default to GB2312_1 if none is designated yet */
   3000                     if(choices[0] == 0) {
   3001                         choices[0] = GB2312_1;
   3002                     }
   3003 
   3004                     if(converterData->version == 0) {
   3005                         /* ISO-2022-CN */
   3006 
   3007                         /* try the other SO/G1 converter; a CNS_11643_1 lookup may result in any plane */
   3008                         if(choices[0] == GB2312_1) {
   3009                             choices[1] = (int8_t)CNS_11643_1;
   3010                         } else {
   3011                             choices[1] = (int8_t)GB2312_1;
   3012                         }
   3013 
   3014                         choiceCount = 2;
   3015                     } else if (converterData->version == 1) {
   3016                         /* ISO-2022-CN-EXT */
   3017 
   3018                         /* try one of the other converters */
   3019                         switch(choices[0]) {
   3020                         case GB2312_1:
   3021                             choices[1] = (int8_t)CNS_11643_1;
   3022                             choices[2] = (int8_t)ISO_IR_165;
   3023                             break;
   3024                         case ISO_IR_165:
   3025                             choices[1] = (int8_t)GB2312_1;
   3026                             choices[2] = (int8_t)CNS_11643_1;
   3027                             break;
   3028                         default: /* CNS_11643_x */
   3029                             choices[1] = (int8_t)GB2312_1;
   3030                             choices[2] = (int8_t)ISO_IR_165;
   3031                             break;
   3032                         }
   3033 
   3034                         choiceCount = 3;
   3035                     } else {
                                /*
                                 * NOTE(review): this branch (version is neither 0 nor 1) fills
                                 * choices[] but leaves choiceCount at 0, so the lookup loop below
                                 * never runs and every code point above U+007F ends in
                                 * U_INVALID_CHAR_FOUND. Possibly a missing "choiceCount = 2;" --
                                 * confirm the intended behavior before changing it.
                                 */
   3036                         choices[0] = (int8_t)CNS_11643_1;
   3037                         choices[1] = (int8_t)GB2312_1;
   3038                     }
   3039                 }
   3040 
   3041                 cs = g = 0;
   3042                 /*
   3043                  * len==0: no mapping found yet
   3044                  * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
   3045                  * len>0: found a roundtrip result, done
   3046                  */
   3047                 len = 0;
   3048                 /*
   3049                  * We will turn off useFallback after finding a fallback,
   3050                  * but we still get fallbacks from PUA code points as usual.
   3051                  * Therefore, we will also need to check that we don't overwrite
   3052                  * an early fallback with a later one.
   3053                  */
   3054                 useFallback = cnv->useFallback;
   3055 
   3056                 for(i = 0; i < choiceCount && len <= 0; ++i) {
   3057                     int8_t cs0 = choices[i];
   3058                     if(cs0 > 0) {
   3059                         uint32_t value;
   3060                         int32_t len2;
   3061                         if(cs0 >= CNS_11643_0) {
                                    /* all CNS 11643 planes are looked up through the single CNS_11643 converter */
   3062                             len2 = MBCS_FROM_UCHAR32_ISO2022(
   3063                                         converterData->myConverterArray[CNS_11643],
   3064                                         sourceChar,
   3065                                         &value,
   3066                                         useFallback,
   3067                                         MBCS_OUTPUT_3);
   3068                             if(len2 == 3 || (len2 == -3 && len == 0)) {
   3069                                 targetValue = value;
                                        /* the top byte of the 3-byte result, minus 0x80, selects the CNS_11643_x charset */
   3070                                 cs = (int8_t)(CNS_11643_0 + (value >> 16) - 0x80);
   3071                                 if(len2 >= 0) {
   3072                                     len = 2;
   3073                                 } else {
   3074                                     len = -2;
   3075                                     useFallback = FALSE;
   3076                                 }
   3077                                 if(cs == CNS_11643_1) {
   3078                                     g = 1;
   3079                                 } else if(cs == CNS_11643_2) {
   3080                                     g = 2;
   3081                                 } else /* plane 3..7 */ if(converterData->version == 1) {
   3082                                     g = 3;
   3083                                 } else {
   3084                                     /* ISO-2022-CN (without -EXT) does not support plane 3..7 */
   3085                                     len = 0;
   3086                                 }
   3087                             }
   3088                         } else {
   3089                             /* GB2312_1 or ISO-IR-165 */
   3090                             U_ASSERT(cs0<UCNV_2022_MAX_CONVERTERS);
   3091                             len2 = MBCS_FROM_UCHAR32_ISO2022(
   3092                                         converterData->myConverterArray[cs0],
   3093                                         sourceChar,
   3094                                         &value,
   3095                                         useFallback,
   3096                                         MBCS_OUTPUT_2);
   3097                             if(len2 == 2 || (len2 == -2 && len == 0)) {
   3098                                 targetValue = value;
   3099                                 len = len2;
   3100                                 cs = cs0;
   3101                                 g = 1;
   3102                                 useFallback = FALSE;
   3103                             }
   3104                         }
   3105                     }
   3106                 }
   3107 
   3108                 if(len != 0) {
   3109                     len = 0; /* count output bytes; it must have been abs(len) == 2 */
   3110 
   3111                     /* write the designation sequence if necessary */
   3112                     if(cs != pFromU2022State->cs[g]) {
   3113                         if(cs < CNS_11643) {
   3114                             uprv_memcpy(buffer, escSeqCharsCN[cs], 4);
   3115                         } else {
   3116                             U_ASSERT(cs >= CNS_11643_1);
   3117                             uprv_memcpy(buffer, escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)], 4);
   3118                         }
   3119                         len = 4;
   3120                         pFromU2022State->cs[g] = cs;
   3121                         if(g == 1) {
   3122                             /* changing the SO/G1 charset invalidates the choices[] */
   3123                             choiceCount = 0;
   3124                         }
   3125                     }
   3126 
   3127                     /* write the shift sequence if necessary */
   3128                     if(g != pFromU2022State->g) {
   3129                         switch(g) {
   3130                         case 1:
   3131                             buffer[len++] = UCNV_SO;
   3132 
   3133                             /* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */
   3134                             pFromU2022State->g = 1;
   3135                             break;
   3136                         case 2:
                                    /* SS2: ESC N */
   3137                             buffer[len++] = 0x1b;
   3138                             buffer[len++] = 0x4e;
   3139                             break;
   3140                         default: /* case 3 */
                                    /* SS3: ESC O */
   3141                             buffer[len++] = 0x1b;
   3142                             buffer[len++] = 0x4f;
   3143                             break;
   3144                         }
   3145                     }
   3146 
   3147                     /* write the two output bytes */
   3148                     buffer[len++] = (char)(targetValue >> 8);
   3149                     buffer[len++] = (char)targetValue;
   3150                 } else {
   3151                     /* if we cannot find the character after checking all codepages
   3152                      * then this is an error
   3153                      */
   3154                     *err = U_INVALID_CHAR_FOUND;
   3155                     cnv->fromUChar32=sourceChar;
   3156                     break;
   3157                 }
   3158             }
   3159 
   3160             /* output len>0 bytes in buffer[] */
                    /* len==1 and len==2 (with room for both bytes) are written inline; everything else goes through fromUWriteUInt8() */
   3161             if(len == 1) {
   3162                 *target++ = buffer[0];
   3163                 if(offsets) {
   3164                     *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */
   3165                 }
   3166             } else if(len == 2 && (target + 2) <= targetLimit) {
   3167                 *target++ = buffer[0];
   3168                 *target++ = buffer[1];
   3169                 if(offsets) {
   3170                     int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));
   3171                     *offsets++ = sourceIndex;
   3172                     *offsets++ = sourceIndex;
   3173                 }
   3174             } else {
   3175                 fromUWriteUInt8(
   3176                     cnv,
   3177                     buffer, len,
   3178                     &target, (const char *)targetLimit,
   3179                     &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
   3180                     err);
   3181                 if(U_FAILURE(*err)) {
   3182                     break;
   3183                 }
   3184             }
   3185         } /* end if(target < targetLimit) */
   3186         else{
   3187             *err =U_BUFFER_OVERFLOW_ERROR;
   3188             break;
   3189         }
   3190 
   3191     }/* end while(source < sourceLimit) */
   3192 
   3193     /*
   3194      * the end of the input stream and detection of truncated input
   3195      * are handled by the framework, but for ISO-2022-CN conversion
   3196      * we need to be in ASCII mode at the very end
   3197      *
   3198      * conditions:
   3199      *   successful
   3200      *   not in ASCII mode
   3201      *   end of input and no truncated input
   3202      */
   3203     if( U_SUCCESS(*err) &&
   3204         pFromU2022State->g!=0 &&
   3205         args->flush && source>=sourceLimit && cnv->fromUChar32==0
   3206     ) {
   3207         int32_t sourceIndex;
   3208 
   3209         /* we are switching to ASCII */
   3210         pFromU2022State->g=0;
   3211 
   3212         /* get the source index of the last input character */
   3213         /*
   3214          * TODO this would be simpler and more reliable if we used a pair
   3215          * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
   3216          * so that we could simply use the prevSourceIndex here;
   3217          * this code gives an incorrect result for the rare case of an unmatched
   3218          * trail surrogate that is alone in the last buffer of the text stream
   3219          */
   3220         sourceIndex=(int32_t)(source-args->source);
   3221         if(sourceIndex>0) {
   3222             --sourceIndex;
                    /* step back once more when the last unit looks like the trail half of a surrogate pair */
   3223             if( U16_IS_TRAIL(args->source[sourceIndex]) &&
   3224                 (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
   3225             ) {
   3226                 --sourceIndex;
   3227             }
   3228         } else {
                    /* no input was consumed in this call, so there is no index to report */
   3229             sourceIndex=-1;
   3230         }
   3231 
   3232         fromUWriteUInt8(
   3233             cnv,
   3234             SHIFT_IN_STR, 1,
   3235             &target, (const char *)targetLimit,
   3236             &offsets, sourceIndex,
   3237             err);
   3238     }
   3239 
   3240     /*save the state and return */
   3241     args->source = source;
   3242     args->target = (char*)target;
   3243 }
   3244 
   3245 
   /*
    * Converts ISO-2022-CN input bytes (args->source up to args->sourceLimit) into
    * UChars written at args->target (up to args->targetLimit).
    *
    * Per input byte:
    *  - SI selects g=0; SO selects g=1, but only if cs[1] is non-zero.
    *  - ESC is handed to changeState_2022(), which advances the source pointer.
    *  - CR/LF zero the whole ISO2022State and are then converted like any other byte.
    *  - Any other byte is emitted as-is when g==0 and the byte is <=0x7f; when g!=0
    *    and both it and the following byte are in 0x21..0x7e, the pair is looked up
    *    with ucnv_MBCSSimpleGetNextUChar().
    *
    * State carried between calls: myData->key (unfinished escape sequence),
    * converter->toUBytes/toULength (lead byte whose trail byte has not arrived yet)
    * and myData->isEmptySegment (SO seen with no character after it so far).
    *
    * When args->offsets is non-NULL, a source offset is stored for every UChar that
    * is written directly to the target. Failures are reported through *err
    * (U_ILLEGAL_ESCAPE_SEQUENCE, U_BUFFER_OVERFLOW_ERROR, or whatever
    * changeState_2022()/toUnicodeCallback() set). args->source and args->target are
    * updated before every return.
    */
   3246 static void
   3247 UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
   3248                                                UErrorCode* err){
   3249     char tempBuf[3];
   3250     const char *mySource = (char *) args->source;
   3251     UChar *myTarget = args->target;
   3252     const char *mySourceLimit = args->sourceLimit;
   3253     uint32_t targetUniChar = 0x0000;
   3254     uint32_t mySourceChar = 0x0000;
   3255     UConverterDataISO2022* myData;
   3256     ISO2022State *pToU2022State;
   3257 
   3258     myData=(UConverterDataISO2022*)(args->converter->extraInfo);
   3259     pToU2022State = &myData->toU2022State;
   3260 
            /*
             * Resume work left over from the previous call. Both jumps land on
             * labels inside the switch in the loop below.
             */
   3261     if(myData->key != 0) {
   3262         /* continue with a partial escape sequence */
   3263         goto escape;
   3264     } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
   3265         /* continue with a partial double-byte character */
                /* (only when there is both a trail byte to read and room to write the result) */
   3266         mySourceChar = args->converter->toUBytes[0];
   3267         args->converter->toULength = 0;
   3268         targetUniChar = missingCharMarker;
   3269         goto getTrailByte;
   3270     }
   3271 
   3272     while(mySource < mySourceLimit){
   3273 
   3274         targetUniChar =missingCharMarker;
   3275 
   3276         if(myTarget < args->targetLimit){
   3277 
   3278             mySourceChar= (unsigned char) *mySource++;
   3279 
   3280             switch(mySourceChar){
   3281             case UCNV_SI:
   3282                 pToU2022State->g=0;
                        /* SI while the segment opened by SO is still empty is reported as an error */
   3283                 if (myData->isEmptySegment) {
   3284                     myData->isEmptySegment = FALSE;	/* we are handling it, reset to avoid future spurious errors */
   3285                     *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   3286                     args->converter->toUCallbackReason = UCNV_IRREGULAR;
   3287                     args->converter->toUBytes[0] = mySourceChar;
   3288                     args->converter->toULength = 1;
   3289                     args->target = myTarget;
   3290                     args->source = mySource;
   3291                     return;
   3292                 }
   3293                 continue;
   3294 
   3295             case UCNV_SO:
   3296                 if(pToU2022State->cs[1] != 0) {
   3297                     pToU2022State->g=1;
   3298                     myData->isEmptySegment = TRUE;	/* Begin a new segment, empty so far */
   3299                     continue;
   3300                 } else {
   3301                     /* illegal to have SO before a matching designator */
   3302                     myData->isEmptySegment = FALSE;	/* Handling a different error, reset this to avoid future spurious errs */
                            /* targetUniChar is still missingCharMarker, so the code after the switch calls the callback */
   3303                     break;
   3304                 }
   3305 
   3306             case ESC_2022:
                        /* step back onto the ESC byte so changeState_2022() starts reading there */
   3307                 mySource--;
   3308 escape:
   3309                 {
                            /* remember where we were so the length of the consumed sequence can be computed below */
   3310                     const char * mySourceBefore = mySource;
   3311                     int8_t toULengthBefore = args->converter->toULength;
   3312 
   3313                     changeState_2022(args->converter,&(mySource),
   3314                         mySourceLimit, ISO_2022_CN,err);
   3315 
   3316                     /* After SO there must be at least one character before a designator (designator error handled separately) */
   3317                     if(myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
   3318                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   3319                         args->converter->toUCallbackReason = UCNV_IRREGULAR;
   3320                         args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore));
   3321                     }
   3322                 }
   3323 
   3324                 /* invalid or illegal escape sequence */
   3325                 if(U_FAILURE(*err)){
   3326                     args->target = myTarget;
   3327                     args->source = mySource;
   3328                     myData->isEmptySegment = FALSE;	/* Reset to avoid future spurious errors */
   3329                     return;
   3330                 }
   3331                 continue;
   3332 
   3333             /* ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */
   3334 
   3335             case CR:
   3336                 /*falls through*/
   3337             case LF:
                        /* a line end clears g, prevG and every cs[] entry at once */
   3338                 uprv_memset(pToU2022State, 0, sizeof(ISO2022State));
   3339                 /* falls through */
   3340             default:
   3341                 /* convert one or two bytes */
   3342                 myData->isEmptySegment = FALSE;
   3343                 if(pToU2022State->g != 0) {
   3344                     if(mySource < mySourceLimit) {
   3345                         UConverterSharedData *cnv;
   3346                         StateEnum tempState;
   3347                         int32_t tempBufLen;
   3348                         int leadIsOk, trailIsOk;
   3349                         uint8_t trailByte;
   3350 getTrailByte:
   3351                         trailByte = (uint8_t)*mySource;
   3352                         /*
   3353                          * Ticket 5691: consistent illegal sequences:
   3354                          * - We include at least the first byte in the illegal sequence.
   3355                          * - If any of the non-initial bytes could be the start of a character,
   3356                          *   we stop the illegal sequence before the first one of those.
   3357                          *
   3358                          * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
   3359                          * an ESC/SO/SI, we report only the first byte as the illegal sequence.
   3360                          * Otherwise we convert or report the pair of bytes.
   3361                          */
                                /* range test 0x21..0x7e done with one unsigned compare: values below 0x21 wrap around to large numbers */
   3362                         leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
   3363                         trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
   3364                         if (leadIsOk && trailIsOk) {
   3365                             ++mySource;
   3366                             tempState = (StateEnum)pToU2022State->cs[pToU2022State->g];
   3367                             if(tempState >= CNS_11643_0) {
                                        /* all CNS states share one table; a prefix byte of 0x80 + (state - CNS_11643_0) is prepended to the two input bytes */
   3368                                 cnv = myData->myConverterArray[CNS_11643];
   3369                                 tempBuf[0] = (char) (0x80+(tempState-CNS_11643_0));
   3370                                 tempBuf[1] = (char) (mySourceChar);
   3371                                 tempBuf[2] = (char) trailByte;
   3372                                 tempBufLen = 3;
   3373 
   3374                             }else{
   3375                                 U_ASSERT(tempState<UCNV_2022_MAX_CONVERTERS);
   3376                                 cnv = myData->myConverterArray[tempState];
   3377                                 tempBuf[0] = (char) (mySourceChar);
   3378                                 tempBuf[1] = (char) trailByte;
   3379                                 tempBufLen = 2;
   3380                             }
   3381                             targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE);
                                    /* keep both bytes in mySourceChar: used for the offset arithmetic and passed to the callback below */
   3382                             mySourceChar = (mySourceChar << 8) | trailByte;
   3383                         } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
   3384                             /* report a pair of illegal bytes if the second byte is not a DBCS starter */
   3385                             ++mySource;
   3386                             /* add another bit so that the code below writes 2 bytes in case of error */
   3387                             mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
   3388                         }
   3389                         if(pToU2022State->g>=2) {
   3390                             /* return from a single-shift state to the previous one */
   3391                             pToU2022State->g=pToU2022State->prevG;
   3392                         }
   3393                     } else {
                                /* input ended after the lead byte: park it in toUBytes so the next call can resume at getTrailByte */
   3394                         args->converter->toUBytes[0] = (uint8_t)mySourceChar;
   3395                         args->converter->toULength = 1;
   3396                         goto endloop;
   3397                     }
   3398                 }
   3399                 else{
                            /* g==0: bytes up to 0x7f map to themselves; larger bytes keep missingCharMarker and reach the callback */
   3400                     if(mySourceChar <= 0x7f) {
   3401                         targetUniChar = (UChar) mySourceChar;
   3402                     }
   3403                 }
   3404                 break;
   3405             }
                    /*
                     * Three outcomes: a value below missingCharMarker-1 is stored as one UChar,
                     * a value above missingCharMarker is split into a surrogate pair,
                     * anything else goes to the callback.
                     */
   3406             if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){
   3407                 if(args->offsets){
                            /* offset of the character's first byte: step back 1 byte for a single-byte source char, 2 for a pair */
   3408                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   3409                 }
   3410                 *(myTarget++)=(UChar)targetUniChar;
   3411             }
   3412             else if(targetUniChar > missingCharMarker){
   3413                 /* disassemble the surrogate pair and write to output*/
   3414                 targetUniChar-=0x0010000;
   3415                 *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));
   3416                 if(args->offsets){
   3417                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   3418                 }
   3419                 ++myTarget;
   3420                 if(myTarget< args->targetLimit){
   3421                     *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
   3422                     if(args->offsets){
   3423                         args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   3424                     }
   3425                     ++myTarget;
   3426                 }else{
                            /* no room for the trail surrogate: store it in the converter's UCharErrorBuffer; *err is not set in this branch */
   3427                     args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
   3428                                     (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
   3429                 }
   3430 
   3431             }
   3432             else{
   3433                 /* Call the callback function*/
   3434                 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
   3435                 break;
   3436             }
   3437         }
   3438         else{
                    /* target is full while source bytes remain */
   3439             *err =U_BUFFER_OVERFLOW_ERROR;
   3440             break;
   3441         }
   3442     }
   3443 endloop:
   3444     args->target = myTarget;
   3445     args->source = mySource;
   3446 }
   3447 
   /*
    * Writes the converter's substitution character (cnv->subChars) to the output,
    * preceded by whatever shift/escape bytes are needed for the current
    * fromUnicode state. The variant is chosen by the first letter of
    * myConverterData->locale:
    *  - 'j': emit SI if g==1, emit ESC ( B if cs[0] is neither ASCII nor JISX201,
    *         then the first substitution byte.
    *  - 'c': emit SI if g!=0, then the first substitution byte.
    *  - 'k', version 0: one-byte substitution is preceded by SI when
    *         fromUnicodeStatus is set; a two-byte substitution is preceded by SO
    *         when it is not set.
    *  - 'k', other versions: delegate to currentConverter through
    *         ucnv_cbFromUWriteSub() and return without using the local buffer.
    *
    * The state fields tested above are updated to match the bytes emitted.
    * All non-delegated paths collect bytes in buffer[] and write them with
    * ucnv_cbFromUWriteBytes() at the end; errors are reported through *err.
    */
   3448 static void
   3449 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
   3450     UConverter *cnv = args->converter;
   3451     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo;
   3452     ISO2022State *pFromU2022State=&myConverterData->fromU2022State;
   3453     char *p, *subchar;
            /* the longest sequence built below is SI + 3-byte escape + 1 substitution byte = 5 bytes */
   3454     char buffer[8];
   3455     int32_t length;
   3456 
   3457     subchar=(char *)cnv->subChars;
   3458     length=cnv->subCharLen; /* assume length==1 for most variants */
   3459 
   3460     p = buffer;
   3461     switch(myConverterData->locale[0]){
   3462     case 'j':
   3463         {
   3464             int8_t cs;
   3465 
   3466             if(pFromU2022State->g == 1) {
   3467                 /* JIS7: switch from G1 to G0 */
   3468                 pFromU2022State->g = 0;
   3469                 *p++ = UCNV_SI;
   3470             }
   3471 
   3472             cs = pFromU2022State->cs[0];
   3473             if(cs != ASCII && cs != JISX201) {
   3474                 /* not in ASCII or JIS X 0201: switch to ASCII */
   3475                 pFromU2022State->cs[0] = (int8_t)ASCII;
                        /* the three bytes are ESC ( B */
   3476                 *p++ = '\x1b';
   3477                 *p++ = '\x28';
   3478                 *p++ = '\x42';
   3479             }
   3480 
   3481             *p++ = subchar[0];
   3482             break;
   3483         }
   3484     case 'c':
   3485         if(pFromU2022State->g != 0) {
   3486             /* not in ASCII mode: switch to ASCII */
   3487             pFromU2022State->g = 0;
   3488             *p++ = UCNV_SI;
   3489         }
   3490         *p++ = subchar[0];
   3491         break;
   3492     case 'k':
   3493         if(myConverterData->version == 0) {
                    /* here fromUnicodeStatus serves as the SBCS(0)/DBCS(1) mode flag */
   3494             if(length == 1) {
   3495                 if((UBool)args->converter->fromUnicodeStatus) {
   3496                     /* in DBCS mode: switch to SBCS */
   3497                     args->converter->fromUnicodeStatus = 0;
   3498                     *p++ = UCNV_SI;
   3499                 }
   3500                 *p++ = subchar[0];
   3501             } else /* length == 2*/ {
   3502                 if(!(UBool)args->converter->fromUnicodeStatus) {
   3503                     /* in SBCS mode: switch to DBCS */
   3504                     args->converter->fromUnicodeStatus = 1;
   3505                     *p++ = UCNV_SO;
   3506                 }
   3507                 *p++ = subchar[0];
   3508                 *p++ = subchar[1];
   3509             }
   3510             break;
   3511         } else {
   3512             /* save the subconverter's substitution string */
   3513             uint8_t *currentSubChars = myConverterData->currentConverter->subChars;
   3514             int8_t currentSubCharLen = myConverterData->currentConverter->subCharLen;
   3515 
   3516             /* set our substitution string into the subconverter */
   3517             myConverterData->currentConverter->subChars = (uint8_t *)subchar;
   3518             myConverterData->currentConverter->subCharLen = (int8_t)length;
   3519 
   3520             /* let the subconverter write the subchar, set/retrieve fromUChar32 state */
                    /* args->converter is pointed at the subconverter only for the duration of the call */
   3521             args->converter = myConverterData->currentConverter;
   3522             myConverterData->currentConverter->fromUChar32 = cnv->fromUChar32;
                    /* NOTE(review): 0 is passed here rather than offsetIndex -- confirm this is intended */
   3523             ucnv_cbFromUWriteSub(args, 0, err);
   3524             cnv->fromUChar32 = myConverterData->currentConverter->fromUChar32;
   3525             args->converter = cnv;
   3526 
   3527             /* restore the subconverter's substitution string */
   3528             myConverterData->currentConverter->subChars = currentSubChars;
   3529             myConverterData->currentConverter->subCharLen = currentSubCharLen;
   3530 
                    /* on overflow, move the bytes the subconverter buffered into the outer converter's charErrorBuffer */
   3531             if(*err == U_BUFFER_OVERFLOW_ERROR) {
   3532                 if(myConverterData->currentConverter->charErrorBufferLength > 0) {
   3533                     uprv_memcpy(
   3534                         cnv->charErrorBuffer,
   3535                         myConverterData->currentConverter->charErrorBuffer,
   3536                         myConverterData->currentConverter->charErrorBufferLength);
   3537                 }
   3538                 cnv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;
   3539                 myConverterData->currentConverter->charErrorBufferLength = 0;
   3540             }
   3541             return;
   3542         }
   3543     default:
   3544         /* not expected */
                /* p == buffer here, so the write below is called with a length of 0 */
   3545         break;
   3546     }
   3547     ucnv_cbFromUWriteBytes(args,
   3548                            buffer, (int32_t)(p - buffer),
   3549                            offsetIndex, err);
   3550 }
   3551 
   3552 /*
   3553  * Structure for cloning an ISO 2022 converter into a single memory block.
   3554  * ucnv_safeClone() of the converter will align the entire cloneStruct,
   3555  * and then ucnv_safeClone() of the sub-converter may additionally align
   3556  * currentConverter inside the cloneStruct, for which we need the deadSpace
   3557  * after currentConverter.
   3558  * This is because UAlignedMemory may be larger than the actually
   3559  * necessary alignment size for the platform.
   3560  * The other cloneStruct fields will not be moved around,
   3561  * and are aligned properly with cloneStruct's alignment.
   3562  */
   3563 struct cloneStruct
   3564 {
   3565     UConverter cnv;                 /* the cloned converter; _ISO_2022_SafeClone() returns its address */
   3566     UConverter currentConverter;    /* storage handed to ucnv_safeClone() for the sub-converter clone */
   3567     UAlignedMemory deadSpace;       /* slack so the sub-converter clone can be shifted for alignment */
   3568     UConverterDataISO2022 mydata;   /* copy of the original's extraInfo; cnv.extraInfo points here */
   3569 };
   3570 
   3571 
   /*
    * Clone hook for ISO 2022 converters.
    *
    * cnv         - converter being cloned; its extraInfo is a UConverterDataISO2022.
    * stackBuffer - caller-provided memory, used as a struct cloneStruct.
    * pBufferSize - if *pBufferSize is 0 this is a preflight request: the required
    *               size is stored there and NULL is returned.
    * status      - receives any failure from cloning the sub-converter.
    *
    * Returns the address of the clone's UConverter inside stackBuffer, or NULL for
    * a preflight request or when cloning currentConverter fails.
    *
    * NOTE(review): nothing here checks that *pBufferSize is at least
    * sizeof(struct cloneStruct), nor the incoming *status; presumably the
    * caller (ucnv_safeClone()) guarantees both -- confirm.
    */
   3572 static UConverter *
   3573 _ISO_2022_SafeClone(
   3574             const UConverter *cnv,
   3575             void *stackBuffer,
   3576             int32_t *pBufferSize,
   3577             UErrorCode *status)
   3578 {
   3579     struct cloneStruct * localClone;
   3580     UConverterDataISO2022 *cnvData;
   3581     int32_t i, size;
   3582 
   3583     if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */
   3584         *pBufferSize = (int32_t)sizeof(struct cloneStruct);
   3585         return NULL;
   3586     }
   3587 
   3588     cnvData = (UConverterDataISO2022 *)cnv->extraInfo;
   3589     localClone = (struct cloneStruct *)stackBuffer;
   3590 
   3591     /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
   3592 
            /* byte-copy the ISO 2022 state, then point the clone at its own copy */
   3593     uprv_memcpy(&localClone->mydata, cnvData, sizeof(UConverterDataISO2022));
   3594     localClone->cnv.extraInfo = &localClone->mydata; /* set pointer to extra data */
   3595     localClone->cnv.isExtraLocal = TRUE;
   3596 
   3597     /* share the subconverters */
   3598 
            /* currentConverter is cloned into the space reserved inside cloneStruct */
   3599     if(cnvData->currentConverter != NULL) {
   3600         size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */
   3601         localClone->mydata.currentConverter =
   3602             ucnv_safeClone(cnvData->currentConverter,
   3603                             &localClone->currentConverter,
   3604                             &size, status);
   3605         if(U_FAILURE(*status)) {
   3606             return NULL;
   3607         }
   3608     }
   3609 
            /* the myConverterArray[] pointers were copied by the memcpy above; bump the count of each non-NULL entry */
   3610     for(i=0; i<UCNV_2022_MAX_CONVERTERS; ++i) {
   3611         if(cnvData->myConverterArray[i] != NULL) {
   3612             ucnv_incrementRefCount(cnvData->myConverterArray[i]);
   3613         }
   3614     }
   3615 
   3616     return &localClone->cnv;
   3617 }
   3618 
   /*
    * Adds to the set behind `sa` the Unicode code points this ISO 2022 converter
    * handles, for the set type requested by `which`.
    *
    * Steps:
    *  1. Add the ranges hardcoded for the variant, chosen by cnvData->locale[0]
    *     ('k' instead delegates to currentConverter's getUnicodeSet).
    *  2. For every non-NULL entry of myConverterArray[], add its code points via
    *     ucnv_MBCSGetFilteredUnicodeSetForUnicode() with a per-table filter.
    *  3. Remove SO, SI, ESC and the range 0x80..0x9f.
    *
    * Does nothing if *pErrorCode already indicates a failure.
    */
   3619 static void
   3620 _ISO_2022_GetUnicodeSet(const UConverter *cnv,
   3621                     const USetAdder *sa,
   3622                     UConverterUnicodeSet which,
   3623                     UErrorCode *pErrorCode)
   3624 {
   3625     int32_t i;
   3626     UConverterDataISO2022* cnvData;
   3627 
   3628     if (U_FAILURE(*pErrorCode)) {
   3629         return;
   3630     }
   3631 #ifdef U_ENABLE_GENERIC_ISO_2022
   3632     if (cnv->sharedData == &_ISO2022Data) {
   3633         /* We use UTF-8 in this case */
                /* every code point except the surrogate range 0xd800..0xdfff */
   3634         sa->addRange(sa->set, 0, 0xd7FF);
   3635         sa->addRange(sa->set, 0xE000, 0x10FFFF);
   3636         return;
   3637     }
   3638 #endif
   3639 
   3640     cnvData = (UConverterDataISO2022*)cnv->extraInfo;
   3641 
   3642     /* start with the code points that are algorithmically round-tripped */
   3643     switch(cnvData->locale[0]){
   3644     case 'j':
   3645         /* include JIS X 0201 which is hardcoded */
   3646         sa->add(sa->set, 0xa5);
   3647         sa->add(sa->set, 0x203e);
   3648         if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
   3649             /* include Latin-1 for some variants of JP */
   3650             sa->addRange(sa->set, 0, 0xff);
   3651         } else {
   3652             /* include ASCII for JP */
   3653             sa->addRange(sa->set, 0, 0x7f);
   3654         }
   3655         if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
   3656             /*
   3657              * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0
   3658              * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8)
   3659              * use half-width Katakana.
   3660              * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode)
   3661              * half-width Katakana via the ESC ( I sequence.
   3662              * However, we only emit (fromUnicode) half-width Katakana according to the
   3663              * definition of each variant.
   3664              *
   3665              * When including fallbacks,
   3666              * we need to include half-width Katakana Unicode code points for all JP variants because
   3667              * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana).
   3668              */
   3669             /* include half-width Katakana for JP */
   3670             sa->addRange(sa->set, HWKANA_START, HWKANA_END);
   3671         }
   3672         break;
   3673     case 'c':
   3674     case 'z':
   3675         /* include ASCII for CN */
   3676         sa->addRange(sa->set, 0, 0x7f);
   3677         break;
   3678     case 'k':
   3679         /* there is only one converter for KR, and it is not in the myConverterArray[] */
   3680         cnvData->currentConverter->sharedData->impl->getUnicodeSet(
   3681                 cnvData->currentConverter, sa, which, pErrorCode);
   3682         /* the loop over myConverterArray[] will simply not find another converter */
   3683         break;
   3684     default:
   3685         break;
   3686     }
   3687 
   3688 #if 0  /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */
   3689             if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
   3690                 cnvData->version==0 && i==CNS_11643
   3691             ) {
   3692                 /* special handling for non-EXT ISO-2022-CN: add only code points for CNS planes 1 and 2 */
   3693                 ucnv_MBCSGetUnicodeSetForBytes(
   3694                         cnvData->myConverterArray[i],
   3695                         sa, UCNV_ROUNDTRIP_SET,
   3696                         0, 0x81, 0x82,
   3697                         pErrorCode);
   3698             }
   3699 #endif
   3700 
            /* add the code points of each loaded sub-table, restricted by a filter chosen per table */
   3701     for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
   3702         UConverterSetFilter filter;
   3703         if(cnvData->myConverterArray[i]!=NULL) {
   3704             if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
   3705                 cnvData->version==0 && i==CNS_11643
   3706             ) {
   3707                 /*
   3708                  * Version-specific for CN:
   3709                  * CN version 0 does not map CNS planes 3..7 although
   3710                  * they are all available in the CNS conversion table;
   3711                  * CN version 1 (-EXT) does map them all.
   3712                  * The two versions create different Unicode sets.
   3713                  */
   3714                 filter=UCNV_SET_FILTER_2022_CN;
   3715             } else if(cnvData->locale[0]=='j' && i==JISX208) {
   3716                 /*
   3717                  * Only add code points that map to Shift-JIS codes
   3718                  * corresponding to JIS X 0208.
   3719                  */
   3720                 filter=UCNV_SET_FILTER_SJIS;
   3721             } else if(i==KSC5601) {
   3722                 /*
   3723                  * Some of the KSC 5601 tables (convrtrs.txt has this alias on multiple tables)
   3724                  * are broader than GR94.
   3725                  */
   3726                 filter=UCNV_SET_FILTER_GR94DBCS;
   3727             } else {
   3728                 filter=UCNV_SET_FILTER_NONE;
   3729             }
   3730             ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, filter, pErrorCode);
   3731         }
   3732     }
   3733 
   3734     /*
   3735      * ISO 2022 converters must not convert SO/SI/ESC despite what
   3736      * sub-converters do by themselves.
   3737      * Remove these characters from the set.
   3738      */
   3739     sa->remove(sa->set, 0x0e);
   3740     sa->remove(sa->set, 0x0f);
   3741     sa->remove(sa->set, 0x1b);
   3742 
   3743     /* ISO 2022 converters do not convert C1 controls either */
   3744     sa->removeRange(sa->set, 0x80, 0x9f);
   3745 }
   3746 
   /*
    * Function table for the generic "ISO_2022" converter.
    * The initializer is positional: the meaning of each slot is fixed by the
    * UConverterImpl declaration, which is not part of this file.
    * Four slots hold the conversion functions only when
    * U_ENABLE_GENERIC_ISO_2022 is defined; otherwise they are NULL.
    */
   3747 static const UConverterImpl _ISO2022Impl={
   3748     UCNV_ISO_2022,
   3749 
   3750     NULL,
   3751     NULL,
   3752 
   3753     _ISO2022Open,
   3754     _ISO2022Close,
   3755     _ISO2022Reset,
   3756 
   3757 #ifdef U_ENABLE_GENERIC_ISO_2022
   3758     T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
   3759     T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
   3760     ucnv_fromUnicode_UTF8,
   3761     ucnv_fromUnicode_UTF8_OFFSETS_LOGIC,
   3762 #else
   3763     NULL,
   3764     NULL,
   3765     NULL,
   3766     NULL,
   3767 #endif
   3768     NULL,
   3769 
   3770     NULL,
   3771     _ISO2022getName,
   3772     _ISO_2022_WriteSub,
   3773     _ISO_2022_SafeClone,
   3774     _ISO_2022_GetUnicodeSet,
   3775 
   3776     NULL,
   3777     NULL
   3778 };
   /*
    * Static description of the generic "ISO_2022" converter.
    * Positional initializer; field meanings come from the UConverterStaticData
    * declaration outside this file. Presumably the 1 and 3 are the minimum and
    * maximum bytes per UChar, and { 0x1a, ... } followed by 1 is the substitution
    * byte sequence and its length -- verify against that declaration.
    */
   3779 static const UConverterStaticData _ISO2022StaticData={
   3780     sizeof(UConverterStaticData),
   3781     "ISO_2022",
   3782     2022,
   3783     UCNV_IBM,
   3784     UCNV_ISO_2022,
   3785     1,
   3786     3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */
   3787     { 0x1a, 0, 0, 0 },
   3788     1,
   3789     FALSE,
   3790     FALSE,
   3791     0,
   3792     0,
   3793     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   3794 };
   /*
    * Shared-data record for the generic "ISO_2022" converter: ties
    * _ISO2022StaticData to the _ISO2022Impl function table.
    * _ISO_2022_GetUnicodeSet() compares cnv->sharedData against this object's
    * address when U_ENABLE_GENERIC_ISO_2022 is defined.
    * Positional initializer; slot meanings come from the UConverterSharedData
    * declaration outside this file.
    */
   3795 const UConverterSharedData _ISO2022Data={
   3796     sizeof(UConverterSharedData),
   3797     ~((uint32_t) 0),
   3798     NULL,
   3799     NULL,
   3800     &_ISO2022StaticData,
   3801     FALSE,
   3802     &_ISO2022Impl,
   3803     0, UCNV_MBCS_TABLE_INITIALIZER
   3804 };
   3805 
   3806 /*************JP****************/
   /*
    * Function table for ISO-2022-JP.
    * It reuses the open/close/reset/getName/writeSub/safeClone/getUnicodeSet
    * functions common to all ISO 2022 variants and plugs in the JP-specific
    * conversion functions. The same *_OFFSETS_LOGIC function fills two adjacent
    * slots in each direction.
    * Positional initializer; slot meanings come from the UConverterImpl
    * declaration outside this file.
    */
   3807 static const UConverterImpl _ISO2022JPImpl={
   3808     UCNV_ISO_2022,
   3809 
   3810     NULL,
   3811     NULL,
   3812 
   3813     _ISO2022Open,
   3814     _ISO2022Close,
   3815     _ISO2022Reset,
   3816 
   3817     UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,
   3818     UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,
   3819     UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,
   3820     UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,
   3821     NULL,
   3822 
   3823     NULL,
   3824     _ISO2022getName,
   3825     _ISO_2022_WriteSub,
   3826     _ISO_2022_SafeClone,
   3827     _ISO_2022_GetUnicodeSet,
   3828 
   3829     NULL,
   3830     NULL
   3831 };
   /*
    * Static description of the "ISO_2022_JP" converter.
    * Positional initializer; field meanings come from the UConverterStaticData
    * declaration outside this file. The maximum of 6 bytes per UChar is explained
    * on the line itself.
    */
   3832 static const UConverterStaticData _ISO2022JPStaticData={
   3833     sizeof(UConverterStaticData),
   3834     "ISO_2022_JP",
   3835     0,
   3836     UCNV_IBM,
   3837     UCNV_ISO_2022,
   3838     1,
   3839     6, /* max 6 bytes per UChar: 4-byte escape sequence + DBCS */
   3840     { 0x1a, 0, 0, 0 },
   3841     1,
   3842     FALSE,
   3843     FALSE,
   3844     0,
   3845     0,
   3846     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   3847 };
   3848 
   /*
    * Shared-data record for ISO-2022-JP: ties _ISO2022JPStaticData to the
    * _ISO2022JPImpl function table. Defined in an unnamed namespace.
    * Positional initializer; slot meanings come from the UConverterSharedData
    * declaration outside this file.
    */
   3849 namespace {
   3850 
   3851 const UConverterSharedData _ISO2022JPData={
   3852     sizeof(UConverterSharedData),
   3853     ~((uint32_t) 0),
   3854     NULL,
   3855     NULL,
   3856     &_ISO2022JPStaticData,
   3857     FALSE,
   3858     &_ISO2022JPImpl,
   3859     0, UCNV_MBCS_TABLE_INITIALIZER
   3860 };
   3861 
   3862 }  // namespace
   3863 
   3864 /************* KR ***************/
   /*
    * Function table for ISO-2022-KR.
    * It reuses the open/close/reset/getName/writeSub/safeClone/getUnicodeSet
    * functions common to all ISO 2022 variants and plugs in the KR-specific
    * conversion functions. The same *_OFFSETS_LOGIC function fills two adjacent
    * slots in each direction.
    * Positional initializer; slot meanings come from the UConverterImpl
    * declaration outside this file.
    */
   3865 static const UConverterImpl _ISO2022KRImpl={
   3866     UCNV_ISO_2022,
   3867 
   3868     NULL,
   3869     NULL,
   3870 
   3871     _ISO2022Open,
   3872     _ISO2022Close,
   3873     _ISO2022Reset,
   3874 
   3875     UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,
   3876     UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,
   3877     UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,
   3878     UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,
   3879     NULL,
   3880 
   3881     NULL,
   3882     _ISO2022getName,
   3883     _ISO_2022_WriteSub,
   3884     _ISO_2022_SafeClone,
   3885     _ISO_2022_GetUnicodeSet,
   3886 
   3887     NULL,
   3888     NULL
   3889 };
   /*
    * Static description of the "ISO_2022_KR" converter.
    * Positional initializer; field meanings come from the UConverterStaticData
    * declaration outside this file. The maximum of 3 bytes per UChar is explained
    * on the line itself.
    */
   3890 static const UConverterStaticData _ISO2022KRStaticData={
   3891     sizeof(UConverterStaticData),
   3892     "ISO_2022_KR",
   3893     0,
   3894     UCNV_IBM,
   3895     UCNV_ISO_2022,
   3896     1,
   3897     3, /* max 3 bytes per UChar: SO+DBCS */
   3898     { 0x1a, 0, 0, 0 },
   3899     1,
   3900     FALSE,
   3901     FALSE,
   3902     0,
   3903     0,
   3904     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   3905 };
   3906 
   /*
    * Shared-data record for ISO-2022-KR: ties _ISO2022KRStaticData to the
    * _ISO2022KRImpl function table. Defined in an unnamed namespace.
    * Positional initializer; slot meanings come from the UConverterSharedData
    * declaration outside this file.
    */
   3907 namespace {
   3908 
   3909 const UConverterSharedData _ISO2022KRData={
   3910     sizeof(UConverterSharedData),
   3911     ~((uint32_t) 0),
   3912     NULL,
   3913     NULL,
   3914     &_ISO2022KRStaticData,
   3915     FALSE,
   3916     &_ISO2022KRImpl,
   3917     0, UCNV_MBCS_TABLE_INITIALIZER
   3918 };
   3919 
   3920 }  // namespace
   3921 
   3922 /*************** CN ***************/
   /*
    * Function table for ISO-2022-CN.
    * It reuses the open/close/reset/getName/writeSub/safeClone/getUnicodeSet
    * functions common to all ISO 2022 variants and plugs in the CN-specific
    * conversion functions. The same *_OFFSETS_LOGIC function fills two adjacent
    * slots in each direction.
    * Positional initializer; slot meanings come from the UConverterImpl
    * declaration outside this file.
    */
   3923 static const UConverterImpl _ISO2022CNImpl={
   3924 
   3925     UCNV_ISO_2022,
   3926 
   3927     NULL,
   3928     NULL,
   3929 
   3930     _ISO2022Open,
   3931     _ISO2022Close,
   3932     _ISO2022Reset,
   3933 
   3934     UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,
   3935     UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,
   3936     UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,
   3937     UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,
   3938     NULL,
   3939 
   3940     NULL,
   3941     _ISO2022getName,
   3942     _ISO_2022_WriteSub,
   3943     _ISO_2022_SafeClone,
   3944     _ISO_2022_GetUnicodeSet,
   3945 
   3946     NULL,
   3947     NULL
   3948 };
   /*
    * Static description of the "ISO_2022_CN" converter.
    * Positional initializer; field meanings come from the UConverterStaticData
    * declaration outside this file. The maximum of 8 bytes per UChar is explained
    * on the line itself.
    */
   3949 static const UConverterStaticData _ISO2022CNStaticData={
   3950     sizeof(UConverterStaticData),
   3951     "ISO_2022_CN",
   3952     0,
   3953     UCNV_IBM,
   3954     UCNV_ISO_2022,
   3955     1,
   3956     8, /* max 8 bytes per UChar: 4-byte CNS designator + 2 bytes for SS2/SS3 + DBCS */
   3957     { 0x1a, 0, 0, 0 },
   3958     1,
   3959     FALSE,
   3960     FALSE,
   3961     0,
   3962     0,
   3963     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   3964 };
   3965 
   /*
    * Shared-data record for ISO-2022-CN: ties _ISO2022CNStaticData to the
    * _ISO2022CNImpl function table. Defined in an unnamed namespace.
    * Positional initializer; slot meanings come from the UConverterSharedData
    * declaration outside this file.
    */
   3966 namespace {
   3967 
   3968 const UConverterSharedData _ISO2022CNData={
   3969     sizeof(UConverterSharedData),
   3970     ~((uint32_t) 0),
   3971     NULL,
   3972     NULL,
   3973     &_ISO2022CNStaticData,
   3974     FALSE,
   3975     &_ISO2022CNImpl,
   3976     0, UCNV_MBCS_TABLE_INITIALIZER
   3977 };
   3978 
   3979 }  // namespace
   3980 
   3981 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
   3982