Home | History | Annotate | Download | only in common
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 2000-2010, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 *   file name:  ucnv2022.c
      7 *   encoding:   US-ASCII
      8 *   tab size:   8 (not used)
      9 *   indentation:4
     10 *
     11 *   created on: 2000feb03
     12 *   created by: Markus W. Scherer
     13 *
     14 *   Change history:
     15 *
     16 *   06/29/2000  helena  Major rewrite of the callback APIs.
     17 *   08/08/2000  Ram     Included support for ISO-2022-JP-2
     18 *                       Changed implementation of toUnicode
     19 *                       function
     20 *   08/21/2000  Ram     Added support for ISO-2022-KR
     21 *   08/29/2000  Ram     Separated implementation of EBCDIC to
     22 *                       ucnvebdc.c
     23 *   09/20/2000  Ram     Added support for ISO-2022-CN
     24 *                       Added implementations for getNextUChar()
     25 *                       for specific 2022 country variants.
     26 *   10/31/2000  Ram     Implemented offsets logic functions
     27 */
     28 
     29 #include "unicode/utypes.h"
     30 
     31 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
     32 
     33 #include "unicode/ucnv.h"
     34 #include "unicode/uset.h"
     35 #include "unicode/ucnv_err.h"
     36 #include "unicode/ucnv_cb.h"
     37 #include "ucnv_imp.h"
     38 #include "ucnv_bld.h"
     39 #include "ucnv_cnv.h"
     40 #include "ucnvmbcs.h"
     41 #include "cstring.h"
     42 #include "cmemory.h"
     43 
     /*
      * Number of elements in a true array (not a pointer), as an int32_t.
      * The whole expansion is parenthesized so that it stays a single operand
      * inside any larger expression.
      */
     #define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
     45 
     46 #ifdef U_ENABLE_GENERIC_ISO_2022
     47 /*
     48  * I am disabling the generic ISO-2022 converter after proposing to do so on
     49  * the icu mailing list two days ago.
     50  *
     51  * Reasons:
     52  * 1. It does not fully support the ISO-2022/ECMA-35 specification with all of
     53  *    its designation sequences, single shifts with return to the previous state,
     54  *    switch-with-no-return to UTF-16BE or similar, etc.
     55  *    This is unlike the language-specific variants like ISO-2022-JP which
     56  *    require a much smaller repertoire of ISO-2022 features.
     57  *    These variants continue to be supported.
     58  * 2. I believe that no one is really using the generic ISO-2022 converter
     59  *    but rather always one of the language-specific variants.
     60  *    Note that ICU's generic ISO-2022 converter has always output one escape
     61  *    sequence followed by UTF-8 for the whole stream.
     62  * 3. Switching between subcharsets is extremely slow, because each time
     63  *    the previous converter is closed and a new one opened,
     64  *    without any kind of caching, least-recently-used list, etc.
     65  * 4. The code is currently buggy, and given the above it does not seem
     66  *    reasonable to spend the time on maintenance.
     67  * 5. ISO-2022 subcharsets should normally be used with 7-bit byte encodings.
     68  *    This means, for example, that when ISO-8859-7 is designated, the following
     69  *    ISO-2022 bytes 00..7f should be interpreted as ISO-8859-7 bytes 80..ff.
     70  *    The ICU ISO-2022 converter does not handle this - and has no information
     71  *    about which subconverter would have to be shifted vs. which is designed
     72  *    for 7-bit ISO-2022.
     73  *
     74  * Markus Scherer 2003-dec-03
     75  */
     76 #endif
     77 
     78 static const char SHIFT_IN_STR[]  = "\x0F";   /* the SI control byte (0x0F) as a NUL-terminated string */
     79 static const char SHIFT_OUT_STR[] = "\x0E";   /* the SO control byte (0x0E) as a NUL-terminated string */
     80 
        /* ASCII control and whitespace byte values */
     81 #define CR      0x0D    /* carriage return */
     82 #define LF      0x0A    /* line feed */
     83 #define H_TAB   0x09    /* horizontal tab */
     84 #define V_TAB   0x0B    /* vertical tab */
     85 #define SPACE   0x20    /* space */
     86 
     87 enum {
     88     HWKANA_START=0xff61,    /* first code point of the halfwidth Katakana range (U+FF61) */
     89     HWKANA_END=0xff9f       /* last code point of the halfwidth Katakana range (U+FF9F) */
     90 };
     91 
     92 /*
     93  * 94-character sets with native byte values A1..FE are encoded in ISO 2022
     94  * as bytes 21..7E. (Subtract 0x80.)
     95  * 96-character sets with native byte values A0..FF are encoded in ISO 2022
     96  * as bytes 20..7F. (Subtract 0x80.)
     97  * Do not encode C1 control codes with native bytes 80..9F
     98  * as bytes 00..1F (C0 control codes).
     99  */
    100 enum {
    101     GR94_START=0xa1,    /* first native byte value of a 94-character set */
    102     GR94_END=0xfe,      /* last native byte value of a 94-character set */
    103     GR96_START=0xa0,    /* first native byte value of a 96-character set */
    104     GR96_END=0xff       /* last native byte value of a 96-character set */
    105 };
    106 
    107 /*
    108  * ISO 2022 control codes must not be converted from Unicode
    109  * because they would mess up the byte stream.
    110  * The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b
    111  * corresponding to SO, SI, and ESC.
    112  */
    /*
     * Non-zero exactly for the code points SO (0x0e), SI (0x0f) and ESC (0x1b).
     * The range test is done on the value converted to uint32_t, so a negative
     * argument is rejected before the shift and the shift count is always 0..0x1f.
     * The argument is evaluated at most twice.
     */
    #define IS_2022_CONTROL(c) (((uint32_t)(c)<0x20) && (((uint32_t)1<<(c))&0x0800c000)!=0)
    114 
    115 /*
         * Charset/state identifiers for the ISO-2022-JP and -CN implementations.
         * The JP and CN groups both use the small values 1..3
         * (ISO8859_1==GB2312_1, ISO8859_7==ISO_IR_165, JISX201==CNS_11643),
         * so such a value has to be interpreted according to the variant in use.
         */
    116 typedef enum  {
    117         /* shared values */
    118         INVALID_STATE=-1,
    119         ASCII = 0,
    120 
    121         SS2_STATE=0x10,
    122         SS3_STATE,          /* 0x11 */
    123 
    124         /* JP: these values are also used as bit numbers by CSM() and as indexes into myConverterArray[] */
    125         ISO8859_1 = 1 ,
    126         ISO8859_7 = 2 ,
    127         JISX201  = 3,
    128         JISX208 = 4,
    129         JISX212 = 5,
    130         GB2312  =6,
    131         KSC5601 =7,
    132         HWKANA_7BIT=8,    /* Halfwidth Katakana 7 bit */
    133 
    134         /* CN */
    135         /* the first few enum constants must keep their values because they correspond to myConverterArray[] */
    136         GB2312_1=1,
    137         ISO_IR_165=2,
    138         CNS_11643=3,
    139 
    140         /*
    141          * these are used in StateEnum and ISO2022State variables,
    142          * but CNS_11643 must be used to index into myConverterArray[]
    143          */
    144         CNS_11643_0=0x20,
    145         CNS_11643_1,        /* 0x21 */
    146         CNS_11643_2,        /* 0x22 */
    147         CNS_11643_3,        /* 0x23 */
    148         CNS_11643_4,        /* 0x24 */
    149         CNS_11643_5,        /* 0x25 */
    150         CNS_11643_6,        /* 0x26 */
    151         CNS_11643_7         /* 0x27 */
    152 } StateEnum;
    153 
    154 /* is the StateEnum charset value for a DBCS charset? True for JISX208..KSC5601 (values 4..7); evaluates cs twice. */
    155 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)
    156 
        /* charset mask: a value with only bit number cs set; meant for StateEnum values below 16 */
    157 #define CSM(cs) ((uint16_t)1<<(cs))
    158 
    159 /*
    160  * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence
    161  * to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x
    162  *
    163  * Note: The converter uses some leniency:
    164  * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in
    165  *   all versions, not just JIS7 and JIS8.
    166  * - ICU does not distinguish between different versions of JIS X 0208.
    167  */
    168 enum { MAX_JA_VERSION=4 };  /* highest valid index into jpCharsetMasks[] */
        /* One CSM() bit set per charset that is enabled in the given version. */
    169 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
            /* version 0 */
    170     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
            /* version 1: adds JISX212 */
    171     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
            /* versions 2, 3 and 4: add GB2312, KSC5601, ISO8859_1 and ISO8859_7; the three masks are identical */
    172     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
    173     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
    174     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)
    175 };
    176 
    177 typedef enum {      /* type of UConverterDataISO2022.currentType; _ISO2022Open() starts it at ASCII1 */
    178         ASCII1=0,
    179         LATIN1,
    180         SBCS,
    181         DBCS,
    182         MBCS,
    183         HWKANA
    184 }Cnv2022Type;
    185 
    186 typedef struct ISO2022State {   /* designated charsets plus the active shift state; kept separately for toUnicode and fromUnicode in UConverterDataISO2022 */
    187     int8_t cs[4];       /* charset number for SI (G0)/SO (G1)/SS2 (G2)/SS3 (G3) */
    188     int8_t g;           /* 0..3 for G0..G3 (SI/SO/SS2/SS3) */
    189     int8_t prevG;       /* g before single shift (SS2 or SS3) */
    190 } ISO2022State;
    191 
    192 #define UCNV_OPTIONS_VERSION_MASK 0xf   /* low 4 bits of the open options carry the variant version */
    193 #define UCNV_2022_MAX_CONVERTERS 10     /* number of slots in UConverterDataISO2022.myConverterArray[] */
    194 
    195 typedef struct{     /* per-converter state, allocated by _ISO2022Open() and stored in cnv->extraInfo */
    196     UConverterSharedData *myConverterArray[UCNV_2022_MAX_CONVERTERS];   /* shared data loaded by _ISO2022Open(), indexed by StateEnum values */
    197     UConverter *currentConverter;   /* for locale ko: the converter opened with ucnv_open() in _ISO2022Open() */
    198     Cnv2022Type currentType;        /* initialized to ASCII1 */
    199     ISO2022State toU2022State, fromU2022State;  /* presumably one shift state per conversion direction - TODO confirm */
    200     uint32_t key;                   /* NOTE(review): presumably the escape sequence key accumulated so far - confirm */
    201     uint32_t version;               /* options & UCNV_OPTIONS_VERSION_MASK, adjusted by _ISO2022Open() per locale */
    202 #ifdef U_ENABLE_GENERIC_ISO_2022
    203     UBool isFirstBuffer;            /* set to TRUE when the generic converter is opened */
    204 #endif
    205     UBool isEmptySegment;           /* NOTE(review): meaning not visible in this part of the file */
    206     char name[30];                  /* converter name such as "ISO_2022,locale=ja,version=0" */
    207     char locale[3];                 /* "ja", "ko" or "cn", set by _ISO2022Open() */
    208 }UConverterDataISO2022;
    209 
    210 /* Protos */
    211 /* ISO-2022 ----------------------------------------------------------------- */
    212 
    213 /* Forward declarations of the UTF-8 fromUnicode functions (presumably defined in the UTF-8 converter source - TODO confirm) */
    214 U_CFUNC void
    215 ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs * args,
    216                       UErrorCode * err);
    217 U_CFUNC void
    218 ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs * args,
    219                                     UErrorCode * err);
    220 
    221 #define ESC_2022 0x1B /* ESC, the byte that starts an escape sequence */
    222 
        /* States stored in escSeqStateTable_Value_2022[] for the escape sequence prefix matched so far. */
    223 typedef enum
    224 {
    225         INVALID_2022 = -1, /* does not correspond to a valid ISO 2022 escape sequence */
    226         VALID_NON_TERMINAL_2022 = 0, /* so far corresponds to a valid ISO 2022 escape sequence */
    227         VALID_TERMINAL_2022 = 1, /* corresponds to a complete, valid ISO 2022 escape sequence */
    228         VALID_MAYBE_TERMINAL_2022 = 2 /* so far matches one ISO 2022 escape sequence, but by adding more characters might match another escape sequence */
    229 } UCNV_TableStates_2022;
    230 
    231 /*
    232 * The way these state transition arrays work is:
    233 * ex : ESC$B is the sequence for JISX208
    234 *      a) First Iteration: char is ESC
    235 *          i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index
    236 *             int x = normalize_esq_chars_2022[27] which is equal to 1
    237 *         ii) Search for this value in escSeqStateTable_Key_2022[]
    238 *             value of x is stored at escSeqStateTable_Key_2022[0]
    239 *        iii) Save this index as offset
    240 *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
    241 *             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
    242 *     b) Switch on this state and continue to next char
    243 *          i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index
    244 *             which is normalize_esq_chars_2022[36] == 4
    245 *         ii) x is currently 1(from above)
    246 *               x<<=5 -- x is now 32
    247 *               x+=normalize_esq_chars_2022[36]
    248 *               now x is 36
    249 *        iii) Search for this value in escSeqStateTable_Key_2022[]
    250 *             value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2
    251 *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
    252 *             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
    253 *     c) Switch on this state and continue to next char
    254 *        i)  Get the value of B from normalize_esq_chars_2022[] with int value of B as index
    255 *        ii) x is currently 36 (from above)
    256 *            x<<=5 -- x is now 1152
    257 *            x+=normalize_esq_chars_2022[66]
    258 *            now x is 1161
    259 *       iii) Search for this value in escSeqStateTable_Key_2022[]
    260 *            value of x is stored at escSeqStateTable_Key_2022[24], so offset is 24
    261 *        iv) Get state of this sequence from escSeqStateTable_Value_2022[24]
    262 *            escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022
    263 *         v) Get the converter name from escSeqStateTable_Result_2022[24] which is JISX-208
    264 */
    265 
    266 
    267 /* Below are the arrays that together form the escape sequence state transition table. */
        /*
         * normalize_esq_chars_2022[b] maps byte value b to a small code (1..29) for
         * the bytes that occur in the supported escape sequences, and to 0 for all
         * other bytes. Each code fits into 5 bits, which is what allows the key to be
         * built as key=(key<<5)+code. Ten entries per row; the column header gives
         * the last decimal digit of the index.
         */
    268 static const int8_t normalize_esq_chars_2022[256] = {
    269 /*       0      1       2       3       4      5       6        7       8       9           */
    270 
    271          0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    272         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    273         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,1      ,0      ,0
    274         ,0     ,0      ,0      ,0      ,0      ,0      ,4      ,7      ,29      ,0
    275         ,2     ,24     ,26     ,27     ,0      ,3      ,23     ,6      ,0      ,0
    276         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    277         ,0     ,0      ,0      ,0      ,5      ,8      ,9      ,10     ,11     ,12
    278         ,13    ,14     ,15     ,16     ,17     ,18     ,19     ,20     ,25     ,28
    279         ,0     ,0      ,21     ,0      ,0      ,0      ,0      ,0      ,0      ,0
    280         ,22    ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    281         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    282         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    283         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    284         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    285         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    286         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    287         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    288         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    289         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    290         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    291         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    292         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    293         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    294         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    295         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    296         ,0     ,0      ,0      ,0      ,0      ,0
    297 };
    298 
    299 #ifdef U_ENABLE_GENERIC_ISO_2022
    300 /*
    301  * When the generic ISO-2022 converter is completely removed, not just disabled
    302  * per #ifdef, then the following state table and the associated tables that are
    303  * dimensioned with MAX_STATES_2022 should be trimmed.
    304  *
    305  * Especially, VALID_MAYBE_TERMINAL_2022 will not be used any more, and all of
    306  * the associated escape sequences starting with ESC ( B should be removed.
    307  * This includes the ones with key values 1097 and all of the ones above 1000000.
    308  *
    309  * For the latter, the tables can simply be truncated.
    310  * For the former, since the tables must be kept parallel, it is probably best
    311  * to simply duplicate an adjacent table cell, parallel in all tables.
    312  *
    313  * It may make sense to restructure the tables, especially by using small search
    314  * tables for the variants instead of indexing them parallel to the table here.
    315  */
    316 #endif
    317 
    318 #define MAX_STATES_2022 74  /* number of entries in each escSeqStateTable_*_2022[] array */
        /*
         * Keys of all recognized escape sequence prefixes, in ascending order.
         * A key is built from the normalize_esq_chars_2022[] codes of the bytes
         * seen so far (key=(key<<5)+code). The index of a key here is the index
         * into the parallel escSeqStateTable_Value_2022[] (and _Result_2022[]) tables.
         */
    319 static const int32_t escSeqStateTable_Key_2022[MAX_STATES_2022] = {
    320 /*   0           1           2           3           4           5           6           7           8           9           */
    321 
    322      1          ,34         ,36         ,39         ,55         ,57         ,60         ,61         ,1093       ,1096
    323     ,1097       ,1098       ,1099       ,1100       ,1101       ,1102       ,1103       ,1104       ,1105       ,1106
    324     ,1109       ,1154       ,1157       ,1160       ,1161       ,1176       ,1178       ,1179       ,1254       ,1257
    325     ,1768       ,1773       ,1957       ,35105      ,36933      ,36936      ,36937      ,36938      ,36939      ,36940
    326     ,36942      ,36943      ,36944      ,36945      ,36946      ,36947      ,36948      ,37640      ,37642      ,37644
    327     ,37646      ,37711      ,37744      ,37745      ,37746      ,37747      ,37748      ,40133      ,40136      ,40138
    328     ,40139      ,40140      ,40141      ,1123363    ,35947624   ,35947625   ,35947626   ,35947627   ,35947629   ,35947630
    329     ,35947631   ,35947635   ,35947636   ,35947638
    330 };
    331 
    332 #ifdef U_ENABLE_GENERIC_ISO_2022
    333 
    334 static const char* const escSeqStateTable_Result_2022[MAX_STATES_2022] = {   /* generic converter only: converter name for the key at the same index of escSeqStateTable_Key_2022[]; NULL where no name is associated */
    335  /*  0                      1                        2                      3                   4                   5                        6                      7                       8                       9    */
    336 
    337      NULL                   ,NULL                   ,NULL                   ,NULL               ,NULL               ,NULL                   ,NULL                   ,NULL                   ,"latin1"               ,"latin1"
    338     ,"latin1"               ,"ibm-865"              ,"ibm-865"              ,"ibm-865"          ,"ibm-865"          ,"ibm-865"              ,"ibm-865"              ,"JISX0201"             ,"JISX0201"             ,"latin1"
    339     ,"latin1"               ,NULL                   ,"JISX-208"             ,"ibm-5478"         ,"JISX-208"         ,NULL                   ,NULL                   ,NULL                   ,NULL                   ,"UTF8"
    340     ,"ISO-8859-1"           ,"ISO-8859-7"           ,"JIS-X-208"            ,NULL               ,"ibm-955"          ,"ibm-367"              ,"ibm-952"              ,"ibm-949"              ,"JISX-212"             ,"ibm-1383"
    341     ,"ibm-952"              ,"ibm-964"              ,"ibm-964"              ,"ibm-964"          ,"ibm-964"          ,"ibm-964"              ,"ibm-964"              ,"ibm-5478"         ,"ibm-949"              ,"ISO-IR-165"
    342     ,"CNS-11643-1992,1"     ,"CNS-11643-1992,2"     ,"CNS-11643-1992,3"     ,"CNS-11643-1992,4" ,"CNS-11643-1992,5" ,"CNS-11643-1992,6"     ,"CNS-11643-1992,7"     ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian"
    343     ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,NULL               ,"latin1"           ,"ibm-912"              ,"ibm-913"              ,"ibm-914"              ,"ibm-813"              ,"ibm-1089"
    344     ,"ibm-920"              ,"ibm-915"              ,"ibm-915"              ,"latin1"
    345 };
    346 
    347 #endif
    348 
    349 static const int8_t escSeqStateTable_Value_2022[MAX_STATES_2022] = {   /* UCNV_TableStates_2022 state for the key at the same index of escSeqStateTable_Key_2022[] */
    350 /*          0                           1                         2                             3                           4                           5                               6                        7                          8                           9       */
    351      VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022     ,VALID_NON_TERMINAL_2022   ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    352     ,VALID_MAYBE_TERMINAL_2022  ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    353     ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022
    354     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    355     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    356     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    357     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    358     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    359 };
    360 
    361 
    362 /* Identifies the ISO-2022 variant (type introduced for refactoring the changeState_2022 code); the generic ISO_2022 value exists only with U_ENABLE_GENERIC_ISO_2022 */
    363 typedef enum{
    364 #ifdef U_ENABLE_GENERIC_ISO_2022
    365     ISO_2022=0,
    366 #endif
    367     ISO_2022_JP=1,
    368     ISO_2022_KR=2,
    369     ISO_2022_CN=3
    370 } Variant2022;
    371 
    372 /*********** ISO 2022 Converter Protos ***********/
        /* Prototypes of file-local functions, so that they can be referenced before their definitions. */
    373 static void
    374 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode);
    375 
    376 static void
    377  _ISO2022Close(UConverter *converter);
    378 
    379 static void
    380 _ISO2022Reset(UConverter *converter, UConverterResetChoice choice);
    381 
    382 static const char*
    383 _ISO2022getName(const UConverter* cnv);
    384 
    385 static void
    386 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err);
    387 
    388 static UConverter *
    389 _ISO_2022_SafeClone(const UConverter *cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status);
    390 
    391 #ifdef U_ENABLE_GENERIC_ISO_2022
        /* only declared when the generic ISO-2022 converter is compiled in */
    392 static void
    393 T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, UErrorCode* err);
    394 #endif
    395 
    396 /*const UConverterSharedData _ISO2022Data;*/
        /* Forward declarations of the per-variant shared data; _ISO2022Open() stores their addresses in cnv->sharedData. */
    397 static const UConverterSharedData _ISO2022JPData;
    398 static const UConverterSharedData _ISO2022KRData;
    399 static const UConverterSharedData _ISO2022CNData;
    400 
    401 /*************** Converter implementations ******************/
    402 
    403 /* The purpose of this function is to get around gcc compiler warnings. */
    404 static U_INLINE void
    405 fromUWriteUInt8(UConverter *cnv,
    406                  const char *bytes, int32_t length,
    407                  uint8_t **target, const char *targetLimit,
    408                  int32_t **offsets,
    409                  int32_t sourceIndex,
    410                  UErrorCode *pErrorCode)
    411 {
    412     char *targetChars = (char *)*target;
    413     ucnv_fromUWriteBytes(cnv, bytes, length, &targetChars, targetLimit,
    414                          offsets, sourceIndex, pErrorCode);
    415     *target = (uint8_t*)targetChars;
    416 
    417 }
    418 
    419 static U_INLINE void
    420 setInitialStateToUnicodeKR(UConverter* converter, UConverterDataISO2022 *myConverterData){
    421     if(myConverterData->version == 1) {
    422         UConverter *cnv = myConverterData->currentConverter;
    423 
    424         cnv->toUnicodeStatus=0;     /* offset */
    425         cnv->mode=0;                /* state */
    426         cnv->toULength=0;           /* byteIndex */
    427     }
    428 }
    429 
    430 static U_INLINE void
    431 setInitialStateFromUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConverterData){
    432    /* in ISO-2022-KR the designator sequence appears only once
    433     * in a file so we append it only once
    434     */
    435     if( converter->charErrorBufferLength==0){
    436 
    437         converter->charErrorBufferLength = 4;
    438         converter->charErrorBuffer[0] = 0x1b;
    439         converter->charErrorBuffer[1] = 0x24;
    440         converter->charErrorBuffer[2] = 0x29;
    441         converter->charErrorBuffer[3] = 0x43;
    442     }
    443     if(myConverterData->version == 1) {
    444         UConverter *cnv = myConverterData->currentConverter;
    445 
    446         cnv->fromUChar32=0;
    447         cnv->fromUnicodeStatus=1;   /* prevLength */
    448     }
    449 }
    450 
    451 static void
    452 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
    453 
    454     char myLocale[6]={' ',' ',' ',' ',' ',' '};
    455 
    456     cnv->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022));
    457     if(cnv->extraInfo != NULL) {
    458         UConverterNamePieces stackPieces;
    459         UConverterLoadArgs stackArgs={ (int32_t)sizeof(UConverterLoadArgs) };
    460         UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo;
    461         uint32_t version;
    462 
    463         stackArgs.onlyTestIsLoadable = pArgs->onlyTestIsLoadable;
    464 
    465         uprv_memset(myConverterData, 0, sizeof(UConverterDataISO2022));
    466         myConverterData->currentType = ASCII1;
    467         cnv->fromUnicodeStatus =FALSE;
    468         if(pArgs->locale){
    469             uprv_strncpy(myLocale, pArgs->locale, sizeof(myLocale));
    470         }
    471         version = pArgs->options & UCNV_OPTIONS_VERSION_MASK;
    472         myConverterData->version = version;
    473 
    474         /* BEGIN android-changed */
    475         /* The "jk" locale ID was made up for KDDI ISO-2022-JP. */
    476         /* The "js" locale ID was made up for SoftBank ISO-2022-JP. */
    477         if((myLocale[0]=='j' &&
    478             (myLocale[1]=='a'|| myLocale[1]=='p' || myLocale[1]=='k' ||
    479              myLocale[1]=='s') &&
    480             (myLocale[2]=='_' || myLocale[2]=='\0')))
    481         {
    482             size_t len=0;
    483             /* open the required converters and cache them */
    484             if(version>MAX_JA_VERSION) {
    485                 /* prevent indexing beyond jpCharsetMasks[] */
    486                 myConverterData->version = version = 0;
    487             }
    488             if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
    489                 myConverterData->myConverterArray[ISO8859_7] =
    490                     ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode);
    491             }
    492             if (myLocale[1]=='k') {  /* Use KDDI's version. */
    493                 myConverterData->myConverterArray[JISX208] =
    494                     ucnv_loadSharedData("kddi-jisx-208-2007", &stackPieces, &stackArgs, errorCode);
    495             } else if (myLocale[1]=='s') {  /* Use SoftBank's version. */
    496                 myConverterData->myConverterArray[JISX208] =
    497                     ucnv_loadSharedData("softbank-jisx-208-2007", &stackPieces, &stackArgs, errorCode);
    498             } else {
    499                 myConverterData->myConverterArray[JISX208] =
    500                     ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, errorCode);
    501             }
    502             /* END android-changed */
    503 
    504             if(jpCharsetMasks[version]&CSM(JISX212)) {
    505                 myConverterData->myConverterArray[JISX212] =
    506                     ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode);
    507             }
    508             if(jpCharsetMasks[version]&CSM(GB2312)) {
    509                 myConverterData->myConverterArray[GB2312] =
    510                     /* BEGIN android-changed */
    511                     ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode); /* gb_2312_80-1 */
    512                     /* END android-changed */
    513             }
    514             if(jpCharsetMasks[version]&CSM(KSC5601)) {
    515                 myConverterData->myConverterArray[KSC5601] =
    516                     ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode);
    517             }
    518 
    519             /* set the function pointers to appropriate funtions */
    520             cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);
    521             uprv_strcpy(myConverterData->locale,"ja");
    522 
    523             (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=");
    524             len = uprv_strlen(myConverterData->name);
    525             myConverterData->name[len]=(char)(myConverterData->version+(int)'0');
    526             myConverterData->name[len+1]='\0';
    527         }
    528         else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
    529             (myLocale[2]=='_' || myLocale[2]=='\0'))
    530         {
    531             const char *cnvName;
    532             if(version==1) {
    533                 cnvName="icu-internal-25546";
    534             } else {
    535                 /* BEGIN android-changed */
    536                 cnvName="ksc_5601";
    537                 /* END android-changed */
    538                 myConverterData->version=version=0;
    539             }
    540             if(pArgs->onlyTestIsLoadable) {
    541                 ucnv_canCreateConverter(cnvName, errorCode);  /* errorCode carries result */
    542                 uprv_free(cnv->extraInfo);
    543                 cnv->extraInfo=NULL;
    544                 return;
    545             } else {
    546                 myConverterData->currentConverter=ucnv_open(cnvName, errorCode);
    547                 if (U_FAILURE(*errorCode)) {
    548                     _ISO2022Close(cnv);
    549                     return;
    550                 }
    551 
    552                 if(version==1) {
    553                     (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=1");
    554                     uprv_memcpy(cnv->subChars, myConverterData->currentConverter->subChars, 4);
    555                     cnv->subCharLen = myConverterData->currentConverter->subCharLen;
    556                 }else{
    557                     (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=0");
    558                 }
    559 
    560                 /* initialize the state variables */
    561                 setInitialStateToUnicodeKR(cnv, myConverterData);
    562                 setInitialStateFromUnicodeKR(cnv, myConverterData);
    563 
    564                 /* set the function pointers to appropriate funtions */
    565                 cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData;
    566                 uprv_strcpy(myConverterData->locale,"ko");
    567             }
    568         }
    569         else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& myLocale[1]=='n'))&&
    570             (myLocale[2]=='_' || myLocale[2]=='\0'))
    571         {
    572 
    573             /* open the required converters and cache them */
    574             /* BEGIN android-changed */
    575             myConverterData->myConverterArray[GB2312_1] =
    576                 ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode);
    577             if(version==1) {
    578                 myConverterData->myConverterArray[ISO_IR_165] =
    579                     ucnv_loadSharedData("noop-iso-ir-165", &stackPieces, &stackArgs, errorCode);
    580             }
    581             myConverterData->myConverterArray[CNS_11643] =
    582                 ucnv_loadSharedData("noop-cns-11643", &stackPieces, &stackArgs, errorCode);
    583             /* END android-changed */
    584 
    585 
    586             /* set the function pointers to appropriate funtions */
    587             cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData;
    588             uprv_strcpy(myConverterData->locale,"cn");
    589 
    590             if (version==0){
    591                 myConverterData->version = 0;
    592                 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=0");
    593             }else if (version==1){
    594                 myConverterData->version = 1;
    595                 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=1");
    596             }else {
    597                 myConverterData->version = 2;
    598                 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2");
    599             }
    600         }
    601         else{
    602 #ifdef U_ENABLE_GENERIC_ISO_2022
    603             myConverterData->isFirstBuffer = TRUE;
    604 
    605             /* append the UTF-8 escape sequence */
    606             cnv->charErrorBufferLength = 3;
    607             cnv->charErrorBuffer[0] = 0x1b;
    608             cnv->charErrorBuffer[1] = 0x25;
    609             cnv->charErrorBuffer[2] = 0x42;
    610 
    611             cnv->sharedData=(UConverterSharedData*)&_ISO2022Data;
    612             /* initialize the state variables */
    613             uprv_strcpy(myConverterData->name,"ISO_2022");
    614 #else
    615             *errorCode = U_UNSUPPORTED_ERROR;
    616             return;
    617 #endif
    618         }
    619 
    620         cnv->maxBytesPerUChar=cnv->sharedData->staticData->maxBytesPerChar;
    621 
    622         if(U_FAILURE(*errorCode) || pArgs->onlyTestIsLoadable) {
    623             _ISO2022Close(cnv);
    624         }
    625     } else {
    626         *errorCode = U_MEMORY_ALLOCATION_ERROR;
    627     }
    628 }
    629 
    630 
    631 static void
    632 _ISO2022Close(UConverter *converter) {
    633     UConverterDataISO2022* myData =(UConverterDataISO2022 *) (converter->extraInfo);
    634     UConverterSharedData **array = myData->myConverterArray;
    635     int32_t i;
    636 
    637     if (converter->extraInfo != NULL) {
    638         /*close the array of converter pointers and free the memory*/
    639         for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
    640             if(array[i]!=NULL) {
    641                 ucnv_unloadSharedDataIfReady(array[i]);
    642             }
    643         }
    644 
    645         ucnv_close(myData->currentConverter);
    646 
    647         if(!converter->isExtraLocal){
    648             uprv_free (converter->extraInfo);
    649             converter->extraInfo = NULL;
    650         }
    651     }
    652 }
    653 
    654 static void
    655 _ISO2022Reset(UConverter *converter, UConverterResetChoice choice) {
    656     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) (converter->extraInfo);
    657     if(choice<=UCNV_RESET_TO_UNICODE) {
    658         uprv_memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State));
    659         myConverterData->key = 0;
    660         myConverterData->isEmptySegment = FALSE;
    661     }
    662     if(choice!=UCNV_RESET_TO_UNICODE) {
    663         uprv_memset(&myConverterData->fromU2022State, 0, sizeof(ISO2022State));
    664     }
    665 #ifdef U_ENABLE_GENERIC_ISO_2022
    666     if(myConverterData->locale[0] == 0){
    667         if(choice<=UCNV_RESET_TO_UNICODE) {
    668             myConverterData->isFirstBuffer = TRUE;
    669             myConverterData->key = 0;
    670             if (converter->mode == UCNV_SO){
    671                 ucnv_close (myConverterData->currentConverter);
    672                 myConverterData->currentConverter=NULL;
    673             }
    674             converter->mode = UCNV_SI;
    675         }
    676         if(choice!=UCNV_RESET_TO_UNICODE) {
    677             /* re-append UTF-8 escape sequence */
    678             converter->charErrorBufferLength = 3;
    679             converter->charErrorBuffer[0] = 0x1b;
    680             converter->charErrorBuffer[1] = 0x28;
    681             converter->charErrorBuffer[2] = 0x42;
    682         }
    683     }
    684     else
    685 #endif
    686     {
    687         /* reset the state variables */
    688         if(myConverterData->locale[0] == 'k'){
    689             if(choice<=UCNV_RESET_TO_UNICODE) {
    690                 setInitialStateToUnicodeKR(converter, myConverterData);
    691             }
    692             if(choice!=UCNV_RESET_TO_UNICODE) {
    693                 setInitialStateFromUnicodeKR(converter, myConverterData);
    694             }
    695         }
    696     }
    697 }
    698 
    699 static const char*
    700 _ISO2022getName(const UConverter* cnv){
    701     if(cnv->extraInfo){
    702         UConverterDataISO2022* myData= (UConverterDataISO2022*)cnv->extraInfo;
    703         return myData->name;
    704     }
    705     return NULL;
    706 }
    707 
    708 
    709 /*************** to unicode *******************/
    710 /****************************************************************************
    711  * Recognized escape sequences are
    712  * <ESC>(B  ASCII
    713  * <ESC>.A  ISO-8859-1
    714  * <ESC>.F  ISO-8859-7
    715  * <ESC>(J  JISX-201
    716  * <ESC>(I  JISX-201
    717  * <ESC>$B  JISX-208
    718  * <ESC>$@  JISX-208
    719  * <ESC>$(D JISX-212
    720  * <ESC>$A  GB2312
    721  * <ESC>$(C KSC5601
         *
         * nextStateToUnicodeJP[] is read by changeState_2022() for ISO_2022_JP:
         * it is indexed with the table offset that getKey_2022() reports for a
         * completed escape sequence and yields the StateEnum value to act on.
         * INVALID_STATE entries make changeState_2022() report
         * U_UNSUPPORTED_ESCAPE_SEQUENCE; SS2_STATE selects the single shift;
         * ISO8859_1 and ISO8859_7 are stored as the G2 charset, every other
         * charset as the G0 charset.
    722  */
    723 static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= {
    724 /*      0                1               2               3               4               5               6               7               8               9    */
    725     INVALID_STATE   ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,SS2_STATE      ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    726     ,ASCII          ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,JISX201        ,HWKANA_7BIT    ,JISX201        ,INVALID_STATE
    727     ,INVALID_STATE  ,INVALID_STATE  ,JISX208        ,GB2312         ,JISX208        ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    728     ,ISO8859_1      ,ISO8859_7      ,JISX208        ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,KSC5601        ,JISX212        ,INVALID_STATE
    729     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    730     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    731     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    732     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    733 };
    734 
    735 /*************** to unicode *******************/
        /*
         * nextStateToUnicodeCN[] is read by changeState_2022() for ISO_2022_CN:
         * it is indexed with the table offset that getKey_2022() reports for a
         * completed escape sequence and yields the StateEnum value to act on.
         * INVALID_STATE entries make changeState_2022() report
         * U_UNSUPPORTED_ESCAPE_SEQUENCE; SS2_STATE and SS3_STATE select the
         * single shifts; GB2312_1, ISO_IR_165 and CNS_11643_1 are stored in
         * cs[1], CNS_11643_2 in cs[2] and the remaining CNS 11643 planes in
         * cs[3]. ISO_IR_165 and the planes above 2 are rejected when the
         * converter version is 0.
         */
    736 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
    737 /*      0                1               2               3               4               5               6               7               8               9    */
    738      INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,SS2_STATE      ,SS3_STATE      ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    739     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    740     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    741     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    742     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,GB2312_1       ,INVALID_STATE  ,ISO_IR_165
    743     ,CNS_11643_1    ,CNS_11643_2    ,CNS_11643_3    ,CNS_11643_4    ,CNS_11643_5    ,CNS_11643_6    ,CNS_11643_7    ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    744     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    745     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    746 };
    747 
    748 
    749 static UCNV_TableStates_2022
    750 getKey_2022(char c,int32_t* key,int32_t* offset){
    751     int32_t togo;
    752     int32_t low = 0;
    753     int32_t hi = MAX_STATES_2022;
    754     int32_t oldmid=0;
    755 
    756     togo = normalize_esq_chars_2022[(uint8_t)c];
    757     if(togo == 0) {
    758         /* not a valid character anywhere in an escape sequence */
    759         *key = 0;
    760         *offset = 0;
    761         return INVALID_2022;
    762     }
    763     togo = (*key << 5) + togo;
    764 
    765     while (hi != low)  /*binary search*/{
    766 
    767         register int32_t mid = (hi+low) >> 1; /*Finds median*/
    768 
    769         if (mid == oldmid)
    770             break;
    771 
    772         if (escSeqStateTable_Key_2022[mid] > togo){
    773             hi = mid;
    774         }
    775         else if (escSeqStateTable_Key_2022[mid] < togo){
    776             low = mid;
    777         }
    778         else /*we found it*/{
    779             *key = togo;
    780             *offset = mid;
    781             return (UCNV_TableStates_2022)escSeqStateTable_Value_2022[mid];
    782         }
    783         oldmid = mid;
    784 
    785     }
    786 
    787     *key = 0;
    788     *offset = 0;
    789     return INVALID_2022;
    790 }
    791 
    792 /* Runs the escape-sequence state machine to determine which code page an escape sequence selects.
         *
         * Bytes are consumed from *source one at a time; each one is appended to
         * _this->toUBytes[] and fed to getKey_2022() until the sequence is
         * complete, found invalid, or the input runs out.
         *
         * @param _this       converter whose extraInfo holds the UConverterDataISO2022 state
         * @param source      in/out pointer to the next input byte
         * @param sourceLimit end of the input
         * @param var         ISO-2022 variant; selects how a completed sequence is applied
         * @param err         set to U_ILLEGAL_ESCAPE_SEQUENCE or U_UNSUPPORTED_ESCAPE_SEQUENCE
         *                    on failure (the disabled generic branch may also pass it
         *                    to ucnv_open())
         *
         * If the input ends inside a sequence, the function returns with
         * myData2022->key != 0 and the bytes seen so far left in toUBytes[].
         * On success toULength is reset to 0.
    793  */
    794 static void
    795 changeState_2022(UConverter* _this,
    796                 const char** source,
    797                 const char* sourceLimit,
    798                 Variant2022 var,
    799                 UErrorCode* err){
    800     UCNV_TableStates_2022 value;
    801     UConverterDataISO2022* myData2022 = ((UConverterDataISO2022*)_this->extraInfo);
            /* resume from the key saved by a previous call (0 when starting a new sequence) */
    802     uint32_t key = myData2022->key;
    803     int32_t offset = 0;
            /* number of bytes already in toUBytes[] on entry; used below to decide how far *source can be backed out */
    804     int8_t initialToULength = _this->toULength;
    805     char c;
    806 
    807     value = VALID_NON_TERMINAL_2022;
    808     while (*source < sourceLimit) {
    809         c = *(*source)++;
    810         _this->toUBytes[_this->toULength++]=(uint8_t)c;
    811         value = getKey_2022(c,(int32_t *) &key, &offset);
    812 
    813         switch (value){
    814 
    815         case VALID_NON_TERMINAL_2022 :
    816             /* continue with the loop */
    817             break;
    818 
    819         case VALID_TERMINAL_2022:
    820             key = 0;
    821             goto DONE;
    822 
    823         case INVALID_2022:
    824             goto DONE;
    825 
    826         case VALID_MAYBE_TERMINAL_2022:
    827 #ifdef U_ENABLE_GENERIC_ISO_2022
    828             /* ESC ( B is ambiguous only for ISO_2022 itself */
    829             if(var == ISO_2022) {
    830                 /* discard toUBytes[] for ESC ( B because this sequence is correct and complete */
    831                 _this->toULength = 0;
    832 
    833                 /* TODO need to indicate that ESC ( B was seen; if failure, then need to replay from source or from MBCS-style replay */
    834 
    835                 /* continue with the loop */
    836                 value = VALID_NON_TERMINAL_2022;
    837                 break;
    838             } else
    839 #endif
    840             {
    841                 /* not ISO_2022 itself, finish here */
    842                 value = VALID_TERMINAL_2022;
    843                 key = 0;
    844                 goto DONE;
    845             }
    846         }
    847     }
    848 
    849 DONE:
            /* persist the key so that an incomplete sequence can be continued by the next call */
    850     myData2022->key = key;
    851 
    852     if (value == VALID_NON_TERMINAL_2022) {
    853         /* indicate that the escape sequence is incomplete: key!=0 */
    854         return;
    855     } else if (value == INVALID_2022 ) {
    856         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    857     } else /* value == VALID_TERMINAL_2022 */ {
                /* a complete sequence: apply it according to the converter variant */
    858         switch(var){
    859 #ifdef U_ENABLE_GENERIC_ISO_2022
    860         case ISO_2022:
    861         {
    862             const char *chosenConverterName = escSeqStateTable_Result_2022[offset];
    863             if(chosenConverterName == NULL) {
    864                 /* SS2 or SS3 */
    865                 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    866                 _this->toUCallbackReason = UCNV_UNASSIGNED;
    867                 return;
    868             }
    869 
    870             _this->mode = UCNV_SI;
    871             ucnv_close(myData2022->currentConverter);
                    /* NOTE(review): myUConverter is not declared in this function; this disabled branch presumably does not compile as is - confirm */
    872             myData2022->currentConverter = myUConverter = ucnv_open(chosenConverterName, err);
    873             if(U_SUCCESS(*err)) {
    874                 myUConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
    875                 _this->mode = UCNV_SO;
    876             }
    877             break;
    878         }
    879 #endif
    880         case ISO_2022_JP:
    881             {
    882                 StateEnum tempState=(StateEnum)nextStateToUnicodeJP[offset];
    883                 switch(tempState) {
    884                 case INVALID_STATE:
    885                     *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    886                     break;
    887                 case SS2_STATE:
    888                     if(myData2022->toU2022State.cs[2]!=0) {
                                /* remember the current G0/G1 selection in prevG before switching to G2 */
    889                         if(myData2022->toU2022State.g<2) {
    890                             myData2022->toU2022State.prevG=myData2022->toU2022State.g;
    891                         }
    892                         myData2022->toU2022State.g=2;
    893                     } else {
    894                         /* illegal to have SS2 before a matching designator */
    895                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    896                     }
    897                     break;
    898                 /* case SS3_STATE: not used in ISO-2022-JP-x */
    899                 case ISO8859_1:
    900                 case ISO8859_7:
                            /* the charset must be enabled in jpCharsetMasks[] for this converter version */
    901                     if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
    902                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    903                     } else {
    904                         /* G2 charset for SS2 */
    905                         myData2022->toU2022State.cs[2]=(int8_t)tempState;
    906                     }
    907                     break;
    908                 default:
    909                     if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
    910                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    911                     } else {
    912                         /* G0 charset */
    913                         myData2022->toU2022State.cs[0]=(int8_t)tempState;
    914                     }
    915                     break;
    916                 }
    917             }
    918             break;
    919         case ISO_2022_CN:
    920             {
    921                 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
    922                 switch(tempState) {
    923                 case INVALID_STATE:
    924                     *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    925                     break;
    926                 case SS2_STATE:
    927                     if(myData2022->toU2022State.cs[2]!=0) {
    928                         if(myData2022->toU2022State.g<2) {
    929                             myData2022->toU2022State.prevG=myData2022->toU2022State.g;
    930                         }
    931                         myData2022->toU2022State.g=2;
    932                     } else {
    933                         /* illegal to have SS2 before a matching designator */
    934                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    935                     }
    936                     break;
    937                 case SS3_STATE:
    938                     if(myData2022->toU2022State.cs[3]!=0) {
    939                         if(myData2022->toU2022State.g<2) {
    940                             myData2022->toU2022State.prevG=myData2022->toU2022State.g;
    941                         }
    942                         myData2022->toU2022State.g=3;
    943                     } else {
    944                         /* illegal to have SS3 before a matching designator */
    945                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    946                     }
    947                     break;
    948                 case ISO_IR_165:
    949                     if(myData2022->version==0) {
    950                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    951                         break;
    952                     }
    953                     /*fall through*/
    954                 case GB2312_1:
    955                     /*fall through*/
    956                 case CNS_11643_1:
                            /* these three charsets are designated into cs[1] */
    957                     myData2022->toU2022State.cs[1]=(int8_t)tempState;
    958                     break;
    959                 case CNS_11643_2:
    960                     myData2022->toU2022State.cs[2]=(int8_t)tempState;
    961                     break;
    962                 default:
    963                     /* other CNS 11643 planes */
    964                     if(myData2022->version==0) {
    965                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    966                     } else {
    967                        myData2022->toU2022State.cs[3]=(int8_t)tempState;
    968                     }
    969                     break;
    970                 }
    971             }
    972             break;
    973         case ISO_2022_KR:
                    /* only the sequence at table offset 0x30 is accepted for KR */
    974             if(offset==0x30){
    975                 /* nothing to be done, just accept this one escape sequence */
    976             } else {
    977                 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    978             }
    979             break;
    980 
    981         default:
    982             *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    983             break;
    984         }
    985     }
    986     if(U_SUCCESS(*err)) {
                /* the sequence was consumed: no pending input bytes remain */
    987         _this->toULength = 0;
    988     } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) {
    989         if(_this->toULength>1) {
    990             /*
    991              * Ticket 5691: consistent illegal sequences:
    992              * - We include at least the first byte (ESC) in the illegal sequence.
    993              * - If any of the non-initial bytes could be the start of a character,
    994              *   we stop the illegal sequence before the first one of those.
    995              *   In escape sequences, all following bytes are "printable", that is,
    996              *   unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS),
    997              *   they are valid single/lead bytes.
    998              *   For simplicity, we always only report the initial ESC byte as the
    999              *   illegal sequence and back out all other bytes we looked at.
   1000              */
   1001             /* Back out some bytes. */
   1002             int8_t backOutDistance=_this->toULength-1;
   1003             int8_t bytesFromThisBuffer=_this->toULength-initialToULength;
   1004             if(backOutDistance<=bytesFromThisBuffer) {
   1005                 /* same as initialToULength<=1 */
   1006                 *source-=backOutDistance;
   1007             } else {
   1008                 /* Back out bytes from the previous buffer: Need to replay them. */
   1009                 _this->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance);
   1010                 /* same as -(initialToULength-1) */
   1011                 /* preToULength is negative! */
   1012                 uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULength);
   1013                 *source-=bytesFromThisBuffer;
   1014             }
                    /* only the first byte stays recorded as the illegal sequence */
   1015             _this->toULength=1;
   1016         }
   1017     } else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) {
   1018         _this->toUCallbackReason = UCNV_UNASSIGNED;
   1019     }
   1020 }
   1021 
   1022 /*Checks the characters of the buffer against valid 2022 escape sequences
   1023 *if the match we return a pointer to the initial start of the sequence otherwise
   1024 *we return sourceLimit
   1025 */
   1026 /*for 2022 looks ahead in the stream
   1027  *to determine the longest possible convertible
   1028  *data stream
   1029  */
   1030 static U_INLINE const char*
   1031 getEndOfBuffer_2022(const char** source,
   1032                    const char* sourceLimit,
   1033                    UBool flush){
   1034 
   1035     const char* mySource = *source;
   1036 
   1037 #ifdef U_ENABLE_GENERIC_ISO_2022
   1038     if (*source >= sourceLimit)
   1039         return sourceLimit;
   1040 
   1041     do{
   1042 
   1043         if (*mySource == ESC_2022){
   1044             int8_t i;
   1045             int32_t key = 0;
   1046             int32_t offset;
   1047             UCNV_TableStates_2022 value = VALID_NON_TERMINAL_2022;
   1048 
   1049             /* Kludge: I could not
   1050             * figure out the reason for validating an escape sequence
   1051             * twice - once here and once in changeState_2022().
   1052             * is it possible to have an ESC character in a ISO2022
   1053             * byte stream which is valid in a code page? Is it legal?
   1054             */
   1055             for (i=0;
   1056             (mySource+i < sourceLimit)&&(value == VALID_NON_TERMINAL_2022);
   1057             i++) {
   1058                 value =  getKey_2022(*(mySource+i), &key, &offset);
   1059             }
   1060             if (value > 0 || *mySource==ESC_2022)
   1061                 return mySource;
   1062 
   1063             if ((value == VALID_NON_TERMINAL_2022)&&(!flush) )
   1064                 return sourceLimit;
   1065         }
   1066     }while (++mySource < sourceLimit);
   1067 
   1068     return sourceLimit;
   1069 #else
   1070     while(mySource < sourceLimit && *mySource != ESC_2022) {
   1071         ++mySource;
   1072     }
   1073     return mySource;
   1074 #endif
   1075 }
   1076 
   1077 
   1078 /* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c
   1079  * any future change in _MBCSFromUChar32() function should be reflected here.
   1080  * @return number of bytes in *value; negative number if fallback; 0 if no mapping
   1081  */
   1082 static U_INLINE int32_t
   1083 MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData,
   1084                                          UChar32 c,
   1085                                          uint32_t* value,
   1086                                          UBool useFallback,
   1087                                          int outputType)
   1088 {
   1089     const int32_t *cx;
   1090     const uint16_t *table;
   1091     uint32_t stage2Entry;
   1092     uint32_t myValue;
   1093     int32_t length;
   1094     const uint8_t *p;
   1095     /*
   1096      * TODO(markus): Use and require new, faster MBCS conversion table structures.
   1097      * Use internal version of ucnv_open() that verifies that the new structures are available,
   1098      * else U_INTERNAL_PROGRAM_ERROR.
   1099      */
   1100     /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
   1101     if(c<0x10000 || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
   1102         table=sharedData->mbcs.fromUnicodeTable;
   1103         stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
   1104         /* get the bytes and the length for the output */
   1105         if(outputType==MBCS_OUTPUT_2){
   1106             myValue=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
   1107             if(myValue<=0xff) {
   1108                 length=1;
   1109             } else {
   1110                 length=2;
   1111             }
   1112         } else /* outputType==MBCS_OUTPUT_3 */ {
   1113             p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
   1114             myValue=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
   1115             if(myValue<=0xff) {
   1116                 length=1;
   1117             } else if(myValue<=0xffff) {
   1118                 length=2;
   1119             } else {
   1120                 length=3;
   1121             }
   1122         }
   1123         /* is this code point assigned, or do we use fallbacks? */
   1124         if((stage2Entry&(1<<(16+(c&0xf))))!=0) {
   1125             /* assigned */
   1126             *value=myValue;
   1127             return length;
   1128         } else if(FROM_U_USE_FALLBACK(useFallback, c) && myValue!=0) {
   1129             /*
   1130              * We allow a 0 byte output if the "assigned" bit is set for this entry.
   1131              * There is no way with this data structure for fallback output
   1132              * to be a zero byte.
   1133              */
   1134             *value=myValue;
   1135             return -length;
   1136         }
   1137     }
   1138 
   1139     cx=sharedData->mbcs.extIndexes;
   1140     if(cx!=NULL) {
   1141         return ucnv_extSimpleMatchFromU(cx, c, value, useFallback);
   1142     }
   1143 
   1144     /* unassigned */
   1145     return 0;
   1146 }
   1147 
   1148 /* This inline function replicates code in _MBCSSingleFromUChar32() function in ucnvmbcs.c
   1149  * any future change in _MBCSSingleFromUChar32() function should be reflected here.
   1150  * @param retval pointer to output byte
   1151  * @return 1 roundtrip byte  0 no mapping  -1 fallback byte
   1152  */
   1153 static U_INLINE int32_t
   1154 MBCS_SINGLE_FROM_UCHAR32(UConverterSharedData* sharedData,
   1155                                        UChar32 c,
   1156                                        uint32_t* retval,
   1157                                        UBool useFallback)
   1158 {
   1159     const uint16_t *table;
   1160     int32_t value;
   1161     /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
   1162     if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
   1163         return 0;
   1164     }
   1165     /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
   1166     table=sharedData->mbcs.fromUnicodeTable;
   1167     /* get the byte for the output */
   1168     value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
   1169     /* is this code point assigned, or do we use fallbacks? */
   1170     *retval=(uint32_t)(value&0xff);
   1171     if(value>=0xf00) {
   1172         return 1;  /* roundtrip */
   1173     } else if(useFallback ? value>=0x800 : value>=0xc00) {
   1174         return -1;  /* fallback taken */
   1175     } else {
   1176         return 0;  /* no mapping */
   1177     }
   1178 }
   1179 
   1180 /*
   1181  * Check that the result is a 2-byte value with each byte in the range A1..FE
   1182  * (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte
   1183  * to move it to the ISO 2022 range 21..7E.
   1184  * Return 0 if out of range.
   1185  */
   1186 static U_INLINE uint32_t
   1187 _2022FromGR94DBCS(uint32_t value) {
   1188     if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) &&
   1189         (uint8_t)(value - 0xa1) <= (0xfe - 0xa1)
   1190     ) {
   1191         return value - 0x8080;  /* shift down to 21..7e byte range */
   1192     } else {
   1193         return 0;  /* not valid for ISO 2022 */
   1194     }
   1195 }
   1196 
   1197 #if 0 /* 5691: Call sites now check for validity. They can just += 0x8080 after that. */
   1198 /*
   1199  * Disabled (inside #if 0) reverse of _2022FromGR94DBCS(): adds 0x8080 to the 2022 code point and
   1200  * returns the sum if each of its two low bytes is in the range A1..FE. Otherwise it returns the
   1201  * 2022 code point unchanged.
   1202  */
   1203 static U_INLINE uint32_t
   1204 _2022ToGR94DBCS(uint32_t value) {
   1205     uint32_t returnValue = value + 0x8080;
   1206     if( (uint16_t)(returnValue - 0xa1a1) <= (0xfefe - 0xa1a1) &&
   1207         (uint8_t)(returnValue - 0xa1) <= (0xfe - 0xa1)) {
   1208         return returnValue;
   1209     } else {
   1210         return value;
   1211     }
   1212 }
   1213 #endif
   1214 
   1215 #ifdef U_ENABLE_GENERIC_ISO_2022
   1216 
   1217 /**********************************************************************************
   1218 *  ISO-2022 Converter
   1219 *
   1220 *
   1221 */
   1222 
   1223 static void
   1224 T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args,
   1225                                                            UErrorCode* err){
   1226     const char* mySourceLimit, *realSourceLimit;
   1227     const char* sourceStart;
   1228     const UChar* myTargetStart;
   1229     UConverter* saveThis;
   1230     UConverterDataISO2022* myData;
   1231     int8_t length;
   1232 
   1233     saveThis = args->converter;
   1234     myData=((UConverterDataISO2022*)(saveThis->extraInfo));
   1235 
   1236     realSourceLimit = args->sourceLimit;
   1237     while (args->source < realSourceLimit) {
   1238         if(myData->key == 0) { /* are we in the middle of an escape sequence? */
   1239             /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
   1240             mySourceLimit = getEndOfBuffer_2022(&(args->source), realSourceLimit, args->flush);
   1241 
   1242             if(args->source < mySourceLimit) {
   1243                 if(myData->currentConverter==NULL) {
   1244                     myData->currentConverter = ucnv_open("ASCII",err);
   1245                     if(U_FAILURE(*err)){
   1246                         return;
   1247                     }
   1248 
   1249                     myData->currentConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
   1250                     saveThis->mode = UCNV_SO;
   1251                 }
   1252 
   1253                 /* convert to before the ESC or until the end of the buffer */
   1254                 myData->isFirstBuffer=FALSE;
   1255                 sourceStart = args->source;
   1256                 myTargetStart = args->target;
   1257                 args->converter = myData->currentConverter;
   1258                 ucnv_toUnicode(args->converter,
   1259                     &args->target,
   1260                     args->targetLimit,
   1261                     &args->source,
   1262                     mySourceLimit,
   1263                     args->offsets,
   1264                     (UBool)(args->flush && mySourceLimit == realSourceLimit),
   1265                     err);
   1266                 args->converter = saveThis;
   1267 
   1268                 if (*err == U_BUFFER_OVERFLOW_ERROR) {
   1269                     /* move the overflow buffer */
   1270                     length = saveThis->UCharErrorBufferLength = myData->currentConverter->UCharErrorBufferLength;
   1271                     myData->currentConverter->UCharErrorBufferLength = 0;
   1272                     if(length > 0) {
   1273                         uprv_memcpy(saveThis->UCharErrorBuffer,
   1274                                     myData->currentConverter->UCharErrorBuffer,
   1275                                     length*U_SIZEOF_UCHAR);
   1276                     }
   1277                     return;
   1278                 }
   1279 
   1280                 /*
   1281                  * At least one of:
   1282                  * -Error while converting
   1283                  * -Done with entire buffer
   1284                  * -Need to write offsets or update the current offset
   1285                  *  (leave that up to the code in ucnv.c)
   1286                  *
   1287                  * or else we just stopped at an ESC byte and continue with changeState_2022()
   1288                  */
   1289                 if (U_FAILURE(*err) ||
   1290                     (args->source == realSourceLimit) ||
   1291                     (args->offsets != NULL && (args->target != myTargetStart || args->source != sourceStart) ||
   1292                     (mySourceLimit < realSourceLimit && myData->currentConverter->toULength > 0))
   1293                 ) {
   1294                     /* copy partial or error input for truncated detection and error handling */
   1295                     if(U_FAILURE(*err)) {
   1296                         length = saveThis->invalidCharLength = myData->currentConverter->invalidCharLength;
   1297                         if(length > 0) {
   1298                             uprv_memcpy(saveThis->invalidCharBuffer, myData->currentConverter->invalidCharBuffer, length);
   1299                         }
   1300                     } else {
   1301                         length = saveThis->toULength = myData->currentConverter->toULength;
   1302                         if(length > 0) {
   1303                             uprv_memcpy(saveThis->toUBytes, myData->currentConverter->toUBytes, length);
   1304                             if(args->source < mySourceLimit) {
   1305                                 *err = U_TRUNCATED_CHAR_FOUND; /* truncated input before ESC */
   1306                             }
   1307                         }
   1308                     }
   1309                     return;
   1310                 }
   1311             }
   1312         }
   1313 
   1314         sourceStart = args->source;
   1315         changeState_2022(args->converter,
   1316                &(args->source),
   1317                realSourceLimit,
   1318                ISO_2022,
   1319                err);
   1320         if (U_FAILURE(*err) || (args->source != sourceStart && args->offsets != NULL)) {
   1321             /* let the ucnv.c code update its current offset */
   1322             return;
   1323         }
   1324     }
   1325 }
   1326 
   1327 #endif
   1328 
   1329 /*
   1330  * To Unicode Callback helper function
   1331  */
   1332 static void
   1333 toUnicodeCallback(UConverter *cnv,
   1334                   const uint32_t sourceChar, const uint32_t targetUniChar,
   1335                   UErrorCode* err){
   1336     if(sourceChar>0xff){
   1337         cnv->toUBytes[0] = (uint8_t)(sourceChar>>8);
   1338         cnv->toUBytes[1] = (uint8_t)sourceChar;
   1339         cnv->toULength = 2;
   1340     }
   1341     else{
   1342         cnv->toUBytes[0] =(char) sourceChar;
   1343         cnv->toULength = 1;
   1344     }
   1345 
   1346     if(targetUniChar == (missingCharMarker-1/*0xfffe*/)){
   1347         *err = U_INVALID_CHAR_FOUND;
   1348     }
   1349     else{
   1350         *err = U_ILLEGAL_CHAR_FOUND;
   1351     }
   1352 }
   1353 
   1354 /**************************************ISO-2022-JP*************************************************/
   1355 
   1356 /************************************** IMPORTANT **************************************************
   1357 * The UConverter_fromUnicode_ISO2022_JP converter does not use ucnv_fromUnicode() functions for SBCS,DBCS and
   1358 * MBCS; instead, the values are obtained directly by calling _MBCSFromUChar32().
   1359 * The converter iterates over each Unicode codepoint
   1360 * to obtain the equivalent codepoints from the codepages supported. Since the source buffer is
   1361 * processed one char at a time it would make sense to reduce the extra processing a canned converter
   1362 * would do as far as possible.
   1363 *
   1364 * If the implementation of these macros or structure of sharedData struct change in the future, make
   1365 * sure that ISO-2022 is also changed.
   1366 ***************************************************************************************************
   1367 */
   1368 
   1369 /***************************************************************************************************
   1370 * Rules for ISO-2022-jp encoding
* (i)   Escape sequences must be fully contained within a line; they should not
*       span new lines or CRs
   1373 * (ii)  If the last character on a line is represented by two bytes then an ASCII or
   1374 *       JIS-Roman character escape sequence should follow before the line terminates
   1375 * (iii) If the first character on the line is represented by two bytes then a two
   1376 *       byte character escape sequence should precede it
   1377 * (iv)  If no escape sequence is encountered then the characters are ASCII
   1378 * (v)   Latin(ISO-8859-1) and Greek(ISO-8859-7) characters must be designated to G2,
   1379 *       and invoked with SS2 (ESC N).
   1380 * (vi)  If there is any G0 designation in text, there must be a switch to
   1381 *       ASCII or to JIS X 0201-Roman before a space character (but not
   1382 *       necessarily before "ESC 4/14 2/0" or "ESC N ' '") or control
   1383 *       characters such as tab or CRLF.
* (vii) Supported encodings:
   1385 *          ASCII, JISX201, JISX208, JISX212, GB2312, KSC5601, ISO-8859-1,ISO-8859-7
   1386 *
   1387 *  source : RFC-1554
   1388 *
   1389 *          JISX201, JISX208,JISX212 : new .cnv data files created
   1390 *          KSC5601 : alias to ibm-949 mapping table
   1391 *          GB2312 : alias to ibm-1386 mapping table
   1392 *          ISO-8859-1 : Algorithmic implemented as LATIN1 case
*          ISO-8859-7 : alias to ibm-9409 mapping table
   1394 */
   1395 
   1396 /* preference order of JP charsets */
   1397 static const StateEnum jpCharsetPref[]={
   1398     ASCII,
   1399     JISX201,
   1400     ISO8859_1,
   1401     ISO8859_7,
   1402     JISX208,
   1403     JISX212,
   1404     GB2312,
   1405     KSC5601,
   1406     HWKANA_7BIT
   1407 };
   1408 
   1409 /*
   1410  * The escape sequences must be in order of the enum constants like JISX201  = 3,
   1411  * not in order of jpCharsetPref[]!
   1412  */
   1413 static const char escSeqChars[][6] ={
   1414     "\x1B\x28\x42",         /* <ESC>(B  ASCII       */
   1415     "\x1B\x2E\x41",         /* <ESC>.A  ISO-8859-1  */
   1416     "\x1B\x2E\x46",         /* <ESC>.F  ISO-8859-7  */
   1417     "\x1B\x28\x4A",         /* <ESC>(J  JISX-201    */
   1418     "\x1B\x24\x42",         /* <ESC>$B  JISX-208    */
   1419     "\x1B\x24\x28\x44",     /* <ESC>$(D JISX-212    */
   1420     "\x1B\x24\x41",         /* <ESC>$A  GB2312      */
   1421     "\x1B\x24\x28\x43",     /* <ESC>$(C KSC5601     */
   1422     "\x1B\x28\x49"          /* <ESC>(I  HWKANA_7BIT */
   1423 
   1424 };
   1425 static  const int8_t escSeqCharsLen[] ={
   1426     3, /* length of <ESC>(B  ASCII       */
   1427     3, /* length of <ESC>.A  ISO-8859-1  */
   1428     3, /* length of <ESC>.F  ISO-8859-7  */
   1429     3, /* length of <ESC>(J  JISX-201    */
   1430     3, /* length of <ESC>$B  JISX-208    */
   1431     4, /* length of <ESC>$(D JISX-212    */
   1432     3, /* length of <ESC>$A  GB2312      */
   1433     4, /* length of <ESC>$(C KSC5601     */
   1434     3  /* length of <ESC>(I  HWKANA_7BIT */
   1435 };
   1436 
   1437 /*
   1438 * The iteration over various code pages works this way:
   1439 * i)   Get the currentState from myConverterData->currentState
   1440 * ii)  Check if the character is mapped to a valid character in the currentState
   1441 *      Yes ->  a) set the initIterState to currentState
   1442 *       b) remain in this state until an invalid character is found
   1443 *      No  ->  a) go to the next code page and find the character
   1444 * iii) Before changing the state increment the current state check if the current state
*      is equal to the initIterState
   1446 *      Yes ->  A character that cannot be represented in any of the supported encodings
*       break and return a U_INVALID_CHAR_FOUND error
   1448 *      No  ->  Continue and find the character in next code page
   1449 *
   1450 *
   1451 * TODO: Implement a priority technique where the users are allowed to set the priority of code pages
   1452 */
   1453 
   1454 /* Map 00..7F to Unicode according to JIS X 0201. */
   1455 static U_INLINE uint32_t
   1456 jisx201ToU(uint32_t value) {
   1457     if(value < 0x5c) {
   1458         return value;
   1459     } else if(value == 0x5c) {
   1460         return 0xa5;
   1461     } else if(value == 0x7e) {
   1462         return 0x203e;
   1463     } else /* value <= 0x7f */ {
   1464         return value;
   1465     }
   1466 }
   1467 
   1468 /* Map Unicode to 00..7F according to JIS X 0201. Return U+FFFE if unmappable. */
   1469 static U_INLINE uint32_t
   1470 jisx201FromU(uint32_t value) {
   1471     if(value<=0x7f) {
   1472         if(value!=0x5c && value!=0x7e) {
   1473             return value;
   1474         }
   1475     } else if(value==0xa5) {
   1476         return 0x5c;
   1477     } else if(value==0x203e) {
   1478         return 0x7e;
   1479     }
   1480     return 0xfffe;
   1481 }
   1482 
   1483 /*
   1484  * Take a valid Shift-JIS byte pair, check that it is in the range corresponding
   1485  * to JIS X 0208, and convert it to a pair of 21..7E bytes.
   1486  * Return 0 if the byte pair is out of range.
   1487  */
   1488 static U_INLINE uint32_t
   1489 _2022FromSJIS(uint32_t value) {
   1490     uint8_t trail;
   1491 
   1492     if(value > 0xEFFC) {
   1493         return 0;  /* beyond JIS X 0208 */
   1494     }
   1495 
   1496     trail = (uint8_t)value;
   1497 
   1498     value &= 0xff00;  /* lead byte */
   1499     if(value <= 0x9f00) {
   1500         value -= 0x7000;
   1501     } else /* 0xe000 <= value <= 0xef00 */ {
   1502         value -= 0xb000;
   1503     }
   1504     value <<= 1;
   1505 
   1506     if(trail <= 0x9e) {
   1507         value -= 0x100;
   1508         if(trail <= 0x7e) {
   1509             value |= trail - 0x1f;
   1510         } else {
   1511             value |= trail - 0x20;
   1512         }
   1513     } else /* trail <= 0xfc */ {
   1514         value |= trail - 0x7e;
   1515     }
   1516     return value;
   1517 }
   1518 
   1519 /*
   1520  * Convert a pair of JIS X 0208 21..7E bytes to Shift-JIS.
   1521  * If either byte is outside 21..7E make sure that the result is not valid
   1522  * for Shift-JIS so that the converter catches it.
   1523  * Some invalid byte values already turn into equally invalid Shift-JIS
   1524  * byte values and need not be tested explicitly.
   1525  */
   1526 static U_INLINE void
   1527 _2022ToSJIS(uint8_t c1, uint8_t c2, char bytes[2]) {
   1528     if(c1&1) {
   1529         ++c1;
   1530         if(c2 <= 0x5f) {
   1531             c2 += 0x1f;
   1532         } else if(c2 <= 0x7e) {
   1533             c2 += 0x20;
   1534         } else {
   1535             c2 = 0;  /* invalid */
   1536         }
   1537     } else {
   1538         if((uint8_t)(c2-0x21) <= ((0x7e)-0x21)) {
   1539             c2 += 0x7e;
   1540         } else {
   1541             c2 = 0;  /* invalid */
   1542         }
   1543     }
   1544     c1 >>= 1;
   1545     if(c1 <= 0x2f) {
   1546         c1 += 0x70;
   1547     } else if(c1 <= 0x3f) {
   1548         c1 += 0xb0;
   1549     } else {
   1550         c1 = 0;  /* invalid */
   1551     }
   1552     bytes[0] = (char)c1;
   1553     bytes[1] = (char)c2;
   1554 }
   1555 
   1556 /*
   1557  * JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS)
   1558  * Katakana.
   1559  * Now that we use a Shift-JIS table for JIS X 0208 we need to hardcode these fallbacks
   1560  * because Shift-JIS roundtrips half-width Katakana to single bytes.
   1561  * These were the only fallbacks in ICU's jisx-208.ucm file.
   1562  */
   1563 static const uint16_t hwkana_fb[HWKANA_END - HWKANA_START + 1] = {
   1564     0x2123,  /* U+FF61 */
   1565     0x2156,
   1566     0x2157,
   1567     0x2122,
   1568     0x2126,
   1569     0x2572,
   1570     0x2521,
   1571     0x2523,
   1572     0x2525,
   1573     0x2527,
   1574     0x2529,
   1575     0x2563,
   1576     0x2565,
   1577     0x2567,
   1578     0x2543,
   1579     0x213C,  /* U+FF70 */
   1580     0x2522,
   1581     0x2524,
   1582     0x2526,
   1583     0x2528,
   1584     0x252A,
   1585     0x252B,
   1586     0x252D,
   1587     0x252F,
   1588     0x2531,
   1589     0x2533,
   1590     0x2535,
   1591     0x2537,
   1592     0x2539,
   1593     0x253B,
   1594     0x253D,
   1595     0x253F,  /* U+FF80 */
   1596     0x2541,
   1597     0x2544,
   1598     0x2546,
   1599     0x2548,
   1600     0x254A,
   1601     0x254B,
   1602     0x254C,
   1603     0x254D,
   1604     0x254E,
   1605     0x254F,
   1606     0x2552,
   1607     0x2555,
   1608     0x2558,
   1609     0x255B,
   1610     0x255E,
   1611     0x255F,  /* U+FF90 */
   1612     0x2560,
   1613     0x2561,
   1614     0x2562,
   1615     0x2564,
   1616     0x2566,
   1617     0x2568,
   1618     0x2569,
   1619     0x256A,
   1620     0x256B,
   1621     0x256C,
   1622     0x256D,
   1623     0x256F,
   1624     0x2573,
   1625     0x212B,
   1626     0x212C   /* U+FF9F */
   1627 };
   1628 
   1629 static void
   1630 UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err) {
   1631     UConverter *cnv = args->converter;
   1632     UConverterDataISO2022 *converterData;
   1633     ISO2022State *pFromU2022State;
   1634     uint8_t *target = (uint8_t *) args->target;
   1635     const uint8_t *targetLimit = (const uint8_t *) args->targetLimit;
   1636     const UChar* source = args->source;
   1637     const UChar* sourceLimit = args->sourceLimit;
   1638     int32_t* offsets = args->offsets;
   1639     UChar32 sourceChar;
   1640     char buffer[8];
   1641     int32_t len, outLen;
   1642     int8_t choices[10];
   1643     int32_t choiceCount;
   1644     uint32_t targetValue = 0;
   1645     UBool useFallback;
   1646 
   1647     int32_t i;
   1648     int8_t cs, g;
   1649 
   1650     /* set up the state */
   1651     converterData     = (UConverterDataISO2022*)cnv->extraInfo;
   1652     pFromU2022State   = &converterData->fromU2022State;
   1653 
   1654     choiceCount = 0;
   1655 
   1656     /* check if the last codepoint of previous buffer was a lead surrogate*/
   1657     if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
   1658         goto getTrail;
   1659     }
   1660 
   1661     while(source < sourceLimit) {
   1662         if(target < targetLimit) {
   1663 
   1664             sourceChar  = *(source++);
   1665             /*check if the char is a First surrogate*/
   1666             if(UTF_IS_SURROGATE(sourceChar)) {
   1667                 if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
   1668 getTrail:
   1669                     /*look ahead to find the trail surrogate*/
   1670                     if(source < sourceLimit) {
   1671                         /* test the following code unit */
   1672                         UChar trail=(UChar) *source;
   1673                         if(UTF_IS_SECOND_SURROGATE(trail)) {
   1674                             source++;
   1675                             sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
   1676                             cnv->fromUChar32=0x00;
   1677                             /* convert this supplementary code point */
   1678                             /* exit this condition tree */
   1679                         } else {
   1680                             /* this is an unmatched lead code unit (1st surrogate) */
   1681                             /* callback(illegal) */
   1682                             *err=U_ILLEGAL_CHAR_FOUND;
   1683                             cnv->fromUChar32=sourceChar;
   1684                             break;
   1685                         }
   1686                     } else {
   1687                         /* no more input */
   1688                         cnv->fromUChar32=sourceChar;
   1689                         break;
   1690                     }
   1691                 } else {
   1692                     /* this is an unmatched trail code unit (2nd surrogate) */
   1693                     /* callback(illegal) */
   1694                     *err=U_ILLEGAL_CHAR_FOUND;
   1695                     cnv->fromUChar32=sourceChar;
   1696                     break;
   1697                 }
   1698             }
   1699 
   1700             /* do not convert SO/SI/ESC */
   1701             if(IS_2022_CONTROL(sourceChar)) {
   1702                 /* callback(illegal) */
   1703                 *err=U_ILLEGAL_CHAR_FOUND;
   1704                 cnv->fromUChar32=sourceChar;
   1705                 break;
   1706             }
   1707 
   1708             /* do the conversion */
   1709 
   1710             if(choiceCount == 0) {
   1711                 uint16_t csm;
   1712 
   1713                 /*
   1714                  * The csm variable keeps track of which charsets are allowed
   1715                  * and not used yet while building the choices[].
   1716                  */
   1717                 csm = jpCharsetMasks[converterData->version];
   1718                 choiceCount = 0;
   1719 
   1720                 /* JIS7/8: try single-byte half-width Katakana before JISX208 */
   1721                 if(converterData->version == 3 || converterData->version == 4) {
   1722                     choices[choiceCount++] = (int8_t)HWKANA_7BIT;
   1723                 }
   1724                 /* Do not try single-byte half-width Katakana for other versions. */
   1725                 csm &= ~CSM(HWKANA_7BIT);
   1726 
   1727                 /* try the current G0 charset */
   1728                 choices[choiceCount++] = cs = pFromU2022State->cs[0];
   1729                 csm &= ~CSM(cs);
   1730 
   1731                 /* try the current G2 charset */
   1732                 if((cs = pFromU2022State->cs[2]) != 0) {
   1733                     choices[choiceCount++] = cs;
   1734                     csm &= ~CSM(cs);
   1735                 }
   1736 
   1737                 /* try all the other possible charsets */
   1738                 for(i = 0; i < LENGTHOF(jpCharsetPref); ++i) {
   1739                     cs = (int8_t)jpCharsetPref[i];
   1740                     if(CSM(cs) & csm) {
   1741                         choices[choiceCount++] = cs;
   1742                         csm &= ~CSM(cs);
   1743                     }
   1744                 }
   1745             }
   1746 
   1747             cs = g = 0;
   1748             /*
   1749              * len==0: no mapping found yet
   1750              * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
   1751              * len>0: found a roundtrip result, done
   1752              */
   1753             len = 0;
   1754             /*
   1755              * We will turn off useFallback after finding a fallback,
   1756              * but we still get fallbacks from PUA code points as usual.
   1757              * Therefore, we will also need to check that we don't overwrite
   1758              * an early fallback with a later one.
   1759              */
   1760             useFallback = cnv->useFallback;
   1761 
   1762             for(i = 0; i < choiceCount && len <= 0; ++i) {
   1763                 uint32_t value;
   1764                 int32_t len2;
   1765                 int8_t cs0 = choices[i];
   1766                 switch(cs0) {
   1767                 case ASCII:
   1768                     if(sourceChar <= 0x7f) {
   1769                         targetValue = (uint32_t)sourceChar;
   1770                         len = 1;
   1771                         cs = cs0;
   1772                         g = 0;
   1773                     }
   1774                     break;
   1775                 case ISO8859_1:
   1776                     if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
   1777                         targetValue = (uint32_t)sourceChar - 0x80;
   1778                         len = 1;
   1779                         cs = cs0;
   1780                         g = 2;
   1781                     }
   1782                     break;
   1783                 case HWKANA_7BIT:
   1784                     if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
   1785                         if(converterData->version==3) {
   1786                             /* JIS7: use G1 (SO) */
   1787                             /* Shift U+FF61..U+FF9F to bytes 21..5F. */
   1788                             targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21));
   1789                             len = 1;
   1790                             pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */
   1791                             g = 1;
   1792                         } else if(converterData->version==4) {
   1793                             /* JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below */
   1794                             /* Shift U+FF61..U+FF9F to bytes A1..DF. */
   1795                             targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0xa1));
   1796                             len = 1;
   1797 
   1798                             cs = pFromU2022State->cs[0];
   1799                             if(IS_JP_DBCS(cs)) {
   1800                                 /* switch from a DBCS charset to JISX201 */
   1801                                 cs = (int8_t)JISX201;
   1802                             }
   1803                             /* else stay in the current G0 charset */
   1804                             g = 0;
   1805                         }
   1806                         /* else do not use HWKANA_7BIT with other versions */
   1807                     }
   1808                     break;
   1809                 case JISX201:
   1810                     /* G0 SBCS */
   1811                     value = jisx201FromU(sourceChar);
   1812                     if(value <= 0x7f) {
   1813                         targetValue = value;
   1814                         len = 1;
   1815                         cs = cs0;
   1816                         g = 0;
   1817                         useFallback = FALSE;
   1818                     }
   1819                     break;
   1820                 case JISX208:
   1821                     /* G0 DBCS from Shift-JIS table */
   1822                     len2 = MBCS_FROM_UCHAR32_ISO2022(
   1823                                 converterData->myConverterArray[cs0],
   1824                                 sourceChar, &value,
   1825                                 useFallback, MBCS_OUTPUT_2);
   1826                     if(len2 == 2 || (len2 == -2 && len == 0)) {  /* only accept DBCS: abs(len)==2 */
   1827                         value = _2022FromSJIS(value);
   1828                         if(value != 0) {
   1829                             targetValue = value;
   1830                             len = len2;
   1831                             cs = cs0;
   1832                             g = 0;
   1833                             useFallback = FALSE;
   1834                         }
   1835                     } else if(len == 0 && useFallback &&
   1836                               (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
   1837                         targetValue = hwkana_fb[sourceChar - HWKANA_START];
   1838                         len = -2;
   1839                         cs = cs0;
   1840                         g = 0;
   1841                         useFallback = FALSE;
   1842                     }
   1843                     break;
   1844                 case ISO8859_7:
   1845                     /* G0 SBCS forced to 7-bit output */
   1846                     len2 = MBCS_SINGLE_FROM_UCHAR32(
   1847                                 converterData->myConverterArray[cs0],
   1848                                 sourceChar, &value,
   1849                                 useFallback);
   1850                     if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= value && value <= GR96_END) {
   1851                         targetValue = value - 0x80;
   1852                         len = len2;
   1853                         cs = cs0;
   1854                         g = 2;
   1855                         useFallback = FALSE;
   1856                     }
   1857                     break;
   1858                 default:
   1859                     /* G0 DBCS */
   1860                     len2 = MBCS_FROM_UCHAR32_ISO2022(
   1861                                 converterData->myConverterArray[cs0],
   1862                                 sourceChar, &value,
   1863                                 useFallback, MBCS_OUTPUT_2);
   1864                     if(len2 == 2 || (len2 == -2 && len == 0)) {  /* only accept DBCS: abs(len)==2 */
   1865                         if(cs0 == KSC5601) {
   1866                             /*
   1867                              * Check for valid bytes for the encoding scheme.
   1868                              * This is necessary because the sub-converter (windows-949)
   1869                              * has a broader encoding scheme than is valid for 2022.
   1870                              */
   1871                             value = _2022FromGR94DBCS(value);
   1872                             if(value == 0) {
   1873                                 break;
   1874                             }
   1875                         }
   1876                         targetValue = value;
   1877                         len = len2;
   1878                         cs = cs0;
   1879                         g = 0;
   1880                         useFallback = FALSE;
   1881                     }
   1882                     break;
   1883                 }
   1884             }
   1885 
   1886             if(len != 0) {
   1887                 if(len < 0) {
   1888                     len = -len;  /* fallback */
   1889                 }
   1890                 outLen = 0; /* count output bytes */
   1891 
   1892                 /* write SI if necessary (only for JIS7) */
   1893                 if(pFromU2022State->g == 1 && g == 0) {
   1894                     buffer[outLen++] = UCNV_SI;
   1895                     pFromU2022State->g = 0;
   1896                 }
   1897 
   1898                 /* write the designation sequence if necessary */
   1899                 if(cs != pFromU2022State->cs[g]) {
   1900                     int32_t escLen = escSeqCharsLen[cs];
   1901                     uprv_memcpy(buffer + outLen, escSeqChars[cs], escLen);
   1902                     outLen += escLen;
   1903                     pFromU2022State->cs[g] = cs;
   1904 
   1905                     /* invalidate the choices[] */
   1906                     choiceCount = 0;
   1907                 }
   1908 
   1909                 /* write the shift sequence if necessary */
   1910                 if(g != pFromU2022State->g) {
   1911                     switch(g) {
   1912                     /* case 0 handled before writing escapes */
   1913                     case 1:
   1914                         buffer[outLen++] = UCNV_SO;
   1915                         pFromU2022State->g = 1;
   1916                         break;
   1917                     default: /* case 2 */
   1918                         buffer[outLen++] = 0x1b;
   1919                         buffer[outLen++] = 0x4e;
   1920                         break;
   1921                     /* no case 3: no SS3 in ISO-2022-JP-x */
   1922                     }
   1923                 }
   1924 
   1925                 /* write the output bytes */
   1926                 if(len == 1) {
   1927                     buffer[outLen++] = (char)targetValue;
   1928                 } else /* len == 2 */ {
   1929                     buffer[outLen++] = (char)(targetValue >> 8);
   1930                     buffer[outLen++] = (char)targetValue;
   1931                 }
   1932             } else {
   1933                 /*
   1934                  * if we cannot find the character after checking all codepages
   1935                  * then this is an error
   1936                  */
   1937                 *err = U_INVALID_CHAR_FOUND;
   1938                 cnv->fromUChar32=sourceChar;
   1939                 break;
   1940             }
   1941 
   1942             if(sourceChar == CR || sourceChar == LF) {
   1943                 /* reset the G2 state at the end of a line (conversion got us into ASCII or JISX201 already) */
   1944                 pFromU2022State->cs[2] = 0;
   1945                 choiceCount = 0;
   1946             }
   1947 
   1948             /* output outLen>0 bytes in buffer[] */
   1949             if(outLen == 1) {
   1950                 *target++ = buffer[0];
   1951                 if(offsets) {
   1952                     *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */
   1953                 }
   1954             } else if(outLen == 2 && (target + 2) <= targetLimit) {
   1955                 *target++ = buffer[0];
   1956                 *target++ = buffer[1];
   1957                 if(offsets) {
   1958                     int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));
   1959                     *offsets++ = sourceIndex;
   1960                     *offsets++ = sourceIndex;
   1961                 }
   1962             } else {
   1963                 fromUWriteUInt8(
   1964                     cnv,
   1965                     buffer, outLen,
   1966                     &target, (const char *)targetLimit,
   1967                     &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
   1968                     err);
   1969                 if(U_FAILURE(*err)) {
   1970                     break;
   1971                 }
   1972             }
   1973         } /* end if(myTargetIndex<myTargetLength) */
   1974         else{
   1975             *err =U_BUFFER_OVERFLOW_ERROR;
   1976             break;
   1977         }
   1978 
   1979     }/* end while(mySourceIndex<mySourceLength) */
   1980 
   1981     /*
   1982      * the end of the input stream and detection of truncated input
   1983      * are handled by the framework, but for ISO-2022-JP conversion
   1984      * we need to be in ASCII mode at the very end
   1985      *
   1986      * conditions:
   1987      *   successful
   1988      *   in SO mode or not in ASCII mode
   1989      *   end of input and no truncated input
   1990      */
   1991     if( U_SUCCESS(*err) &&
   1992         (pFromU2022State->g!=0 || pFromU2022State->cs[0]!=ASCII) &&
   1993         args->flush && source>=sourceLimit && cnv->fromUChar32==0
   1994     ) {
   1995         int32_t sourceIndex;
   1996 
   1997         outLen = 0;
   1998 
   1999         if(pFromU2022State->g != 0) {
   2000             buffer[outLen++] = UCNV_SI;
   2001             pFromU2022State->g = 0;
   2002         }
   2003 
   2004         if(pFromU2022State->cs[0] != ASCII) {
   2005             int32_t escLen = escSeqCharsLen[ASCII];
   2006             uprv_memcpy(buffer + outLen, escSeqChars[ASCII], escLen);
   2007             outLen += escLen;
   2008             pFromU2022State->cs[0] = (int8_t)ASCII;
   2009         }
   2010 
   2011         /* get the source index of the last input character */
   2012         /*
   2013          * TODO this would be simpler and more reliable if we used a pair
   2014          * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
   2015          * so that we could simply use the prevSourceIndex here;
   2016          * this code gives an incorrect result for the rare case of an unmatched
   2017          * trail surrogate that is alone in the last buffer of the text stream
   2018          */
   2019         sourceIndex=(int32_t)(source-args->source);
   2020         if(sourceIndex>0) {
   2021             --sourceIndex;
   2022             if( U16_IS_TRAIL(args->source[sourceIndex]) &&
   2023                 (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
   2024             ) {
   2025                 --sourceIndex;
   2026             }
   2027         } else {
   2028             sourceIndex=-1;
   2029         }
   2030 
   2031         fromUWriteUInt8(
   2032             cnv,
   2033             buffer, outLen,
   2034             &target, (const char *)targetLimit,
   2035             &offsets, sourceIndex,
   2036             err);
   2037     }
   2038 
   2039     /*save the state and return */
   2040     args->source = source;
   2041     args->target = (char*)target;
   2042 }
   2043 
   2044 /*************** to unicode *******************/
   2045 
/*
 * toUnicode implementation for the ISO-2022-JP family: reads bytes from
 * args->source and writes UTF-16 code units to args->target, filling
 * args->offsets (when non-NULL) with the source index of the byte sequence
 * that produced each output unit.
 *
 * Converter state used across calls:
 *   - myData->key != 0: an escape sequence is only partially read; the
 *     function resumes at the "escape" label.
 *   - converter->toULength == 1: toUBytes[0] holds the lead byte of a
 *     double-byte character; the function resumes at "getTrailByte".
 *   - myData->toU2022State (cs[], g, prevG): the designated charsets and the
 *     currently invoked one.
 *   - myData->isEmptySegment: TRUE right after a completed escape sequence,
 *     FALSE once any other byte has been seen; used to flag an escape
 *     sequence that follows an empty segment (version 0 only).
 *
 * Errors set through *err by this function itself:
 *   - U_BUFFER_OVERFLOW_ERROR when input remains but the target is full;
 *   - U_ILLEGAL_ESCAPE_SEQUENCE for an escape sequence after an empty
 *     segment (version 0);
 * plus whatever changeState_2022() and toUnicodeCallback() report.
 *
 * On every exit args->source and args->target are updated to the positions
 * reached.
 */
static void
UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
                                               UErrorCode* err){
    char tempBuf[2];
    const char *mySource = (char *) args->source;
    UChar *myTarget = args->target;
    const char *mySourceLimit = args->sourceLimit;
    uint32_t targetUniChar = 0x0000;
    uint32_t mySourceChar = 0x0000;
    uint32_t tmpSourceChar = 0x0000;
    UConverterDataISO2022* myData;
    ISO2022State *pToU2022State;
    StateEnum cs;

    myData=(UConverterDataISO2022*)(args->converter->extraInfo);
    pToU2022State = &myData->toU2022State;

    if(myData->key != 0) {
        /* continue with a partial escape sequence */
        goto escape;
    } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
        /* continue with a partial double-byte character */
        /*
         * Re-create the locals that the DBCS branch expects (lead byte, current
         * charset, "nothing converted yet") before jumping into the middle of it.
         */
        mySourceChar = args->converter->toUBytes[0];
        args->converter->toULength = 0;
        cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
        targetUniChar = missingCharMarker;
        goto getTrailByte;
    }

    while(mySource < mySourceLimit){

        /* stays at missingCharMarker unless a branch below produces a code point */
        targetUniChar =missingCharMarker;

        if(myTarget < args->targetLimit){

            mySourceChar= (unsigned char) *mySource++;

            switch(mySourceChar) {
            case UCNV_SI:
                if(myData->version==3) {
                    /* JIS7: back to G0 */
                    pToU2022State->g=0;
                    continue;
                } else {
                    /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
                    /* break with targetUniChar still unset: the byte goes to toUnicodeCallback() below */
                    myData->isEmptySegment = FALSE;  /* reset this, we have a different error */
                    break;
                }

            case UCNV_SO:
                if(myData->version==3) {
                    /* JIS7: switch to G1 half-width Katakana */
                    pToU2022State->cs[1] = (int8_t)HWKANA_7BIT;
                    pToU2022State->g=1;
                    continue;
                } else {
                    /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
                    /* break with targetUniChar still unset: the byte goes to toUnicodeCallback() below */
                    myData->isEmptySegment = FALSE;  /* reset this, we have a different error */
                    break;
                }

            case ESC_2022:
                /* un-read the ESC so that changeState_2022() sees the whole sequence */
                mySource--;
escape:
                {
                    /* remembered to compute the length of the sequence consumed by changeState_2022() */
                    const char * mySourceBefore = mySource;
                    int8_t toULengthBefore = args->converter->toULength;

                    changeState_2022(args->converter,&(mySource),
                        mySourceLimit, ISO_2022_JP,err);

                    /* If in ISO-2022-JP only and we successfully completed an escape sequence, but previous segment was empty, create an error */
                    if(myData->version==0 && myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
                        *err = U_ILLEGAL_ESCAPE_SEQUENCE;
                        args->converter->toUCallbackReason = UCNV_IRREGULAR;
                        /* previously buffered bytes plus the bytes consumed in this call */
                        args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore));
                    }
                }

                /* invalid or illegal escape sequence */
                if(U_FAILURE(*err)){
                    args->target = myTarget;
                    args->source = mySource;
                    myData->isEmptySegment = FALSE;  /* Reset to avoid future spurious errors */
                    return;
                }
                /* If we successfully completed an escape sequence, we begin a new segment, empty so far */
                if(myData->key==0) {
                    myData->isEmptySegment = TRUE;
                }
                continue;

            /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */

            case CR:
                /*falls through*/
            case LF:
                /* automatically reset to single-byte mode */
                if((StateEnum)pToU2022State->cs[0] != ASCII && (StateEnum)pToU2022State->cs[0] != JISX201) {
                    pToU2022State->cs[0] = (int8_t)ASCII;
                }
                pToU2022State->cs[2] = 0;
                pToU2022State->g = 0;
                /* falls through */
            default:
                /* convert one or two bytes */
                myData->isEmptySegment = FALSE;
                cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
                /* the uint8_t subtraction is a range check for 0xa1..0xdf */
                if( (uint8_t)(mySourceChar - 0xa1) <= (0xdf - 0xa1) && myData->version==4 &&
                    !IS_JP_DBCS(cs)
                ) {
                    /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */
                    targetUniChar = mySourceChar + (HWKANA_START - 0xa1);

                    /* return from a single-shift state to the previous one */
                    if(pToU2022State->g >= 2) {
                        pToU2022State->g=pToU2022State->prevG;
                    }
                } else switch(cs) {
                case ASCII:
                    if(mySourceChar <= 0x7f) {
                        targetUniChar = mySourceChar;
                    }
                    break;
                case ISO8859_1:
                    if(mySourceChar <= 0x7f) {
                        targetUniChar = mySourceChar + 0x80;
                    }
                    /* return from a single-shift state to the previous one */
                    pToU2022State->g=pToU2022State->prevG;
                    break;
                case ISO8859_7:
                    if(mySourceChar <= 0x7f) {
                        /* convert mySourceChar+0x80 to use a normal 8-bit table */
                        targetUniChar =
                            _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(
                                myData->myConverterArray[cs],
                                mySourceChar + 0x80);
                    }
                    /* return from a single-shift state to the previous one */
                    pToU2022State->g=pToU2022State->prevG;
                    break;
                case JISX201:
                    if(mySourceChar <= 0x7f) {
                        targetUniChar = jisx201ToU(mySourceChar);
                    }
                    break;
                case HWKANA_7BIT:
                    if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) {
                        /* 7-bit halfwidth Katakana */
                        targetUniChar = mySourceChar + (HWKANA_START - 0x21);
                    }
                    break;
                default:
                    /* G0 DBCS */
                    if(mySource < mySourceLimit) {
                        int leadIsOk, trailIsOk;
                        uint8_t trailByte;
getTrailByte:
                        /* peek only; the trail byte is consumed below once we know what to do with it */
                        trailByte = (uint8_t)*mySource;
                        /*
                         * Ticket 5691: consistent illegal sequences:
                         * - We include at least the first byte in the illegal sequence.
                         * - If any of the non-initial bytes could be the start of a character,
                         *   we stop the illegal sequence before the first one of those.
                         *
                         * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
                         * an ESC/SO/SI, we report only the first byte as the illegal sequence.
                         * Otherwise we convert or report the pair of bytes.
                         */
                        leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
                        trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
                        if (leadIsOk && trailIsOk) {
                            ++mySource;
                            tmpSourceChar = (mySourceChar << 8) | trailByte;
                            if(cs == JISX208) {
                                /* the JISX208 table is looked up with the bytes produced by _2022ToSJIS() */
                                _2022ToSJIS((uint8_t)mySourceChar, trailByte, tempBuf);
                                mySourceChar = tmpSourceChar;
                            } else {
                                /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
                                mySourceChar = tmpSourceChar;
                                if (cs == KSC5601) {
                                    tmpSourceChar += 0x8080;  /* = _2022ToGR94DBCS(tmpSourceChar) */
                                }
                                tempBuf[0] = (char)(tmpSourceChar >> 8);
                                tempBuf[1] = (char)(tmpSourceChar);
                            }
                            targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE);
                        } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
                            /* report a pair of illegal bytes if the second byte is not a DBCS starter */
                            ++mySource;
                            /* add another bit so that the code below writes 2 bytes in case of error */
                            mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
                        }
                    } else {
                        /* input ends after the lead byte: save it and resume at getTrailByte on the next call */
                        args->converter->toUBytes[0] = (uint8_t)mySourceChar;
                        args->converter->toULength = 1;
                        goto endloop;
                    }
                }  /* End of inner switch */
                break;
            }  /* End of outer switch */
            /*
             * Three outcomes, decided by targetUniChar relative to missingCharMarker:
             * below missingCharMarker-1 -> one UChar; above missingCharMarker -> a
             * surrogate pair; otherwise -> not converted, hand the byte(s) to the callback.
             * For offsets, mySourceChar > 0xff means two source bytes were consumed.
             */
            if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){
                if(args->offsets){
                    args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
                }
                *(myTarget++)=(UChar)targetUniChar;
            }
            else if(targetUniChar > missingCharMarker){
                /* disassemble the surrogate pair and write to output*/
                targetUniChar-=0x0010000;
                *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));
                if(args->offsets){
                    args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
                }
                ++myTarget;
                if(myTarget< args->targetLimit){
                    *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
                    if(args->offsets){
                        args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
                    }
                    ++myTarget;
                }else{
                    /* no room for the trail surrogate: store it in the converter's UChar overflow buffer */
                    args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
                                    (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
                }

            }
            else{
                /* Call the callback function*/
                toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
                break;
            }
        }
        else{    /* goes with "if(myTarget < args->targetLimit)"  way up near top of function */
            *err =U_BUFFER_OVERFLOW_ERROR;
            break;
        }
    }
endloop:
    /* write the positions reached back to the caller-visible arguments */
    args->target = myTarget;
    args->source = mySource;
}
   2288 
   2289 
/***************************************************************
*   Rules for ISO-2022-KR encoding
*   i) The KSC5601 designator sequence should appear only once in a file,
*      at the beginning of a line before any KSC5601 characters. This usually
*      means that it appears by itself on the first line of the file.
*  ii) There are only 2 shifting sequences: SO to shift into double-byte mode
*      and SI to shift into single-byte mode.
*/
   2298 static void
   2299 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){
   2300 
   2301     UConverter* saveConv = args->converter;
   2302     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022*)saveConv->extraInfo;
   2303     args->converter=myConverterData->currentConverter;
   2304 
   2305     myConverterData->currentConverter->fromUChar32 = saveConv->fromUChar32;
   2306     ucnv_MBCSFromUnicodeWithOffsets(args,err);
   2307     saveConv->fromUChar32 = myConverterData->currentConverter->fromUChar32;
   2308 
   2309     if(*err == U_BUFFER_OVERFLOW_ERROR) {
   2310         if(myConverterData->currentConverter->charErrorBufferLength > 0) {
   2311             uprv_memcpy(
   2312                 saveConv->charErrorBuffer,
   2313                 myConverterData->currentConverter->charErrorBuffer,
   2314                 myConverterData->currentConverter->charErrorBufferLength);
   2315         }
   2316         saveConv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;
   2317         myConverterData->currentConverter->charErrorBufferLength = 0;
   2318     }
   2319     args->converter=saveConv;
   2320 }
   2321 
   2322 static void
   2323 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
   2324 
   2325     const UChar *source = args->source;
   2326     const UChar *sourceLimit = args->sourceLimit;
   2327     unsigned char *target = (unsigned char *) args->target;
   2328     unsigned char *targetLimit = (unsigned char *) args->targetLimit;
   2329     int32_t* offsets = args->offsets;
   2330     uint32_t targetByteUnit = 0x0000;
   2331     UChar32 sourceChar = 0x0000;
   2332     UBool isTargetByteDBCS;
   2333     UBool oldIsTargetByteDBCS;
   2334     UConverterDataISO2022 *converterData;
   2335     UConverterSharedData* sharedData;
   2336     UBool useFallback;
   2337     int32_t length =0;
   2338 
   2339     converterData=(UConverterDataISO2022*)args->converter->extraInfo;
   2340     /* if the version is 1 then the user is requesting
   2341      * conversion with ibm-25546 pass the arguments to
   2342      * MBCS converter and return
   2343      */
   2344     if(converterData->version==1){
   2345         UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
   2346         return;
   2347     }
   2348 
   2349     /* initialize data */
   2350     sharedData = converterData->currentConverter->sharedData;
   2351     useFallback = args->converter->useFallback;
   2352     isTargetByteDBCS=(UBool)args->converter->fromUnicodeStatus;
   2353     oldIsTargetByteDBCS = isTargetByteDBCS;
   2354 
   2355     isTargetByteDBCS   = (UBool) args->converter->fromUnicodeStatus;
   2356     if((sourceChar = args->converter->fromUChar32)!=0 && target <targetLimit) {
   2357         goto getTrail;
   2358     }
   2359     while(source < sourceLimit){
   2360 
   2361         targetByteUnit = missingCharMarker;
   2362 
   2363         if(target < (unsigned char*) args->targetLimit){
   2364             sourceChar = *source++;
   2365 
   2366             /* do not convert SO/SI/ESC */
   2367             if(IS_2022_CONTROL(sourceChar)) {
   2368                 /* callback(illegal) */
   2369                 *err=U_ILLEGAL_CHAR_FOUND;
   2370                 args->converter->fromUChar32=sourceChar;
   2371                 break;
   2372             }
   2373 
   2374             length = MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,&targetByteUnit,useFallback,MBCS_OUTPUT_2);
   2375             if(length < 0) {
   2376                 length = -length;  /* fallback */
   2377             }
   2378             /* only DBCS or SBCS characters are expected*/
   2379             /* DB characters with high bit set to 1 are expected */
   2380             if( length > 2 || length==0 ||
   2381                 (length == 1 && targetByteUnit > 0x7f) ||
   2382                 (length == 2 &&
   2383                     ((uint16_t)(targetByteUnit - 0xa1a1) > (0xfefe - 0xa1a1) ||
   2384                     (uint8_t)(targetByteUnit - 0xa1) > (0xfe - 0xa1)))
   2385             ) {
   2386                 targetByteUnit=missingCharMarker;
   2387             }
   2388             if (targetByteUnit != missingCharMarker){
   2389 
   2390                 oldIsTargetByteDBCS = isTargetByteDBCS;
   2391                 isTargetByteDBCS = (UBool)(targetByteUnit>0x00FF);
   2392                   /* append the shift sequence */
   2393                 if (oldIsTargetByteDBCS != isTargetByteDBCS ){
   2394 
   2395                     if (isTargetByteDBCS)
   2396                         *target++ = UCNV_SO;
   2397                     else
   2398                         *target++ = UCNV_SI;
   2399                     if(offsets)
   2400                         *(offsets++) = (int32_t)(source - args->source-1);
   2401                 }
   2402                 /* write the targetUniChar  to target */
   2403                 if(targetByteUnit <= 0x00FF){
   2404                     if( target < targetLimit){
   2405                         *(target++) = (unsigned char) targetByteUnit;
   2406                         if(offsets){
   2407                             *(offsets++) = (int32_t)(source - args->source-1);
   2408                         }
   2409 
   2410                     }else{
   2411                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit);
   2412                         *err = U_BUFFER_OVERFLOW_ERROR;
   2413                     }
   2414                 }else{
   2415                     if(target < targetLimit){
   2416                         *(target++) =(unsigned char) ((targetByteUnit>>8) -0x80);
   2417                         if(offsets){
   2418                             *(offsets++) = (int32_t)(source - args->source-1);
   2419                         }
   2420                         if(target < targetLimit){
   2421                             *(target++) =(unsigned char) (targetByteUnit -0x80);
   2422                             if(offsets){
   2423                                 *(offsets++) = (int32_t)(source - args->source-1);
   2424                             }
   2425                         }else{
   2426                             args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit -0x80);
   2427                             *err = U_BUFFER_OVERFLOW_ERROR;
   2428                         }
   2429                     }else{
   2430                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) ((targetByteUnit>>8) -0x80);
   2431                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit-0x80);
   2432                         *err = U_BUFFER_OVERFLOW_ERROR;
   2433                     }
   2434                 }
   2435 
   2436             }
   2437             else{
   2438                 /* oops.. the code point is unassingned
   2439                  * set the error and reason
   2440                  */
   2441 
   2442                 /*check if the char is a First surrogate*/
   2443                 if(UTF_IS_SURROGATE(sourceChar)) {
   2444                     if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
   2445 getTrail:
   2446                         /*look ahead to find the trail surrogate*/
   2447                         if(source <  sourceLimit) {
   2448                             /* test the following code unit */
   2449                             UChar trail=(UChar) *source;
   2450                             if(UTF_IS_SECOND_SURROGATE(trail)) {
   2451                                 source++;
   2452                                 sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
   2453                                 *err = U_INVALID_CHAR_FOUND;
   2454                                 /* convert this surrogate code point */
   2455                                 /* exit this condition tree */
   2456                             } else {
   2457                                 /* this is an unmatched lead code unit (1st surrogate) */
   2458                                 /* callback(illegal) */
   2459                                 *err=U_ILLEGAL_CHAR_FOUND;
   2460                             }
   2461                         } else {
   2462                             /* no more input */
   2463                             *err = U_ZERO_ERROR;
   2464                         }
   2465                     } else {
   2466                         /* this is an unmatched trail code unit (2nd surrogate) */
   2467                         /* callback(illegal) */
   2468                         *err=U_ILLEGAL_CHAR_FOUND;
   2469                     }
   2470                 } else {
   2471                     /* callback(unassigned) for a BMP code point */
   2472                     *err = U_INVALID_CHAR_FOUND;
   2473                 }
   2474 
   2475                 args->converter->fromUChar32=sourceChar;
   2476                 break;
   2477             }
   2478         } /* end if(myTargetIndex<myTargetLength) */
   2479         else{
   2480             *err =U_BUFFER_OVERFLOW_ERROR;
   2481             break;
   2482         }
   2483 
   2484     }/* end while(mySourceIndex<mySourceLength) */
   2485 
   2486     /*
   2487      * the end of the input stream and detection of truncated input
   2488      * are handled by the framework, but for ISO-2022-KR conversion
   2489      * we need to be in ASCII mode at the very end
   2490      *
   2491      * conditions:
   2492      *   successful
   2493      *   not in ASCII mode
   2494      *   end of input and no truncated input
   2495      */
   2496     if( U_SUCCESS(*err) &&
   2497         isTargetByteDBCS &&
   2498         args->flush && source>=sourceLimit && args->converter->fromUChar32==0
   2499     ) {
   2500         int32_t sourceIndex;
   2501 
   2502         /* we are switching to ASCII */
   2503         isTargetByteDBCS=FALSE;
   2504 
   2505         /* get the source index of the last input character */
   2506         /*
   2507          * TODO this would be simpler and more reliable if we used a pair
   2508          * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
   2509          * so that we could simply use the prevSourceIndex here;
   2510          * this code gives an incorrect result for the rare case of an unmatched
   2511          * trail surrogate that is alone in the last buffer of the text stream
   2512          */
   2513         sourceIndex=(int32_t)(source-args->source);
   2514         if(sourceIndex>0) {
   2515             --sourceIndex;
   2516             if( U16_IS_TRAIL(args->source[sourceIndex]) &&
   2517                 (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
   2518             ) {
   2519                 --sourceIndex;
   2520             }
   2521         } else {
   2522             sourceIndex=-1;
   2523         }
   2524 
   2525         fromUWriteUInt8(
   2526             args->converter,
   2527             SHIFT_IN_STR, 1,
   2528             &target, (const char *)targetLimit,
   2529             &offsets, sourceIndex,
   2530             err);
   2531     }
   2532 
   2533     /*save the state and return */
   2534     args->source = source;
   2535     args->target = (char*)target;
   2536     args->converter->fromUnicodeStatus = (uint32_t)isTargetByteDBCS;
   2537 }
   2538 
   2539 /************************ To Unicode ***************************************/
   2540 
   2541 static void
   2542 UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterToUnicodeArgs *args,
   2543                                                             UErrorCode* err){
   2544     char const* sourceStart;
   2545     UConverterDataISO2022* myData=(UConverterDataISO2022*)(args->converter->extraInfo);
   2546 
   2547     UConverterToUnicodeArgs subArgs;
   2548     int32_t minArgsSize;
   2549 
   2550     /* set up the subconverter arguments */
   2551     if(args->size<sizeof(UConverterToUnicodeArgs)) {
   2552         minArgsSize = args->size;
   2553     } else {
   2554         minArgsSize = (int32_t)sizeof(UConverterToUnicodeArgs);
   2555     }
   2556 
   2557     uprv_memcpy(&subArgs, args, minArgsSize);
   2558     subArgs.size = (uint16_t)minArgsSize;
   2559     subArgs.converter = myData->currentConverter;
   2560 
   2561     /* remember the original start of the input for offsets */
   2562     sourceStart = args->source;
   2563 
   2564     if(myData->key != 0) {
   2565         /* continue with a partial escape sequence */
   2566         goto escape;
   2567     }
   2568 
   2569     while(U_SUCCESS(*err) && args->source < args->sourceLimit) {
   2570         /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
   2571         subArgs.source = args->source;
   2572         subArgs.sourceLimit = getEndOfBuffer_2022(&(args->source), args->sourceLimit, args->flush);
   2573         if(subArgs.source != subArgs.sourceLimit) {
   2574             /*
   2575              * get the current partial byte sequence
   2576              *
   2577              * it needs to be moved between the public and the subconverter
   2578              * so that the conversion framework, which only sees the public
   2579              * converter, can handle truncated and illegal input etc.
   2580              */
   2581             if(args->converter->toULength > 0) {
   2582                 uprv_memcpy(subArgs.converter->toUBytes, args->converter->toUBytes, args->converter->toULength);
   2583             }
   2584             subArgs.converter->toULength = args->converter->toULength;
   2585 
   2586             /*
   2587              * Convert up to the end of the input, or to before the next escape character.
   2588              * Does not handle conversion extensions because the preToU[] state etc.
   2589              * is not copied.
   2590              */
   2591             ucnv_MBCSToUnicodeWithOffsets(&subArgs, err);
   2592 
   2593             if(args->offsets != NULL && sourceStart != args->source) {
   2594                 /* update offsets to base them on the actual start of the input */
   2595                 int32_t *offsets = args->offsets;
   2596                 UChar *target = args->target;
   2597                 int32_t delta = (int32_t)(args->source - sourceStart);
   2598                 while(target < subArgs.target) {
   2599                     if(*offsets >= 0) {
   2600                         *offsets += delta;
   2601                     }
   2602                     ++offsets;
   2603                     ++target;
   2604                 }
   2605             }
   2606             args->source = subArgs.source;
   2607             args->target = subArgs.target;
   2608             args->offsets = subArgs.offsets;
   2609 
   2610             /* copy input/error/overflow buffers */
   2611             if(subArgs.converter->toULength > 0) {
   2612                 uprv_memcpy(args->converter->toUBytes, subArgs.converter->toUBytes, subArgs.converter->toULength);
   2613             }
   2614             args->converter->toULength = subArgs.converter->toULength;
   2615 
   2616             if(*err == U_BUFFER_OVERFLOW_ERROR) {
   2617                 if(subArgs.converter->UCharErrorBufferLength > 0) {
   2618                     uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer,
   2619                                 subArgs.converter->UCharErrorBufferLength);
   2620                 }
   2621                 args->converter->UCharErrorBufferLength=subArgs.converter->UCharErrorBufferLength;
   2622                 subArgs.converter->UCharErrorBufferLength = 0;
   2623             }
   2624         }
   2625 
   2626         if (U_FAILURE(*err) || (args->source == args->sourceLimit)) {
   2627             return;
   2628         }
   2629 
   2630 escape:
   2631         changeState_2022(args->converter,
   2632                &(args->source),
   2633                args->sourceLimit,
   2634                ISO_2022_KR,
   2635                err);
   2636     }
   2637 }
   2638 
        /*
         * ISO-2022-KR to Unicode conversion with optional offsets.
         *
         * Reads bytes from args->source up to args->sourceLimit and writes UTF-16
         * code units to args->target up to args->targetLimit.  When args->offsets
         * is not NULL, the source index of the first byte of each converted
         * character is stored for the corresponding output unit.
         * Converters whose data has version==1 are handed to the _IBM variant.
         *
         * Byte handling:
         *  - SI selects G0 (single-byte), SO selects G1 (double-byte).  An SI that
         *    arrives while the SO segment is still empty is reported as
         *    U_ILLEGAL_ESCAPE_SEQUENCE with reason UCNV_IRREGULAR.
         *  - ESC hands the input to changeState_2022().
         *  - In G1, a pair of bytes that are both in 0x21..0x7e is looked up with
         *    0x80 added to each byte.
         *  - In G0, bytes up to 0x7f are looked up as single bytes; higher bytes
         *    are treated as unmappable.
         *
         * A lead byte at the very end of the input is parked in toUBytes[] and
         * picked up again on the next call.  Illegal or unmappable input is routed
         * through toUnicodeCallback(); a full target sets U_BUFFER_OVERFLOW_ERROR.
         * On return args->source and args->target reflect what was consumed and
         * produced.
         */
   2639 static void
   2640 UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
   2641                                                             UErrorCode* err){
   2642     char tempBuf[2];
   2643     const char *mySource = ( char *) args->source;
   2644     UChar *myTarget = args->target;
   2645     const char *mySourceLimit = args->sourceLimit;
   2646     UChar32 targetUniChar = 0x0000;
            /*
             * 32 bits wide (like the ISO-2022-CN decoder in this file) so that the
             * 0x10000 "two illegal bytes" marker set below is not truncated away,
             * which would happen with a 16-bit UChar when the lead byte is 0x00.
             */
   2647     uint32_t mySourceChar = 0x0000;
   2648     UConverterDataISO2022* myData;
   2649     UConverterSharedData* sharedData ;
   2650     UBool useFallback;
   2651 
   2652     myData=(UConverterDataISO2022*)(args->converter->extraInfo);
   2653     if(myData->version==1){
   2654         UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
   2655         return;
   2656     }
   2657 
   2658     /* initialize state */
   2659     sharedData = myData->currentConverter->sharedData;
   2660     useFallback = args->converter->useFallback;
   2661 
   2662     if(myData->key != 0) {
   2663         /* continue with a partial escape sequence */
   2664         goto escape;
   2665     } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
   2666         /* continue with a partial double-byte character */
   2667         mySourceChar = args->converter->toUBytes[0];
   2668         args->converter->toULength = 0;
   2669         goto getTrailByte;
   2670     }
   2671 
   2672     while(mySource< mySourceLimit){
   2673 
   2674         if(myTarget < args->targetLimit){
   2675 
   2676             mySourceChar= (unsigned char) *mySource++;
   2677 
   2678             if(mySourceChar==UCNV_SI){
   2679                 myData->toU2022State.g = 0;
   2680                 if (myData->isEmptySegment) {
   2681                     myData->isEmptySegment = FALSE;	/* we are handling it, reset to avoid future spurious errors */
   2682                     *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   2683                     args->converter->toUCallbackReason = UCNV_IRREGULAR;
   2684                     args->converter->toUBytes[0] = (uint8_t)mySourceChar;
   2685                     args->converter->toULength = 1;
   2686                     args->target = myTarget;
   2687                     args->source = mySource;
   2688                     return;
   2689                 }
   2690                 /*consume the source */
   2691                 continue;
   2692             }else if(mySourceChar==UCNV_SO){
   2693                 myData->toU2022State.g = 1;
   2694                 myData->isEmptySegment = TRUE;	/* Begin a new segment, empty so far */
   2695                 /*consume the source */
   2696                 continue;
   2697             }else if(mySourceChar==ESC_2022){
                        /* step back so that changeState_2022() sees the ESC itself */
   2698                 mySource--;
   2699 escape:
   2700                 myData->isEmptySegment = FALSE;	/* Any invalid ESC sequences will be detected separately, so just reset this */
   2701                 changeState_2022(args->converter,&(mySource),
   2702                                 mySourceLimit, ISO_2022_KR, err);
   2703                 if(U_FAILURE(*err)){
   2704                     args->target = myTarget;
   2705                     args->source = mySource;
   2706                     return;
   2707                 }
   2708                 continue;
   2709             }
   2710 
   2711             myData->isEmptySegment = FALSE;	/* Any invalid char errors will be detected separately, so just reset this */
   2712             if(myData->toU2022State.g == 1) {
   2713                 if(mySource < mySourceLimit) {
   2714                     int leadIsOk, trailIsOk;
   2715                     uint8_t trailByte;
   2716 getTrailByte:
   2717                     targetUniChar = missingCharMarker;
   2718                     trailByte = (uint8_t)*mySource;
   2719                     /*
   2720                      * Ticket 5691: consistent illegal sequences:
   2721                      * - We include at least the first byte in the illegal sequence.
   2722                      * - If any of the non-initial bytes could be the start of a character,
   2723                      *   we stop the illegal sequence before the first one of those.
   2724                      *
   2725                      * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
   2726                      * an ESC/SO/SI, we report only the first byte as the illegal sequence.
   2727                      * Otherwise we convert or report the pair of bytes.
   2728                      */
   2729                     leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
   2730                     trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
   2731                     if (leadIsOk && trailIsOk) {
   2732                         ++mySource;
   2733                         tempBuf[0] = (char)(mySourceChar + 0x80);
   2734                         tempBuf[1] = (char)(trailByte + 0x80);
   2735                         targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback);
   2736                         mySourceChar = (mySourceChar << 8) | trailByte;
   2737                     } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
   2738                         /* report a pair of illegal bytes if the second byte is not a DBCS starter */
   2739                         ++mySource;
   2740                         /* add another bit so that the code below writes 2 bytes in case of error */
   2741                         mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
   2742                     }
   2743                 } else {
                            /* lead byte is the last input byte: park it for the next call */
   2744                     args->converter->toUBytes[0] = (uint8_t)mySourceChar;
   2745                     args->converter->toULength = 1;
   2746                     break;
   2747                 }
   2748             }
   2749             else if(mySourceChar <= 0x7f) {
   2750                 targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, mySource - 1, 1, useFallback);
   2751             } else {
   2752                 targetUniChar = 0xffff;
   2753             }
   2754             if(targetUniChar < 0xfffe){
   2755                 if(args->offsets) {
                            /* offset of the first source byte: one byte back for single-byte input, two for a pair */
   2756                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2757                 }
   2758                 *(myTarget++)=(UChar)targetUniChar;
   2759             }
   2760             else {
   2761                 /* Call the callback function*/
   2762                 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
   2763                 break;
   2764             }
   2765         }
   2766         else{
   2767             *err =U_BUFFER_OVERFLOW_ERROR;
   2768             break;
   2769         }
   2770     }
   2771     args->target = myTarget;
   2772     args->source = mySource;
   2773 }
   2774 
   2775 /*************************** END ISO2022-KR *********************************/
   2776 
   2777 /*************************** ISO-2022-CN *********************************
   2778 *
   2779 * Rules for ISO-2022-CN Encoding:
   2780 * i)   The designator sequence must appear once on a line before any instance
   2781 *      of character set it designates.
   2782 * ii)  If two lines contain characters from the same character set, both lines
   2783 *      must include the designator sequence.
   2784 * iii) Once the designator sequence is known, a shifting sequence has to be found
   2785 *      to invoke the designated character set
   2786 * iv)  All lines start in ASCII and end in ASCII.
   2787 * v)   Four shifting sequences are employed for this purpose:
   2788 *
   2789 *      Sequence    ASCII Eq    Charsets
   2790 *      ----------  -------    ---------
   2791 *      SI           <SI>        US-ASCII
   2792 *      SO           <SO>        CNS-11643-1992 Plane 1, GB2312, ISO-IR-165
   2793 *      SS2          <ESC>N      CNS-11643-1992 Plane 2
   2794 *      SS3          <ESC>O      CNS-11643-1992 Planes 3-7
   2795 *
   2796 * vi)
   2797 *      SOdesignator  : ESC "$" ")" finalchar_for_SO
   2798 *      SS2designator : ESC "$" "*" finalchar_for_SS2
   2799 *      SS3designator : ESC "$" "+" finalchar_for_SS3
   2800 *
   2801 *      ESC $ ) A       Indicates the bytes following SO are Chinese
   2802 *       characters as defined in GB 2312-80, until
   2803 *       another SOdesignation appears
   2804 *
   2805 *
   2806 *      ESC $ ) E       Indicates the bytes following SO are as defined
   2807 *       in ISO-IR-165 (for details, see section 2.1),
   2808 *       until another SOdesignation appears
   2809 *
   2810 *      ESC $ ) G       Indicates the bytes following SO are as defined
   2811 *       in CNS 11643-plane-1, until another
   2812 *       SOdesignation appears
   2813 *
   2814 *      ESC $ * H       Indicates the two bytes immediately following
   2815 *       SS2 is a Chinese character as defined in CNS
   2816 *       11643-plane-2, until another SS2designation
   2817 *       appears
   2818 *       (Meaning <ESC>N must precede every 2 byte
   2819 *        sequence.)
   2820 *
   2821 *      ESC $ + I       Indicates the immediate two bytes following SS3
   2822 *       is a Chinese character as defined in CNS
   2823 *       11643-plane-3, until another SS3designation
   2824 *       appears
   2825 *       (Meaning <ESC>O must precede every 2 byte
   2826 *        sequence.)
   2827 *
   2828 *      ESC $ + J       Indicates the immediate two bytes following SS3
   2829 *       is a Chinese character as defined in CNS
   2830 *       11643-plane-4, until another SS3designation
   2831 *       appears
   2832 *       (In English: <ESC>O must precede every 2 byte
   2833 *        sequence.)
   2834 *
   2835 *      ESC $ + K       Indicates the immediate two bytes following SS3
   2836 *       is a Chinese character as defined in CNS
   2837 *       11643-plane-5, until another SS3designation
   2838 *       appears
   2839 *
   2840 *      ESC $ + L       Indicates the immediate two bytes following SS3
   2841 *       is a Chinese character as defined in CNS
   2842 *       11643-plane-6, until another SS3designation
   2843 *       appears
   2844 *
   2845 *      ESC $ + M       Indicates the immediate two bytes following SS3
   2846 *       is a Chinese character as defined in CNS
   2847 *       11643-plane-7, until another SS3designation
   2848 *       appears
   2849 *
   2850 *       As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and
   2851 *       has its own designation information before any Chinese characters
   2852 *       appear
   2853 *
   2854 */
   2855 
   2856 /* The following are defined this way to make the strings truly readonly */
        /*
         * ISO-2022-CN designation sequences, four bytes each (plus the implicit NUL):
         * ESC '$' followed by ')' for SO, '*' for SS2 or '+' for SS3, then the
         * final byte that names the character set.
         */
   2857 static const char GB_2312_80_STR[] = "\x1B\x24\x29\x41";             /* ESC $ ) A */
   2858 static const char ISO_IR_165_STR[] = "\x1B\x24\x29\x45";             /* ESC $ ) E */
   2859 static const char CNS_11643_1992_Plane_1_STR[] = "\x1B\x24\x29\x47"; /* ESC $ ) G */
   2860 static const char CNS_11643_1992_Plane_2_STR[] = "\x1B\x24\x2A\x48"; /* ESC $ * H */
   2861 static const char CNS_11643_1992_Plane_3_STR[] = "\x1B\x24\x2B\x49"; /* ESC $ + I */
   2862 static const char CNS_11643_1992_Plane_4_STR[] = "\x1B\x24\x2B\x4A"; /* ESC $ + J */
   2863 static const char CNS_11643_1992_Plane_5_STR[] = "\x1B\x24\x2B\x4B"; /* ESC $ + K */
   2864 static const char CNS_11643_1992_Plane_6_STR[] = "\x1B\x24\x2B\x4C"; /* ESC $ + L */
   2865 static const char CNS_11643_1992_Plane_7_STR[] = "\x1B\x24\x2B\x4D"; /* ESC $ + M */
   2866 
   2867 /********************** ISO2022-CN Data **************************/
        /*
         * Designation sequences used by the ISO-2022-CN encoder.
         * The encoder indexes this table with the charset number directly for
         * charsets below CNS_11643, and with CNS_11643 + (cs - CNS_11643_1) for the
         * CNS planes, then copies four bytes from the selected entry.
         */
   2868 static const char* const escSeqCharsCN[10] ={
   2869         SHIFT_IN_STR,           /* ASCII */
   2870         GB_2312_80_STR,
   2871         ISO_IR_165_STR,
   2872         CNS_11643_1992_Plane_1_STR,
   2873         CNS_11643_1992_Plane_2_STR,
   2874         CNS_11643_1992_Plane_3_STR,
   2875         CNS_11643_1992_Plane_4_STR,
   2876         CNS_11643_1992_Plane_5_STR,
   2877         CNS_11643_1992_Plane_6_STR,
   2878         CNS_11643_1992_Plane_7_STR
   2879 };
   2880 
        /*
         * Unicode to ISO-2022-CN (and variants) conversion with optional offsets.
         *
         * Consumes UTF-16 from args->source up to args->sourceLimit and writes bytes
         * to args->target up to args->targetLimit; when args->offsets is not NULL a
         * source index is recorded for every output byte.
         *
         * - U+0000..U+007F: SO/SI/ESC are rejected with U_ILLEGAL_CHAR_FOUND; other
         *   characters are written as is, preceded by SI when the encoder is not in
         *   G0.  CR and LF reset the whole fromUnicode state.
         * - Other code points: a short list of candidate charsets (choices[]) is
         *   tried.  A roundtrip mapping ends the search; a fallback mapping is kept
         *   only if no earlier fallback was found.  The output is the designation
         *   sequence (if the charset for that G set changed), the shift (SO, or
         *   ESC N / ESC O for SS2 / SS3), and the two character bytes.
         * - Unpaired surrogates set U_ILLEGAL_CHAR_FOUND, unmappable code points set
         *   U_INVALID_CHAR_FOUND; in both cases the code point is left in
         *   cnv->fromUChar32.  A lead surrogate at the end of the input is stored
         *   there too, without an error, and resumed on the next call.
         * - When args->flush is set, all input is consumed and the encoder is not in
         *   ASCII mode, a final SI is written.
         *
         * On return args->source and args->target reflect what was consumed and
         * produced.
         */
   2881 static void
   2882 UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
   2883     UConverter *cnv = args->converter;
   2884     UConverterDataISO2022 *converterData;
   2885     ISO2022State *pFromU2022State;
   2886     uint8_t *target = (uint8_t *) args->target;
   2887     const uint8_t *targetLimit = (const uint8_t *) args->targetLimit;
   2888     const UChar* source = args->source;
   2889     const UChar* sourceLimit = args->sourceLimit;
   2890     int32_t* offsets = args->offsets;
   2891     UChar32 sourceChar;
            /* large enough for designation (4) + shift (up to 2) + character (2) */
   2892     char buffer[8];
   2893     int32_t len;
   2894     int8_t choices[3];
   2895     int32_t choiceCount;
   2896     uint32_t targetValue = 0;
   2897     UBool useFallback;
   2898 
   2899     /* set up the state */
   2900     converterData     = (UConverterDataISO2022*)cnv->extraInfo;
   2901     pFromU2022State   = &converterData->fromU2022State;
   2902 
            /* 0 means choices[] has to be (re)built before the next non-ASCII lookup */
   2903     choiceCount = 0;
   2904 
   2905     /* check if the last codepoint of previous buffer was a lead surrogate*/
   2906     if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
   2907         goto getTrail;
   2908     }
   2909 
   2910     while( source < sourceLimit){
   2911         if(target < targetLimit){
   2912 
   2913             sourceChar  = *(source++);
   2914             /*check if the char is a First surrogate*/
   2915              if(UTF_IS_SURROGATE(sourceChar)) {
   2916                 if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
   2917 getTrail:
   2918                     /*look ahead to find the trail surrogate*/
   2919                     if(source < sourceLimit) {
   2920                         /* test the following code unit */
   2921                         UChar trail=(UChar) *source;
   2922                         if(UTF_IS_SECOND_SURROGATE(trail)) {
   2923                             source++;
   2924                             sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
   2925                             cnv->fromUChar32=0x00;
   2926                             /* convert this supplementary code point */
   2927                             /* exit this condition tree */
   2928                         } else {
   2929                             /* this is an unmatched lead code unit (1st surrogate) */
   2930                             /* callback(illegal) */
   2931                             *err=U_ILLEGAL_CHAR_FOUND;
   2932                             cnv->fromUChar32=sourceChar;
   2933                             break;
   2934                         }
   2935                     } else {
   2936                         /* no more input */
   2937                         cnv->fromUChar32=sourceChar;
   2938                         break;
   2939                     }
   2940                 } else {
   2941                     /* this is an unmatched trail code unit (2nd surrogate) */
   2942                     /* callback(illegal) */
   2943                     *err=U_ILLEGAL_CHAR_FOUND;
   2944                     cnv->fromUChar32=sourceChar;
   2945                     break;
   2946                 }
   2947             }
   2948 
   2949             /* do the conversion */
   2950             if(sourceChar <= 0x007f ){
   2951                 /* do not convert SO/SI/ESC */
   2952                 if(IS_2022_CONTROL(sourceChar)) {
   2953                     /* callback(illegal) */
   2954                     *err=U_ILLEGAL_CHAR_FOUND;
   2955                     cnv->fromUChar32=sourceChar;
   2956                     break;
   2957                 }
   2958 
   2959                 /* US-ASCII */
   2960                 if(pFromU2022State->g == 0) {
   2961                     buffer[0] = (char)sourceChar;
   2962                     len = 1;
   2963                 } else {
                            /* leave the shifted state before writing the ASCII character */
   2964                     buffer[0] = UCNV_SI;
   2965                     buffer[1] = (char)sourceChar;
   2966                     len = 2;
   2967                     pFromU2022State->g = 0;
   2968                     choiceCount = 0;
   2969                 }
   2970                 if(sourceChar == CR || sourceChar == LF) {
   2971                     /* reset the state at the end of a line */
   2972                     uprv_memset(pFromU2022State, 0, sizeof(ISO2022State));
   2973                     choiceCount = 0;
   2974                 }
   2975             }
   2976             else{
   2977                 /* convert U+0080..U+10ffff */
   2978                 int32_t i;
   2979                 int8_t cs, g;
   2980 
   2981                 if(choiceCount == 0) {
   2982                     /* try the current SO/G1 converter first */
   2983                     choices[0] = pFromU2022State->cs[1];
   2984 
   2985                     /* default to GB2312_1 if none is designated yet */
   2986                     if(choices[0] == 0) {
   2987                         choices[0] = GB2312_1;
   2988                     }
   2989 
   2990                     if(converterData->version == 0) {
   2991                         /* ISO-2022-CN */
   2992 
   2993                         /* try the other SO/G1 converter; a CNS_11643_1 lookup may result in any plane */
   2994                         if(choices[0] == GB2312_1) {
   2995                             choices[1] = (int8_t)CNS_11643_1;
   2996                         } else {
   2997                             choices[1] = (int8_t)GB2312_1;
   2998                         }
   2999 
   3000                         choiceCount = 2;
   3001                     } else if (converterData->version == 1) {
   3002                         /* ISO-2022-CN-EXT */
   3003 
   3004                         /* try one of the other converters */
   3005                         switch(choices[0]) {
   3006                         case GB2312_1:
   3007                             choices[1] = (int8_t)CNS_11643_1;
   3008                             choices[2] = (int8_t)ISO_IR_165;
   3009                             break;
   3010                         case ISO_IR_165:
   3011                             choices[1] = (int8_t)GB2312_1;
   3012                             choices[2] = (int8_t)CNS_11643_1;
   3013                             break;
   3014                         default: /* CNS_11643_x */
   3015                             choices[1] = (int8_t)GB2312_1;
   3016                             choices[2] = (int8_t)ISO_IR_165;
   3017                             break;
   3018                         }
   3019 
   3020                         choiceCount = 3;
   3021                     } else {
                                /*
                                 * any other version: offer CNS 11643 first, then GB 2312;
                                 * choiceCount must be set here, otherwise the lookup loop
                                 * below never runs and every non-ASCII code point is
                                 * reported as unmappable
                                 */
   3022                         choices[0] = (int8_t)CNS_11643_1;
   3023                         choices[1] = (int8_t)GB2312_1;
                                choiceCount = 2;
   3024                     }
   3025                 }
   3026 
   3027                 cs = g = 0;
   3028                 /*
   3029                  * len==0: no mapping found yet
   3030                  * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
   3031                  * len>0: found a roundtrip result, done
   3032                  */
   3033                 len = 0;
   3034                 /*
   3035                  * We will turn off useFallback after finding a fallback,
   3036                  * but we still get fallbacks from PUA code points as usual.
   3037                  * Therefore, we will also need to check that we don't overwrite
   3038                  * an early fallback with a later one.
   3039                  */
   3040                 useFallback = cnv->useFallback;
   3041 
   3042                 for(i = 0; i < choiceCount && len <= 0; ++i) {
   3043                     int8_t cs0 = choices[i];
   3044                     if(cs0 > 0) {
   3045                         uint32_t value;
   3046                         int32_t len2;
   3047                         if(cs0 >= CNS_11643_0) {
   3048                             len2 = MBCS_FROM_UCHAR32_ISO2022(
   3049                                         converterData->myConverterArray[CNS_11643],
   3050                                         sourceChar,
   3051                                         &value,
   3052                                         useFallback,
   3053                                         MBCS_OUTPUT_3);
   3054                             if(len2 == 3 || (len2 == -3 && len == 0)) {
   3055                                 targetValue = value;
                                        /* the top byte of the 3-byte result selects the CNS plane */
   3056                                 cs = (int8_t)(CNS_11643_0 + (value >> 16) - 0x80);
   3057                                 if(len2 >= 0) {
   3058                                     len = 2;
   3059                                 } else {
   3060                                     len = -2;
   3061                                     useFallback = FALSE;
   3062                                 }
   3063                                 if(cs == CNS_11643_1) {
   3064                                     g = 1;
   3065                                 } else if(cs == CNS_11643_2) {
   3066                                     g = 2;
   3067                                 } else /* plane 3..7 */ if(converterData->version == 1) {
   3068                                     g = 3;
   3069                                 } else {
   3070                                     /* ISO-2022-CN (without -EXT) does not support plane 3..7 */
   3071                                     len = 0;
   3072                                 }
   3073                             }
   3074                         } else {
   3075                             /* GB2312_1 or ISO-IR-165 */
   3076                             len2 = MBCS_FROM_UCHAR32_ISO2022(
   3077                                         converterData->myConverterArray[cs0],
   3078                                         sourceChar,
   3079                                         &value,
   3080                                         useFallback,
   3081                                         MBCS_OUTPUT_2);
   3082                             if(len2 == 2 || (len2 == -2 && len == 0)) {
   3083                                 targetValue = value;
   3084                                 len = len2;
   3085                                 cs = cs0;
   3086                                 g = 1;
   3087                                 useFallback = FALSE;
   3088                             }
   3089                         }
   3090                     }
   3091                 }
   3092 
   3093                 if(len != 0) {
   3094                     len = 0; /* count output bytes; it must have been abs(len) == 2 */
   3095 
   3096                     /* write the designation sequence if necessary */
   3097                     if(cs != pFromU2022State->cs[g]) {
   3098                         if(cs < CNS_11643) {
   3099                             uprv_memcpy(buffer, escSeqCharsCN[cs], 4);
   3100                         } else {
   3101                             uprv_memcpy(buffer, escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)], 4);
   3102                         }
   3103                         len = 4;
   3104                         pFromU2022State->cs[g] = cs;
   3105                         if(g == 1) {
   3106                             /* changing the SO/G1 charset invalidates the choices[] */
   3107                             choiceCount = 0;
   3108                         }
   3109                     }
   3110 
   3111                     /* write the shift sequence if necessary */
   3112                     if(g != pFromU2022State->g) {
   3113                         switch(g) {
   3114                         case 1:
   3115                             buffer[len++] = UCNV_SO;
   3116 
   3117                             /* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */
   3118                             pFromU2022State->g = 1;
   3119                             break;
   3120                         case 2:
                                    /* SS2: ESC N */
   3121                             buffer[len++] = 0x1b;
   3122                             buffer[len++] = 0x4e;
   3123                             break;
   3124                         default: /* case 3 */
                                    /* SS3: ESC O */
   3125                             buffer[len++] = 0x1b;
   3126                             buffer[len++] = 0x4f;
   3127                             break;
   3128                         }
   3129                     }
   3130 
   3131                     /* write the two output bytes */
   3132                     buffer[len++] = (char)(targetValue >> 8);
   3133                     buffer[len++] = (char)targetValue;
   3134                 } else {
   3135                     /* if we cannot find the character after checking all codepages
   3136                      * then this is an error
   3137                      */
   3138                     *err = U_INVALID_CHAR_FOUND;
   3139                     cnv->fromUChar32=sourceChar;
   3140                     break;
   3141                 }
   3142             }
   3143 
   3144             /* output len>0 bytes in buffer[] */
   3145             if(len == 1) {
   3146                 *target++ = buffer[0];
   3147                 if(offsets) {
   3148                     *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */
   3149                 }
   3150             } else if(len == 2 && (target + 2) <= targetLimit) {
   3151                 *target++ = buffer[0];
   3152                 *target++ = buffer[1];
   3153                 if(offsets) {
   3154                     int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));
   3155                     *offsets++ = sourceIndex;
   3156                     *offsets++ = sourceIndex;
   3157                 }
   3158             } else {
                        /* longer output, or output that may not fit: let the helper write it */
   3159                 fromUWriteUInt8(
   3160                     cnv,
   3161                     buffer, len,
   3162                     &target, (const char *)targetLimit,
   3163                     &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
   3164                     err);
   3165                 if(U_FAILURE(*err)) {
   3166                     break;
   3167                 }
   3168             }
   3169         } /* end if(target < targetLimit) */
   3170         else{
   3171             *err =U_BUFFER_OVERFLOW_ERROR;
   3172             break;
   3173         }
   3174 
   3175     }/* end while(source < sourceLimit) */
   3176 
   3177     /*
   3178      * the end of the input stream and detection of truncated input
   3179      * are handled by the framework, but for ISO-2022-CN conversion
   3180      * we need to be in ASCII mode at the very end
   3181      *
   3182      * conditions:
   3183      *   successful
   3184      *   not in ASCII mode
   3185      *   end of input and no truncated input
   3186      */
   3187     if( U_SUCCESS(*err) &&
   3188         pFromU2022State->g!=0 &&
   3189         args->flush && source>=sourceLimit && cnv->fromUChar32==0
   3190     ) {
   3191         int32_t sourceIndex;
   3192 
   3193         /* we are switching to ASCII */
   3194         pFromU2022State->g=0;
   3195 
   3196         /* get the source index of the last input character */
   3197         /*
   3198          * TODO this would be simpler and more reliable if we used a pair
   3199          * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
   3200          * so that we could simply use the prevSourceIndex here;
   3201          * this code gives an incorrect result for the rare case of an unmatched
   3202          * trail surrogate that is alone in the last buffer of the text stream
   3203          */
   3204         sourceIndex=(int32_t)(source-args->source);
   3205         if(sourceIndex>0) {
   3206             --sourceIndex;
   3207             if( U16_IS_TRAIL(args->source[sourceIndex]) &&
   3208                 (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
   3209             ) {
   3210                 --sourceIndex;
   3211             }
   3212         } else {
   3213             sourceIndex=-1;
   3214         }
   3215 
   3216         fromUWriteUInt8(
   3217             cnv,
   3218             SHIFT_IN_STR, 1,
   3219             &target, (const char *)targetLimit,
   3220             &offsets, sourceIndex,
   3221             err);
   3222     }
   3223 
   3224     /*save the state and return */
   3225     args->source = source;
   3226     args->target = (char*)target;
   3227 }
   3228 
   3229 
   3230 static void
   3231 UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
   3232                                                UErrorCode* err){
   3233     char tempBuf[3];
   3234     const char *mySource = (char *) args->source;
   3235     UChar *myTarget = args->target;
   3236     const char *mySourceLimit = args->sourceLimit;
   3237     uint32_t targetUniChar = 0x0000;
   3238     uint32_t mySourceChar = 0x0000;
   3239     UConverterDataISO2022* myData;
   3240     ISO2022State *pToU2022State;
   3241 
   3242     myData=(UConverterDataISO2022*)(args->converter->extraInfo);
   3243     pToU2022State = &myData->toU2022State;
   3244 
   3245     if(myData->key != 0) {
   3246         /* continue with a partial escape sequence */
   3247         goto escape;
   3248     } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
   3249         /* continue with a partial double-byte character */
   3250         mySourceChar = args->converter->toUBytes[0];
   3251         args->converter->toULength = 0;
   3252         targetUniChar = missingCharMarker;
   3253         goto getTrailByte;
   3254     }
   3255 
   3256     while(mySource < mySourceLimit){
   3257 
   3258         targetUniChar =missingCharMarker;
   3259 
   3260         if(myTarget < args->targetLimit){
   3261 
   3262             mySourceChar= (unsigned char) *mySource++;
   3263 
   3264             switch(mySourceChar){
   3265             case UCNV_SI:
   3266                 pToU2022State->g=0;
   3267                 if (myData->isEmptySegment) {
   3268                     myData->isEmptySegment = FALSE;	/* we are handling it, reset to avoid future spurious errors */
   3269                     *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   3270                     args->converter->toUCallbackReason = UCNV_IRREGULAR;
   3271                     args->converter->toUBytes[0] = mySourceChar;
   3272                     args->converter->toULength = 1;
   3273                     args->target = myTarget;
   3274                     args->source = mySource;
   3275                     return;
   3276                 }
   3277                 continue;
   3278 
   3279             case UCNV_SO:
   3280                 if(pToU2022State->cs[1] != 0) {
   3281                     pToU2022State->g=1;
   3282                     myData->isEmptySegment = TRUE;	/* Begin a new segment, empty so far */
   3283                     continue;
   3284                 } else {
   3285                     /* illegal to have SO before a matching designator */
   3286                     myData->isEmptySegment = FALSE;	/* Handling a different error, reset this to avoid future spurious errs */
   3287                     break;
   3288                 }
   3289 
   3290             case ESC_2022:
   3291                 mySource--;
   3292 escape:
   3293                 {
   3294                     const char * mySourceBefore = mySource;
   3295                     int8_t toULengthBefore = args->converter->toULength;
   3296 
   3297                     changeState_2022(args->converter,&(mySource),
   3298                         mySourceLimit, ISO_2022_CN,err);
   3299 
   3300                     /* After SO there must be at least one character before a designator (designator error handled separately) */
   3301                     if(myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
   3302                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   3303                         args->converter->toUCallbackReason = UCNV_IRREGULAR;
   3304                         args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore));
   3305                     }
   3306                 }
   3307 
   3308                 /* invalid or illegal escape sequence */
   3309                 if(U_FAILURE(*err)){
   3310                     args->target = myTarget;
   3311                     args->source = mySource;
   3312                     myData->isEmptySegment = FALSE;	/* Reset to avoid future spurious errors */
   3313                     return;
   3314                 }
   3315                 continue;
   3316 
   3317             /* ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */
   3318 
   3319             case CR:
   3320                 /*falls through*/
   3321             case LF:
   3322                 uprv_memset(pToU2022State, 0, sizeof(ISO2022State));
   3323                 /* falls through */
   3324             default:
   3325                 /* convert one or two bytes */
   3326                 myData->isEmptySegment = FALSE;
   3327                 if(pToU2022State->g != 0) {
   3328                     if(mySource < mySourceLimit) {
   3329                         UConverterSharedData *cnv;
   3330                         StateEnum tempState;
   3331                         int32_t tempBufLen;
   3332                         int leadIsOk, trailIsOk;
   3333                         uint8_t trailByte;
   3334 getTrailByte:
   3335                         trailByte = (uint8_t)*mySource;
   3336                         /*
   3337                          * Ticket 5691: consistent illegal sequences:
   3338                          * - We include at least the first byte in the illegal sequence.
   3339                          * - If any of the non-initial bytes could be the start of a character,
   3340                          *   we stop the illegal sequence before the first one of those.
   3341                          *
   3342                          * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
   3343                          * an ESC/SO/SI, we report only the first byte as the illegal sequence.
   3344                          * Otherwise we convert or report the pair of bytes.
   3345                          */
   3346                         leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
   3347                         trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
   3348                         if (leadIsOk && trailIsOk) {
   3349                             ++mySource;
   3350                             tempState = (StateEnum)pToU2022State->cs[pToU2022State->g];
   3351                             if(tempState >= CNS_11643_0) {
   3352                                 cnv = myData->myConverterArray[CNS_11643];
   3353                                 tempBuf[0] = (char) (0x80+(tempState-CNS_11643_0));
   3354                                 tempBuf[1] = (char) (mySourceChar);
   3355                                 tempBuf[2] = (char) trailByte;
   3356                                 tempBufLen = 3;
   3357 
   3358                             }else{
   3359                                 cnv = myData->myConverterArray[tempState];
   3360                                 tempBuf[0] = (char) (mySourceChar);
   3361                                 tempBuf[1] = (char) trailByte;
   3362                                 tempBufLen = 2;
   3363                             }
   3364                             targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE);
   3365                             mySourceChar = (mySourceChar << 8) | trailByte;
   3366                         } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
   3367                             /* report a pair of illegal bytes if the second byte is not a DBCS starter */
   3368                             ++mySource;
   3369                             /* add another bit so that the code below writes 2 bytes in case of error */
   3370                             mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
   3371                         }
   3372                         if(pToU2022State->g>=2) {
   3373                             /* return from a single-shift state to the previous one */
   3374                             pToU2022State->g=pToU2022State->prevG;
   3375                         }
   3376                     } else {
   3377                         args->converter->toUBytes[0] = (uint8_t)mySourceChar;
   3378                         args->converter->toULength = 1;
   3379                         goto endloop;
   3380                     }
   3381                 }
   3382                 else{
   3383                     if(mySourceChar <= 0x7f) {
   3384                         targetUniChar = (UChar) mySourceChar;
   3385                     }
   3386                 }
   3387                 break;
   3388             }
   3389             if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){
   3390                 if(args->offsets){
   3391                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   3392                 }
   3393                 *(myTarget++)=(UChar)targetUniChar;
   3394             }
   3395             else if(targetUniChar > missingCharMarker){
   3396                 /* disassemble the surrogate pair and write to output*/
   3397                 targetUniChar-=0x0010000;
   3398                 *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));
   3399                 if(args->offsets){
   3400                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   3401                 }
   3402                 ++myTarget;
   3403                 if(myTarget< args->targetLimit){
   3404                     *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
   3405                     if(args->offsets){
   3406                         args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   3407                     }
   3408                     ++myTarget;
   3409                 }else{
   3410                     args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
   3411                                     (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
   3412                 }
   3413 
   3414             }
   3415             else{
   3416                 /* Call the callback function*/
   3417                 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
   3418                 break;
   3419             }
   3420         }
   3421         else{
   3422             *err =U_BUFFER_OVERFLOW_ERROR;
   3423             break;
   3424         }
   3425     }
   3426 endloop:
   3427     args->target = myTarget;
   3428     args->source = mySource;
   3429 }
   3430 
   3431 static void
   3432 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
   3433     UConverter *cnv = args->converter;
   3434     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo;
   3435     ISO2022State *pFromU2022State=&myConverterData->fromU2022State;
   3436     char *p, *subchar;
   3437     char buffer[8];
   3438     int32_t length;
   3439 
   3440     subchar=(char *)cnv->subChars;
   3441     length=cnv->subCharLen; /* assume length==1 for most variants */
   3442 
   3443     p = buffer;
   3444     switch(myConverterData->locale[0]){
   3445     case 'j':
   3446         {
   3447             int8_t cs;
   3448 
   3449             if(pFromU2022State->g == 1) {
   3450                 /* JIS7: switch from G1 to G0 */
   3451                 pFromU2022State->g = 0;
   3452                 *p++ = UCNV_SI;
   3453             }
   3454 
   3455             cs = pFromU2022State->cs[0];
   3456             if(cs != ASCII && cs != JISX201) {
   3457                 /* not in ASCII or JIS X 0201: switch to ASCII */
   3458                 pFromU2022State->cs[0] = (int8_t)ASCII;
   3459                 *p++ = '\x1b';
   3460                 *p++ = '\x28';
   3461                 *p++ = '\x42';
   3462             }
   3463 
   3464             *p++ = subchar[0];
   3465             break;
   3466         }
   3467     case 'c':
   3468         if(pFromU2022State->g != 0) {
   3469             /* not in ASCII mode: switch to ASCII */
   3470             pFromU2022State->g = 0;
   3471             *p++ = UCNV_SI;
   3472         }
   3473         *p++ = subchar[0];
   3474         break;
   3475     case 'k':
   3476         if(myConverterData->version == 0) {
   3477             if(length == 1) {
   3478                 if((UBool)args->converter->fromUnicodeStatus) {
   3479                     /* in DBCS mode: switch to SBCS */
   3480                     args->converter->fromUnicodeStatus = 0;
   3481                     *p++ = UCNV_SI;
   3482                 }
   3483                 *p++ = subchar[0];
   3484             } else /* length == 2*/ {
   3485                 if(!(UBool)args->converter->fromUnicodeStatus) {
   3486                     /* in SBCS mode: switch to DBCS */
   3487                     args->converter->fromUnicodeStatus = 1;
   3488                     *p++ = UCNV_SO;
   3489                 }
   3490                 *p++ = subchar[0];
   3491                 *p++ = subchar[1];
   3492             }
   3493             break;
   3494         } else {
   3495             /* save the subconverter's substitution string */
   3496             uint8_t *currentSubChars = myConverterData->currentConverter->subChars;
   3497             int8_t currentSubCharLen = myConverterData->currentConverter->subCharLen;
   3498 
   3499             /* set our substitution string into the subconverter */
   3500             myConverterData->currentConverter->subChars = (uint8_t *)subchar;
   3501             myConverterData->currentConverter->subCharLen = (int8_t)length;
   3502 
   3503             /* let the subconverter write the subchar, set/retrieve fromUChar32 state */
   3504             args->converter = myConverterData->currentConverter;
   3505             myConverterData->currentConverter->fromUChar32 = cnv->fromUChar32;
   3506             ucnv_cbFromUWriteSub(args, 0, err);
   3507             cnv->fromUChar32 = myConverterData->currentConverter->fromUChar32;
   3508             args->converter = cnv;
   3509 
   3510             /* restore the subconverter's substitution string */
   3511             myConverterData->currentConverter->subChars = currentSubChars;
   3512             myConverterData->currentConverter->subCharLen = currentSubCharLen;
   3513 
   3514             if(*err == U_BUFFER_OVERFLOW_ERROR) {
   3515                 if(myConverterData->currentConverter->charErrorBufferLength > 0) {
   3516                     uprv_memcpy(
   3517                         cnv->charErrorBuffer,
   3518                         myConverterData->currentConverter->charErrorBuffer,
   3519                         myConverterData->currentConverter->charErrorBufferLength);
   3520                 }
   3521                 cnv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;
   3522                 myConverterData->currentConverter->charErrorBufferLength = 0;
   3523             }
   3524             return;
   3525         }
   3526     default:
   3527         /* not expected */
   3528         break;
   3529     }
   3530     ucnv_cbFromUWriteBytes(args,
   3531                            buffer, (int32_t)(p - buffer),
   3532                            offsetIndex, err);
   3533 }
   3534 
/*
 * Structure for cloning an ISO 2022 converter into a single memory block.
 * ucnv_safeClone() of the converter will align the entire cloneStruct,
 * and then ucnv_safeClone() of the sub-converter may additionally align
 * currentConverter inside the cloneStruct, for which we need the deadSpace
 * after currentConverter.
 * This is because UAlignedMemory may be larger than the actually
 * necessary alignment size for the platform.
 * The other cloneStruct fields will not be moved around,
 * and are aligned properly with cloneStruct's alignment.
 *
 * The field order matters: deadSpace must directly follow currentConverter
 * so that the two together form the buffer for the sub-converter clone.
 */
struct cloneStruct
{
    /* the cloned main converter; _ISO_2022_SafeClone() returns its address */
    UConverter cnv;
    /* storage into which the sub-converter (if any) is cloned */
    UConverter currentConverter;
    /* slack so that the sub-converter clone can be re-aligned within the block */
    UAlignedMemory deadSpace;
    /* private copy of the ISO 2022 state; cnv.extraInfo is pointed at it */
    UConverterDataISO2022 mydata;
};
   3553 
   3554 
   3555 static UConverter *
   3556 _ISO_2022_SafeClone(
   3557             const UConverter *cnv,
   3558             void *stackBuffer,
   3559             int32_t *pBufferSize,
   3560             UErrorCode *status)
   3561 {
   3562     struct cloneStruct * localClone;
   3563     UConverterDataISO2022 *cnvData;
   3564     int32_t i, size;
   3565 
   3566     if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */
   3567         *pBufferSize = (int32_t)sizeof(struct cloneStruct);
   3568         return NULL;
   3569     }
   3570 
   3571     cnvData = (UConverterDataISO2022 *)cnv->extraInfo;
   3572     localClone = (struct cloneStruct *)stackBuffer;
   3573 
   3574     /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
   3575 
   3576     uprv_memcpy(&localClone->mydata, cnvData, sizeof(UConverterDataISO2022));
   3577     localClone->cnv.extraInfo = &localClone->mydata; /* set pointer to extra data */
   3578     localClone->cnv.isExtraLocal = TRUE;
   3579 
   3580     /* share the subconverters */
   3581 
   3582     if(cnvData->currentConverter != NULL) {
   3583         size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */
   3584         localClone->mydata.currentConverter =
   3585             ucnv_safeClone(cnvData->currentConverter,
   3586                             &localClone->currentConverter,
   3587                             &size, status);
   3588         if(U_FAILURE(*status)) {
   3589             return NULL;
   3590         }
   3591     }
   3592 
   3593     for(i=0; i<UCNV_2022_MAX_CONVERTERS; ++i) {
   3594         if(cnvData->myConverterArray[i] != NULL) {
   3595             ucnv_incrementRefCount(cnvData->myConverterArray[i]);
   3596         }
   3597     }
   3598 
   3599     return &localClone->cnv;
   3600 }
   3601 
   3602 static void
   3603 _ISO_2022_GetUnicodeSet(const UConverter *cnv,
   3604                     const USetAdder *sa,
   3605                     UConverterUnicodeSet which,
   3606                     UErrorCode *pErrorCode)
   3607 {
   3608     int32_t i;
   3609     UConverterDataISO2022* cnvData;
   3610 
   3611     if (U_FAILURE(*pErrorCode)) {
   3612         return;
   3613     }
   3614 #ifdef U_ENABLE_GENERIC_ISO_2022
   3615     if (cnv->sharedData == &_ISO2022Data) {
   3616         /* We use UTF-8 in this case */
   3617         sa->addRange(sa->set, 0, 0xd7FF);
   3618         sa->addRange(sa->set, 0xE000, 0x10FFFF);
   3619         return;
   3620     }
   3621 #endif
   3622 
   3623     cnvData = (UConverterDataISO2022*)cnv->extraInfo;
   3624 
   3625     /* open a set and initialize it with code points that are algorithmically round-tripped */
   3626     switch(cnvData->locale[0]){
   3627     case 'j':
   3628         /* include JIS X 0201 which is hardcoded */
   3629         sa->add(sa->set, 0xa5);
   3630         sa->add(sa->set, 0x203e);
   3631         if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
   3632             /* include Latin-1 for some variants of JP */
   3633             sa->addRange(sa->set, 0, 0xff);
   3634         } else {
   3635             /* include ASCII for JP */
   3636             sa->addRange(sa->set, 0, 0x7f);
   3637         }
   3638         if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
   3639             /*
   3640              * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0
   3641              * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8)
   3642              * use half-width Katakana.
   3643              * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode)
   3644              * half-width Katakana via the ESC ( I sequence.
   3645              * However, we only emit (fromUnicode) half-width Katakana according to the
   3646              * definition of each variant.
   3647              *
   3648              * When including fallbacks,
   3649              * we need to include half-width Katakana Unicode code points for all JP variants because
   3650              * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana).
   3651              */
   3652             /* include half-width Katakana for JP */
   3653             sa->addRange(sa->set, HWKANA_START, HWKANA_END);
   3654         }
   3655         break;
   3656     case 'c':
   3657     case 'z':
   3658         /* include ASCII for CN */
   3659         sa->addRange(sa->set, 0, 0x7f);
   3660         break;
   3661     case 'k':
   3662         /* there is only one converter for KR, and it is not in the myConverterArray[] */
   3663         cnvData->currentConverter->sharedData->impl->getUnicodeSet(
   3664                 cnvData->currentConverter, sa, which, pErrorCode);
   3665         /* the loop over myConverterArray[] will simply not find another converter */
   3666         break;
   3667     default:
   3668         break;
   3669     }
   3670 
   3671 #if 0  /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */
   3672             if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
   3673                 cnvData->version==0 && i==CNS_11643
   3674             ) {
   3675                 /* special handling for non-EXT ISO-2022-CN: add only code points for CNS planes 1 and 2 */
   3676                 ucnv_MBCSGetUnicodeSetForBytes(
   3677                         cnvData->myConverterArray[i],
   3678                         sa, UCNV_ROUNDTRIP_SET,
   3679                         0, 0x81, 0x82,
   3680                         pErrorCode);
   3681             }
   3682 #endif
   3683 
   3684     for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
   3685         UConverterSetFilter filter;
   3686         if(cnvData->myConverterArray[i]!=NULL) {
   3687             if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
   3688                 cnvData->version==0 && i==CNS_11643
   3689             ) {
   3690                 /*
   3691                  * Version-specific for CN:
   3692                  * CN version 0 does not map CNS planes 3..7 although
   3693                  * they are all available in the CNS conversion table;
   3694                  * CN version 1 (-EXT) does map them all.
   3695                  * The two versions create different Unicode sets.
   3696                  */
   3697                 filter=UCNV_SET_FILTER_2022_CN;
   3698             } else if(cnvData->locale[0]=='j' && i==JISX208) {
   3699                 /*
   3700                  * Only add code points that map to Shift-JIS codes
   3701                  * corresponding to JIS X 0208.
   3702                  */
   3703                 filter=UCNV_SET_FILTER_SJIS;
   3704             } else if(i==KSC5601) {
   3705                 /*
   3706                  * Some of the KSC 5601 tables (convrtrs.txt has this aliases on multiple tables)
   3707                  * are broader than GR94.
   3708                  */
   3709                 filter=UCNV_SET_FILTER_GR94DBCS;
   3710             } else {
   3711                 filter=UCNV_SET_FILTER_NONE;
   3712             }
   3713             ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, filter, pErrorCode);
   3714         }
   3715     }
   3716 
   3717     /*
   3718      * ISO 2022 converters must not convert SO/SI/ESC despite what
   3719      * sub-converters do by themselves.
   3720      * Remove these characters from the set.
   3721      */
   3722     sa->remove(sa->set, 0x0e);
   3723     sa->remove(sa->set, 0x0f);
   3724     sa->remove(sa->set, 0x1b);
   3725 
   3726     /* ISO 2022 converters do not convert C1 controls either */
   3727     sa->removeRange(sa->set, 0x80, 0x9f);
   3728 }
   3729 
   3730 static const UConverterImpl _ISO2022Impl={
   3731     UCNV_ISO_2022,
   3732 
   3733     NULL,
   3734     NULL,
   3735 
   3736     _ISO2022Open,
   3737     _ISO2022Close,
   3738     _ISO2022Reset,
   3739 
   3740 #ifdef U_ENABLE_GENERIC_ISO_2022
   3741     T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
   3742     T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
   3743     ucnv_fromUnicode_UTF8,
   3744     ucnv_fromUnicode_UTF8_OFFSETS_LOGIC,
   3745 #else
   3746     NULL,
   3747     NULL,
   3748     NULL,
   3749     NULL,
   3750 #endif
   3751     NULL,
   3752 
   3753     NULL,
   3754     _ISO2022getName,
   3755     _ISO_2022_WriteSub,
   3756     _ISO_2022_SafeClone,
   3757     _ISO_2022_GetUnicodeSet
   3758 };
   3759 static const UConverterStaticData _ISO2022StaticData={
   3760     sizeof(UConverterStaticData),
   3761     "ISO_2022",
   3762     2022,
   3763     UCNV_IBM,
   3764     UCNV_ISO_2022,
   3765     1,
   3766     3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */
   3767     { 0x1a, 0, 0, 0 },
   3768     1,
   3769     FALSE,
   3770     FALSE,
   3771     0,
   3772     0,
   3773     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   3774 };
   3775 const UConverterSharedData _ISO2022Data={
   3776     sizeof(UConverterSharedData),
   3777     ~((uint32_t) 0),
   3778     NULL,
   3779     NULL,
   3780     &_ISO2022StaticData,
   3781     FALSE,
   3782     &_ISO2022Impl,
   3783     0
   3784 };
   3785 
   3786 /*************JP****************/
   3787 static const UConverterImpl _ISO2022JPImpl={
   3788     UCNV_ISO_2022,
   3789 
   3790     NULL,
   3791     NULL,
   3792 
   3793     _ISO2022Open,
   3794     _ISO2022Close,
   3795     _ISO2022Reset,
   3796 
   3797     UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,
   3798     UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,
   3799     UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,
   3800     UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,
   3801     NULL,
   3802 
   3803     NULL,
   3804     _ISO2022getName,
   3805     _ISO_2022_WriteSub,
   3806     _ISO_2022_SafeClone,
   3807     _ISO_2022_GetUnicodeSet
   3808 };
   3809 static const UConverterStaticData _ISO2022JPStaticData={
   3810     sizeof(UConverterStaticData),
   3811     "ISO_2022_JP",
   3812     0,
   3813     UCNV_IBM,
   3814     UCNV_ISO_2022,
   3815     1,
   3816     6, /* max 6 bytes per UChar: 4-byte escape sequence + DBCS */
   3817     { 0x1a, 0, 0, 0 },
   3818     1,
   3819     FALSE,
   3820     FALSE,
   3821     0,
   3822     0,
   3823     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   3824 };
   3825 static const UConverterSharedData _ISO2022JPData={
   3826     sizeof(UConverterSharedData),
   3827     ~((uint32_t) 0),
   3828     NULL,
   3829     NULL,
   3830     &_ISO2022JPStaticData,
   3831     FALSE,
   3832     &_ISO2022JPImpl,
   3833     0
   3834 };
   3835 
   3836 /************* KR ***************/
   3837 static const UConverterImpl _ISO2022KRImpl={
   3838     UCNV_ISO_2022,
   3839 
   3840     NULL,
   3841     NULL,
   3842 
   3843     _ISO2022Open,
   3844     _ISO2022Close,
   3845     _ISO2022Reset,
   3846 
   3847     UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,
   3848     UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,
   3849     UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,
   3850     UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,
   3851     NULL,
   3852 
   3853     NULL,
   3854     _ISO2022getName,
   3855     _ISO_2022_WriteSub,
   3856     _ISO_2022_SafeClone,
   3857     _ISO_2022_GetUnicodeSet
   3858 };
   3859 static const UConverterStaticData _ISO2022KRStaticData={
   3860     sizeof(UConverterStaticData),
   3861     "ISO_2022_KR",
   3862     0,
   3863     UCNV_IBM,
   3864     UCNV_ISO_2022,
   3865     1,
   3866     3, /* max 3 bytes per UChar: SO+DBCS */
   3867     { 0x1a, 0, 0, 0 },
   3868     1,
   3869     FALSE,
   3870     FALSE,
   3871     0,
   3872     0,
   3873     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   3874 };
   3875 static const UConverterSharedData _ISO2022KRData={
   3876     sizeof(UConverterSharedData),
   3877     ~((uint32_t) 0),
   3878     NULL,
   3879     NULL,
   3880     &_ISO2022KRStaticData,
   3881     FALSE,
   3882     &_ISO2022KRImpl,
   3883     0
   3884 };
   3885 
   3886 /*************** CN ***************/
   3887 static const UConverterImpl _ISO2022CNImpl={
   3888 
   3889     UCNV_ISO_2022,
   3890 
   3891     NULL,
   3892     NULL,
   3893 
   3894     _ISO2022Open,
   3895     _ISO2022Close,
   3896     _ISO2022Reset,
   3897 
   3898     UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,
   3899     UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,
   3900     UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,
   3901     UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,
   3902     NULL,
   3903 
   3904     NULL,
   3905     _ISO2022getName,
   3906     _ISO_2022_WriteSub,
   3907     _ISO_2022_SafeClone,
   3908     _ISO_2022_GetUnicodeSet
   3909 };
   3910 static const UConverterStaticData _ISO2022CNStaticData={
   3911     sizeof(UConverterStaticData),
   3912     "ISO_2022_CN",
   3913     0,
   3914     UCNV_IBM,
   3915     UCNV_ISO_2022,
   3916     1,
   3917     8, /* max 8 bytes per UChar: 4-byte CNS designator + 2 bytes for SS2/SS3 + DBCS */
   3918     { 0x1a, 0, 0, 0 },
   3919     1,
   3920     FALSE,
   3921     FALSE,
   3922     0,
   3923     0,
   3924     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   3925 };
   3926 static const UConverterSharedData _ISO2022CNData={
   3927     sizeof(UConverterSharedData),
   3928     ~((uint32_t) 0),
   3929     NULL,
   3930     NULL,
   3931     &_ISO2022CNStaticData,
   3932     FALSE,
   3933     &_ISO2022CNImpl,
   3934     0
   3935 };
   3936 
   3937 
   3938 
   3939 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
   3940