Home | History | Annotate | Download | only in common
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 2000-2009, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 *   file name:  ucnv2022.c
      7 *   encoding:   US-ASCII
      8 *   tab size:   8 (not used)
      9 *   indentation:4
     10 *
     11 *   created on: 2000feb03
     12 *   created by: Markus W. Scherer
     13 *
     14 *   Change history:
     15 *
     16 *   06/29/2000  helena  Major rewrite of the callback APIs.
     17 *   08/08/2000  Ram     Included support for ISO-2022-JP-2
     18 *                       Changed implementation of toUnicode
     19 *                       function
     20 *   08/21/2000  Ram     Added support for ISO-2022-KR
     21 *   08/29/2000  Ram     Separated implementation of EBCDIC to
     22 *                       ucnvebdc.c
     23 *   09/20/2000  Ram     Added support for ISO-2022-CN
     24 *                       Added implementations for getNextUChar()
     25 *                       for specific 2022 country variants.
     26 *   10/31/2000  Ram     Implemented offsets logic functions
     27 */
     28 
     29 #include "unicode/utypes.h"
     30 
     31 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
     32 
     33 #include "unicode/ucnv.h"
     34 #include "unicode/uset.h"
     35 #include "unicode/ucnv_err.h"
     36 #include "unicode/ucnv_cb.h"
     37 #include "ucnv_imp.h"
     38 #include "ucnv_bld.h"
     39 #include "ucnv_cnv.h"
     40 #include "ucnvmbcs.h"
     41 #include "cstring.h"
     42 #include "cmemory.h"
     43 
     44 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     45 
     46 #ifdef U_ENABLE_GENERIC_ISO_2022
     47 /*
     48  * I am disabling the generic ISO-2022 converter after proposing to do so on
     49  * the icu mailing list two days ago.
     50  *
     51  * Reasons:
     52  * 1. It does not fully support the ISO-2022/ECMA-35 specification with all of
     53  *    its designation sequences, single shifts with return to the previous state,
     54  *    switch-with-no-return to UTF-16BE or similar, etc.
     55  *    This is unlike the language-specific variants like ISO-2022-JP which
     56  *    require a much smaller repertoire of ISO-2022 features.
     57  *    These variants continue to be supported.
     58  * 2. I believe that no one is really using the generic ISO-2022 converter
     59  *    but rather always one of the language-specific variants.
     60  *    Note that ICU's generic ISO-2022 converter has always output one escape
     61  *    sequence followed by UTF-8 for the whole stream.
     62  * 3. Switching between subcharsets is extremely slow, because each time
     63  *    the previous converter is closed and a new one opened,
     64  *    without any kind of caching, least-recently-used list, etc.
     65  * 4. The code is currently buggy, and given the above it does not seem
     66  *    reasonable to spend the time on maintenance.
     67  * 5. ISO-2022 subcharsets should normally be used with 7-bit byte encodings.
     68  *    This means, for example, that when ISO-8859-7 is designated, the following
     69  *    ISO-2022 bytes 00..7f should be interpreted as ISO-8859-7 bytes 80..ff.
     70  *    The ICU ISO-2022 converter does not handle this - and has no information
     71  *    about which subconverter would have to be shifted vs. which is designed
     72  *    for 7-bit ISO-2022.
     73  *
     74  * Markus Scherer 2003-dec-03
     75  */
     76 #endif
     77 
     78 static const char SHIFT_IN_STR[]  = "\x0F";
     79 static const char SHIFT_OUT_STR[] = "\x0E";
     80 
     81 #define CR      0x0D
     82 #define LF      0x0A
     83 #define H_TAB   0x09
     84 #define V_TAB   0x0B
     85 #define SPACE   0x20
     86 
     87 enum {
     88     HWKANA_START=0xff61,
     89     HWKANA_END=0xff9f
     90 };
     91 
     92 /*
     93  * 94-character sets with native byte values A1..FE are encoded in ISO 2022
     94  * as bytes 21..7E. (Subtract 0x80.)
     95  * 96-character sets with native byte values A0..FF are encoded in ISO 2022
     96  * as bytes 20..7F. (Subtract 0x80.)
     97  * Do not encode C1 control codes with native bytes 80..9F
     98  * as bytes 00..1F (C0 control codes).
     99  */
    100 enum {
    101     GR94_START=0xa1,
    102     GR94_END=0xfe,
    103     GR96_START=0xa0,
    104     GR96_END=0xff
    105 };
    106 
    107 /*
    108  * ISO 2022 control codes must not be converted from Unicode
    109  * because they would mess up the byte stream.
    110  * The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b
    111  * corresponding to SO, SI, and ESC.
    112  */
    113 #define IS_2022_CONTROL(c) (((c)<0x20) && (((uint32_t)1<<(c))&0x0800c000)!=0)
    114 
    115 /* for ISO-2022-JP and -CN implementations */
    116 typedef enum  {
    117         /* shared values */
    118         INVALID_STATE=-1,
    119         ASCII = 0,
    120 
    121         SS2_STATE=0x10,
    122         SS3_STATE,
    123 
    124         /* JP */
    125         ISO8859_1 = 1 ,
    126         ISO8859_7 = 2 ,
    127         JISX201  = 3,
    128         JISX208 = 4,
    129         JISX212 = 5,
    130         GB2312  =6,
    131         KSC5601 =7,
    132         HWKANA_7BIT=8,    /* Halfwidth Katakana 7 bit */
    133 
    134         /* CN */
    135         /* the first few enum constants must keep their values because they correspond to myConverterArray[] */
    136         GB2312_1=1,
    137         ISO_IR_165=2,
    138         CNS_11643=3,
    139 
    140         /*
    141          * these are used in StateEnum and ISO2022State variables,
    142          * but CNS_11643 must be used to index into myConverterArray[]
    143          */
    144         CNS_11643_0=0x20,
    145         CNS_11643_1,
    146         CNS_11643_2,
    147         CNS_11643_3,
    148         CNS_11643_4,
    149         CNS_11643_5,
    150         CNS_11643_6,
    151         CNS_11643_7
    152 } StateEnum;
    153 
    154 /* is the StateEnum charset value for a DBCS charset? */
    155 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)
    156 
    157 #define CSM(cs) ((uint16_t)1<<(cs))
    158 
    159 /*
    160  * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence
    161  * to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x
    162  *
    163  * Note: The converter uses some leniency:
    164  * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in
    165  *   all versions, not just JIS7 and JIS8.
    166  * - ICU does not distinguish between different versions of JIS X 0208.
    167  */
    168 enum { MAX_JA_VERSION=4 };
    169 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
    170     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
    171     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
    172     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
    173     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
    174     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)
    175 };
    176 
    177 typedef enum {
    178         ASCII1=0,
    179         LATIN1,
    180         SBCS,
    181         DBCS,
    182         MBCS,
    183         HWKANA
    184 }Cnv2022Type;
    185 
    186 typedef struct ISO2022State {
    187     int8_t cs[4];       /* charset number for SI (G0)/SO (G1)/SS2 (G2)/SS3 (G3) */
    188     int8_t g;           /* 0..3 for G0..G3 (SI/SO/SS2/SS3) */
    189     int8_t prevG;       /* g before single shift (SS2 or SS3) */
    190 } ISO2022State;
    191 
    192 #define UCNV_OPTIONS_VERSION_MASK 0xf
    193 #define UCNV_2022_MAX_CONVERTERS 10
    194 
    195 typedef struct{
    196     UConverterSharedData *myConverterArray[UCNV_2022_MAX_CONVERTERS];
    197     UConverter *currentConverter;
    198     Cnv2022Type currentType;
    199     ISO2022State toU2022State, fromU2022State;
    200     uint32_t key;
    201     uint32_t version;
    202 #ifdef U_ENABLE_GENERIC_ISO_2022
    203     UBool isFirstBuffer;
    204 #endif
    205     UBool isEmptySegment;
    206     char name[30];
    207     char locale[3];
    208 }UConverterDataISO2022;
    209 
    210 /* Protos */
    211 /* ISO-2022 ----------------------------------------------------------------- */
    212 
    213 /*Forward declaration */
    214 U_CFUNC void
    215 ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs * args,
    216                       UErrorCode * err);
    217 U_CFUNC void
    218 ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs * args,
    219                                     UErrorCode * err);
    220 
    221 #define ESC_2022 0x1B /*ESC*/
    222 
    223 typedef enum
    224 {
    225         INVALID_2022 = -1, /*Doesn't correspond to a valid iso 2022 escape sequence*/
    226         VALID_NON_TERMINAL_2022 = 0, /*so far corresponds to a valid iso 2022 escape sequence*/
    227         VALID_TERMINAL_2022 = 1, /*corresponds to a valid iso 2022 escape sequence*/
    228         VALID_MAYBE_TERMINAL_2022 = 2 /*so far matches one iso 2022 escape sequence, but by adding more characters might match another escape sequence*/
    229 } UCNV_TableStates_2022;
    230 
    231 /*
    232 * The way these state transition arrays work is:
    233 * ex : ESC$B is the sequence for JISX208
    234 *      a) First Iteration: char is ESC
    235 *          i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index
    236 *             int x = normalize_esq_chars_2022[27] which is equal to 1
    237 *         ii) Search for this value in escSeqStateTable_Key_2022[]
    238 *             value of x is stored at escSeqStateTable_Key_2022[0]
    239 *        iii) Save this index as offset
    240 *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
    241 *             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
    242 *     b) Switch on this state and continue to next char
    243 *          i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index
    244 *             which is normalize_esq_chars_2022[36] == 4
    245 *         ii) x is currently 1(from above)
    246 *               x<<=5 -- x is now 32
    247 *               x+=normalize_esq_chars_2022[36]
    248 *               now x is 36
    249 *        iii) Search for this value in escSeqStateTable_Key_2022[]
    250 *             value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2
    251 *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
    252 *             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
    253 *     c) Switch on this state and continue to next char
    254 *        i)  Get the value of B from normalize_esq_chars_2022[] with int value of B as index
    255 *        ii) x is currently 36 (from above)
    256 *            x<<=5 -- x is now 1152
    257 *            x+=normalize_esq_chars_2022[66]
    258 *            now x is 1161
    259 *       iii) Search for this value in escSeqStateTable_Key_2022[]
    260 *            value of x is stored at escSeqStateTable_Key_2022[24], so offset is 24
    261 *        iv) Get state of this sequence from escSeqStateTable_Value_2022[24]
    262 *            escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022
    263 *         v) Get the converter name from escSeqStateTable_Result_2022[24] which is "JISX-208"
    264 */
    265 
    266 
    267 /*Below are the 3 arrays depicting a state transition table*/
    268 static const int8_t normalize_esq_chars_2022[256] = {
    269 /*       0      1       2       3       4      5       6        7       8       9           */
    270 
    271          0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    272         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    273         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,1      ,0      ,0
    274         ,0     ,0      ,0      ,0      ,0      ,0      ,4      ,7      ,29      ,0
    275         ,2     ,24     ,26     ,27     ,0      ,3      ,23     ,6      ,0      ,0
    276         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    277         ,0     ,0      ,0      ,0      ,5      ,8      ,9      ,10     ,11     ,12
    278         ,13    ,14     ,15     ,16     ,17     ,18     ,19     ,20     ,25     ,28
    279         ,0     ,0      ,21     ,0      ,0      ,0      ,0      ,0      ,0      ,0
    280         ,22    ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    281         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    282         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    283         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    284         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    285         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    286         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    287         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    288         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    289         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    290         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    291         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    292         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    293         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    294         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    295         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    296         ,0     ,0      ,0      ,0      ,0      ,0
    297 };
    298 
    299 #ifdef U_ENABLE_GENERIC_ISO_2022
    300 /*
    301  * When the generic ISO-2022 converter is completely removed, not just disabled
    302  * per #ifdef, then the following state table and the associated tables that are
    303  * dimensioned with MAX_STATES_2022 should be trimmed.
    304  *
    305  * Especially, VALID_MAYBE_TERMINAL_2022 will not be used any more, and all of
    306  * the associated escape sequences starting with ESC ( B should be removed.
    307  * This includes the ones with key values 1097 and all of the ones above 1000000.
    308  *
    309  * For the latter, the tables can simply be truncated.
    310  * For the former, since the tables must be kept parallel, it is probably best
    311  * to simply duplicate an adjacent table cell, parallel in all tables.
    312  *
    313  * It may make sense to restructure the tables, especially by using small search
    314  * tables for the variants instead of indexing them parallel to the table here.
    315  */
    316 #endif
    317 
    318 #define MAX_STATES_2022 74
    319 static const int32_t escSeqStateTable_Key_2022[MAX_STATES_2022] = {
    320 /*   0           1           2           3           4           5           6           7           8           9           */
    321 
    322      1          ,34         ,36         ,39         ,55         ,57         ,60         ,61         ,1093       ,1096
    323     ,1097       ,1098       ,1099       ,1100       ,1101       ,1102       ,1103       ,1104       ,1105       ,1106
    324     ,1109       ,1154       ,1157       ,1160       ,1161       ,1176       ,1178       ,1179       ,1254       ,1257
    325     ,1768       ,1773       ,1957       ,35105      ,36933      ,36936      ,36937      ,36938      ,36939      ,36940
    326     ,36942      ,36943      ,36944      ,36945      ,36946      ,36947      ,36948      ,37640      ,37642      ,37644
    327     ,37646      ,37711      ,37744      ,37745      ,37746      ,37747      ,37748      ,40133      ,40136      ,40138
    328     ,40139      ,40140      ,40141      ,1123363    ,35947624   ,35947625   ,35947626   ,35947627   ,35947629   ,35947630
    329     ,35947631   ,35947635   ,35947636   ,35947638
    330 };
    331 
    332 #ifdef U_ENABLE_GENERIC_ISO_2022
    333 
    334 static const char* const escSeqStateTable_Result_2022[MAX_STATES_2022] = {
    335  /*  0                      1                        2                      3                   4                   5                        6                      7                       8                       9    */
    336 
    337      NULL                   ,NULL                   ,NULL                   ,NULL               ,NULL               ,NULL                   ,NULL                   ,NULL                   ,"latin1"               ,"latin1"
    338     ,"latin1"               ,"ibm-865"              ,"ibm-865"              ,"ibm-865"          ,"ibm-865"          ,"ibm-865"              ,"ibm-865"              ,"JISX0201"             ,"JISX0201"             ,"latin1"
    339     ,"latin1"               ,NULL                   ,"JISX-208"             ,"ibm-5478"         ,"JISX-208"         ,NULL                   ,NULL                   ,NULL                   ,NULL                   ,"UTF8"
    340     ,"ISO-8859-1"           ,"ISO-8859-7"           ,"JIS-X-208"            ,NULL               ,"ibm-955"          ,"ibm-367"              ,"ibm-952"              ,"ibm-949"              ,"JISX-212"             ,"ibm-1383"
    341     ,"ibm-952"              ,"ibm-964"              ,"ibm-964"              ,"ibm-964"          ,"ibm-964"          ,"ibm-964"              ,"ibm-964"              ,"ibm-5478"         ,"ibm-949"              ,"ISO-IR-165"
    342     ,"CNS-11643-1992,1"     ,"CNS-11643-1992,2"     ,"CNS-11643-1992,3"     ,"CNS-11643-1992,4" ,"CNS-11643-1992,5" ,"CNS-11643-1992,6"     ,"CNS-11643-1992,7"     ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian"
    343     ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,NULL               ,"latin1"           ,"ibm-912"              ,"ibm-913"              ,"ibm-914"              ,"ibm-813"              ,"ibm-1089"
    344     ,"ibm-920"              ,"ibm-915"              ,"ibm-915"              ,"latin1"
    345 };
    346 
    347 #endif
    348 
    349 static const int8_t escSeqStateTable_Value_2022[MAX_STATES_2022] = {
    350 /*          0                           1                         2                             3                           4                           5                               6                        7                          8                           9       */
    351      VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022     ,VALID_NON_TERMINAL_2022   ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    352     ,VALID_MAYBE_TERMINAL_2022  ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    353     ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022
    354     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    355     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    356     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    357     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    358     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    359 };
    360 
    361 
    362 /* Type def for refactoring changeState_2022 code*/
    363 typedef enum{
    364 #ifdef U_ENABLE_GENERIC_ISO_2022
    365     ISO_2022=0,
    366 #endif
    367     ISO_2022_JP=1,
    368     ISO_2022_KR=2,
    369     ISO_2022_CN=3
    370 } Variant2022;
    371 
    372 /*********** ISO 2022 Converter Protos ***********/
    373 static void
    374 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode);
    375 
    376 static void
    377  _ISO2022Close(UConverter *converter);
    378 
    379 static void
    380 _ISO2022Reset(UConverter *converter, UConverterResetChoice choice);
    381 
    382 static const char*
    383 _ISO2022getName(const UConverter* cnv);
    384 
    385 static void
    386 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err);
    387 
    388 static UConverter *
    389 _ISO_2022_SafeClone(const UConverter *cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status);
    390 
    391 #ifdef U_ENABLE_GENERIC_ISO_2022
    392 static void
    393 T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, UErrorCode* err);
    394 #endif
    395 
    396 /*const UConverterSharedData _ISO2022Data;*/
    397 static const UConverterSharedData _ISO2022JPData;
    398 static const UConverterSharedData _ISO2022KRData;
    399 static const UConverterSharedData _ISO2022CNData;
    400 
    401 /*************** Converter implementations ******************/
    402 
    403 /* The purpose of this function is to get around gcc compiler warnings. */
    404 static U_INLINE void
    405 fromUWriteUInt8(UConverter *cnv,
    406                  const char *bytes, int32_t length,
    407                  uint8_t **target, const char *targetLimit,
    408                  int32_t **offsets,
    409                  int32_t sourceIndex,
    410                  UErrorCode *pErrorCode)
    411 {
    412     char *targetChars = (char *)*target;
    413     ucnv_fromUWriteBytes(cnv, bytes, length, &targetChars, targetLimit,
    414                          offsets, sourceIndex, pErrorCode);
    415     *target = (uint8_t*)targetChars;
    416 
    417 }
    418 
    419 static U_INLINE void
    420 setInitialStateToUnicodeKR(UConverter* converter, UConverterDataISO2022 *myConverterData){
    421     if(myConverterData->version == 1) {
    422         UConverter *cnv = myConverterData->currentConverter;
    423 
    424         cnv->toUnicodeStatus=0;     /* offset */
    425         cnv->mode=0;                /* state */
    426         cnv->toULength=0;           /* byteIndex */
    427     }
    428 }
    429 
    430 static U_INLINE void
    431 setInitialStateFromUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConverterData){
    432    /* in ISO-2022-KR the designator sequence appears only once
    433     * in a file so we append it only once
    434     */
    435     if( converter->charErrorBufferLength==0){
    436 
    437         converter->charErrorBufferLength = 4;
    438         converter->charErrorBuffer[0] = 0x1b;
    439         converter->charErrorBuffer[1] = 0x24;
    440         converter->charErrorBuffer[2] = 0x29;
    441         converter->charErrorBuffer[3] = 0x43;
    442     }
    443     if(myConverterData->version == 1) {
    444         UConverter *cnv = myConverterData->currentConverter;
    445 
    446         cnv->fromUChar32=0;
    447         cnv->fromUnicodeStatus=1;   /* prevLength */
    448     }
    449 }
    450 
    451 static void
    452 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
    453 
    454     char myLocale[6]={' ',' ',' ',' ',' ',' '};
    455 
    456     cnv->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022));
    457     if(cnv->extraInfo != NULL) {
    458         UConverterNamePieces stackPieces;
    459         UConverterLoadArgs stackArgs={ (int32_t)sizeof(UConverterLoadArgs) };
    460         UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo;
    461         uint32_t version;
    462 
    463         stackArgs.onlyTestIsLoadable = pArgs->onlyTestIsLoadable;
    464 
    465         uprv_memset(myConverterData, 0, sizeof(UConverterDataISO2022));
    466         myConverterData->currentType = ASCII1;
    467         cnv->fromUnicodeStatus =FALSE;
    468         if(pArgs->locale){
    469             uprv_strncpy(myLocale, pArgs->locale, sizeof(myLocale));
    470         }
    471         version = pArgs->options & UCNV_OPTIONS_VERSION_MASK;
    472         myConverterData->version = version;
    473         if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') &&
    474             (myLocale[2]=='_' || myLocale[2]=='\0'))
    475         {
    476             size_t len=0;
    477             /* open the required converters and cache them */
    478             if(version>MAX_JA_VERSION) {
    479                 /* prevent indexing beyond jpCharsetMasks[] */
    480                 myConverterData->version = version = 0;
    481             }
    482             if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
    483                 myConverterData->myConverterArray[ISO8859_7] =
    484                     ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode);
    485             }
    486             myConverterData->myConverterArray[JISX208] =
    487                 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, errorCode);
    488             if(jpCharsetMasks[version]&CSM(JISX212)) {
    489                 myConverterData->myConverterArray[JISX212] =
    490                     ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode);
    491             }
    492             if(jpCharsetMasks[version]&CSM(GB2312)) {
    493                 myConverterData->myConverterArray[GB2312] =
    494                     ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode);   /* gb_2312_80-1 */
    495             }
    496             if(jpCharsetMasks[version]&CSM(KSC5601)) {
    497                 myConverterData->myConverterArray[KSC5601] =
    498                     ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode);
    499             }
    500 
    501             /* set the function pointers to appropriate funtions */
    502             cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);
    503             uprv_strcpy(myConverterData->locale,"ja");
    504 
    505             (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=");
    506             len = uprv_strlen(myConverterData->name);
    507             myConverterData->name[len]=(char)(myConverterData->version+(int)'0');
    508             myConverterData->name[len+1]='\0';
    509         }
    510         else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
    511             (myLocale[2]=='_' || myLocale[2]=='\0'))
    512         {
    513             const char *cnvName;
    514             if(version==1) {
    515                 cnvName="icu-internal-25546";
    516             } else {
    517                 cnvName="ksc_5601";
    518                 myConverterData->version=version=0;
    519             }
    520             if(pArgs->onlyTestIsLoadable) {
    521                 ucnv_canCreateConverter(cnvName, errorCode);  /* errorCode carries result */
    522                 uprv_free(cnv->extraInfo);
    523                 cnv->extraInfo=NULL;
    524                 return;
    525             } else {
    526                 myConverterData->currentConverter=ucnv_open(cnvName, errorCode);
    527                 if (U_FAILURE(*errorCode)) {
    528                     _ISO2022Close(cnv);
    529                     return;
    530                 }
    531 
    532                 if(version==1) {
    533                     (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=1");
    534                     uprv_memcpy(cnv->subChars, myConverterData->currentConverter->subChars, 4);
    535                     cnv->subCharLen = myConverterData->currentConverter->subCharLen;
    536                 }else{
    537                     (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=0");
    538                 }
    539 
    540                 /* initialize the state variables */
    541                 setInitialStateToUnicodeKR(cnv, myConverterData);
    542                 setInitialStateFromUnicodeKR(cnv, myConverterData);
    543 
    544                 /* set the function pointers to appropriate funtions */
    545                 cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData;
    546                 uprv_strcpy(myConverterData->locale,"ko");
    547             }
    548         }
    549         else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& myLocale[1]=='n'))&&
    550             (myLocale[2]=='_' || myLocale[2]=='\0'))
    551         {
    552 
    553             /* open the required converters and cache them */
    554             myConverterData->myConverterArray[GB2312_1] =
    555                 ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode);
    556             if(version==1) {
    557                 myConverterData->myConverterArray[ISO_IR_165] =
    558                     ucnv_loadSharedData("noop-iso-ir-165", &stackPieces, &stackArgs, errorCode);
    559             }
    560             myConverterData->myConverterArray[CNS_11643] =
    561                 ucnv_loadSharedData("noop-cns-11643", &stackPieces, &stackArgs, errorCode);
    562 
    563 
    564             /* set the function pointers to appropriate funtions */
    565             cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData;
    566             uprv_strcpy(myConverterData->locale,"cn");
    567 
    568             if (version==1){
    569                 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=1");
    570             }else{
    571                 myConverterData->version = 0;
    572                 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=0");
    573             }
    574         }
    575         else{
    576 #ifdef U_ENABLE_GENERIC_ISO_2022
    577             myConverterData->isFirstBuffer = TRUE;
    578 
    579             /* append the UTF-8 escape sequence */
    580             cnv->charErrorBufferLength = 3;
    581             cnv->charErrorBuffer[0] = 0x1b;
    582             cnv->charErrorBuffer[1] = 0x25;
    583             cnv->charErrorBuffer[2] = 0x42;
    584 
    585             cnv->sharedData=(UConverterSharedData*)&_ISO2022Data;
    586             /* initialize the state variables */
    587             uprv_strcpy(myConverterData->name,"ISO_2022");
    588 #else
    589             *errorCode = U_UNSUPPORTED_ERROR;
    590             return;
    591 #endif
    592         }
    593 
    594         cnv->maxBytesPerUChar=cnv->sharedData->staticData->maxBytesPerChar;
    595 
    596         if(U_FAILURE(*errorCode) || pArgs->onlyTestIsLoadable) {
    597             _ISO2022Close(cnv);
    598         }
    599     } else {
    600         *errorCode = U_MEMORY_ALLOCATION_ERROR;
    601     }
    602 }
    603 
    604 
    605 static void
    606 _ISO2022Close(UConverter *converter) {
    607     UConverterDataISO2022* myData =(UConverterDataISO2022 *) (converter->extraInfo);
    608     UConverterSharedData **array = myData->myConverterArray;
    609     int32_t i;
    610 
    611     if (converter->extraInfo != NULL) {
    612         /*close the array of converter pointers and free the memory*/
    613         for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
    614             if(array[i]!=NULL) {
    615                 ucnv_unloadSharedDataIfReady(array[i]);
    616             }
    617         }
    618 
    619         ucnv_close(myData->currentConverter);
    620 
    621         if(!converter->isExtraLocal){
    622             uprv_free (converter->extraInfo);
    623             converter->extraInfo = NULL;
    624         }
    625     }
    626 }
    627 
    628 static void
    629 _ISO2022Reset(UConverter *converter, UConverterResetChoice choice) {
    630     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) (converter->extraInfo);
    631     if(choice<=UCNV_RESET_TO_UNICODE) {
    632         uprv_memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State));
    633         myConverterData->key = 0;
    634         myConverterData->isEmptySegment = FALSE;
    635     }
    636     if(choice!=UCNV_RESET_TO_UNICODE) {
    637         uprv_memset(&myConverterData->fromU2022State, 0, sizeof(ISO2022State));
    638     }
    639 #ifdef U_ENABLE_GENERIC_ISO_2022
    640     if(myConverterData->locale[0] == 0){
    641         if(choice<=UCNV_RESET_TO_UNICODE) {
    642             myConverterData->isFirstBuffer = TRUE;
    643             myConverterData->key = 0;
    644             if (converter->mode == UCNV_SO){
    645                 ucnv_close (myConverterData->currentConverter);
    646                 myConverterData->currentConverter=NULL;
    647             }
    648             converter->mode = UCNV_SI;
    649         }
    650         if(choice!=UCNV_RESET_TO_UNICODE) {
    651             /* re-append UTF-8 escape sequence */
    652             converter->charErrorBufferLength = 3;
    653             converter->charErrorBuffer[0] = 0x1b;
    654             converter->charErrorBuffer[1] = 0x28;
    655             converter->charErrorBuffer[2] = 0x42;
    656         }
    657     }
    658     else
    659 #endif
    660     {
    661         /* reset the state variables */
    662         if(myConverterData->locale[0] == 'k'){
    663             if(choice<=UCNV_RESET_TO_UNICODE) {
    664                 setInitialStateToUnicodeKR(converter, myConverterData);
    665             }
    666             if(choice!=UCNV_RESET_TO_UNICODE) {
    667                 setInitialStateFromUnicodeKR(converter, myConverterData);
    668             }
    669         }
    670     }
    671 }
    672 
    673 static const char*
    674 _ISO2022getName(const UConverter* cnv){
    675     if(cnv->extraInfo){
    676         UConverterDataISO2022* myData= (UConverterDataISO2022*)cnv->extraInfo;
    677         return myData->name;
    678     }
    679     return NULL;
    680 }
    681 
    682 
    683 /*************** to unicode *******************/
    684 /****************************************************************************
    685  * Recognized escape sequences are
    686  * <ESC>(B  ASCII
    687  * <ESC>.A  ISO-8859-1
    688  * <ESC>.F  ISO-8859-7
    689  * <ESC>(J  JISX-201
    690  * <ESC>(I  JISX-201
    691  * <ESC>$B  JISX-208
    692  * <ESC>$@  JISX-208
    693  * <ESC>$(D JISX-212
    694  * <ESC>$A  GB2312
    695  * <ESC>$(C KSC5601
    696  */
    697 static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= {
    698 /*      0                1               2               3               4               5               6               7               8               9    */
    699     INVALID_STATE   ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,SS2_STATE      ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    700     ,ASCII          ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,JISX201        ,HWKANA_7BIT    ,JISX201        ,INVALID_STATE
    701     ,INVALID_STATE  ,INVALID_STATE  ,JISX208        ,GB2312         ,JISX208        ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    702     ,ISO8859_1      ,ISO8859_7      ,JISX208        ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,KSC5601        ,JISX212        ,INVALID_STATE
    703     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    704     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    705     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    706     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    707 };
    708 
    709 /*************** to unicode *******************/
    710 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
    711 /*      0                1               2               3               4               5               6               7               8               9    */
    712      INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,SS2_STATE      ,SS3_STATE      ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    713     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    714     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    715     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    716     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,GB2312_1       ,INVALID_STATE  ,ISO_IR_165
    717     ,CNS_11643_1    ,CNS_11643_2    ,CNS_11643_3    ,CNS_11643_4    ,CNS_11643_5    ,CNS_11643_6    ,CNS_11643_7    ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    718     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    719     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    720 };
    721 
    722 
    723 static UCNV_TableStates_2022
    724 getKey_2022(char c,int32_t* key,int32_t* offset){
    725     int32_t togo;
    726     int32_t low = 0;
    727     int32_t hi = MAX_STATES_2022;
    728     int32_t oldmid=0;
    729 
    730     togo = normalize_esq_chars_2022[(uint8_t)c];
    731     if(togo == 0) {
    732         /* not a valid character anywhere in an escape sequence */
    733         *key = 0;
    734         *offset = 0;
    735         return INVALID_2022;
    736     }
    737     togo = (*key << 5) + togo;
    738 
    739     while (hi != low)  /*binary search*/{
    740 
    741         register int32_t mid = (hi+low) >> 1; /*Finds median*/
    742 
    743         if (mid == oldmid)
    744             break;
    745 
    746         if (escSeqStateTable_Key_2022[mid] > togo){
    747             hi = mid;
    748         }
    749         else if (escSeqStateTable_Key_2022[mid] < togo){
    750             low = mid;
    751         }
    752         else /*we found it*/{
    753             *key = togo;
    754             *offset = mid;
    755             return (UCNV_TableStates_2022)escSeqStateTable_Value_2022[mid];
    756         }
    757         oldmid = mid;
    758 
    759     }
    760 
    761     *key = 0;
    762     *offset = 0;
    763     return INVALID_2022;
    764 }
    765 
    766 /*runs through a state machine to determine the escape sequence - codepage correspondance
    767  */
    768 static void
    769 changeState_2022(UConverter* _this,
    770                 const char** source,
    771                 const char* sourceLimit,
    772                 Variant2022 var,
    773                 UErrorCode* err){
    774     UCNV_TableStates_2022 value;
    775     UConverterDataISO2022* myData2022 = ((UConverterDataISO2022*)_this->extraInfo);
    776     uint32_t key = myData2022->key;
    777     int32_t offset = 0;
    778     int8_t initialToULength = _this->toULength;
    779     char c;
    780 
    781     value = VALID_NON_TERMINAL_2022;
    782     while (*source < sourceLimit) {
    783         c = *(*source)++;
    784         _this->toUBytes[_this->toULength++]=(uint8_t)c;
    785         value = getKey_2022(c,(int32_t *) &key, &offset);
    786 
    787         switch (value){
    788 
    789         case VALID_NON_TERMINAL_2022 :
    790             /* continue with the loop */
    791             break;
    792 
    793         case VALID_TERMINAL_2022:
    794             key = 0;
    795             goto DONE;
    796 
    797         case INVALID_2022:
    798             goto DONE;
    799 
    800         case VALID_MAYBE_TERMINAL_2022:
    801 #ifdef U_ENABLE_GENERIC_ISO_2022
    802             /* ESC ( B is ambiguous only for ISO_2022 itself */
    803             if(var == ISO_2022) {
    804                 /* discard toUBytes[] for ESC ( B because this sequence is correct and complete */
    805                 _this->toULength = 0;
    806 
    807                 /* TODO need to indicate that ESC ( B was seen; if failure, then need to replay from source or from MBCS-style replay */
    808 
    809                 /* continue with the loop */
    810                 value = VALID_NON_TERMINAL_2022;
    811                 break;
    812             } else
    813 #endif
    814             {
    815                 /* not ISO_2022 itself, finish here */
    816                 value = VALID_TERMINAL_2022;
    817                 key = 0;
    818                 goto DONE;
    819             }
    820         }
    821     }
    822 
    823 DONE:
    824     myData2022->key = key;
    825 
    826     if (value == VALID_NON_TERMINAL_2022) {
    827         /* indicate that the escape sequence is incomplete: key!=0 */
    828         return;
    829     } else if (value == INVALID_2022 ) {
    830         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    831     } else /* value == VALID_TERMINAL_2022 */ {
    832         switch(var){
    833 #ifdef U_ENABLE_GENERIC_ISO_2022
    834         case ISO_2022:
    835         {
    836             const char *chosenConverterName = escSeqStateTable_Result_2022[offset];
    837             if(chosenConverterName == NULL) {
    838                 /* SS2 or SS3 */
    839                 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    840                 _this->toUCallbackReason = UCNV_UNASSIGNED;
    841                 return;
    842             }
    843 
    844             _this->mode = UCNV_SI;
    845             ucnv_close(myData2022->currentConverter);
    846             myData2022->currentConverter = myUConverter = ucnv_open(chosenConverterName, err);
    847             if(U_SUCCESS(*err)) {
    848                 myUConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
    849                 _this->mode = UCNV_SO;
    850             }
    851             break;
    852         }
    853 #endif
    854         case ISO_2022_JP:
    855             {
    856                 StateEnum tempState=(StateEnum)nextStateToUnicodeJP[offset];
    857                 switch(tempState) {
    858                 case INVALID_STATE:
    859                     *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    860                     break;
    861                 case SS2_STATE:
    862                     if(myData2022->toU2022State.cs[2]!=0) {
    863                         if(myData2022->toU2022State.g<2) {
    864                             myData2022->toU2022State.prevG=myData2022->toU2022State.g;
    865                         }
    866                         myData2022->toU2022State.g=2;
    867                     } else {
    868                         /* illegal to have SS2 before a matching designator */
    869                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    870                     }
    871                     break;
    872                 /* case SS3_STATE: not used in ISO-2022-JP-x */
    873                 case ISO8859_1:
    874                 case ISO8859_7:
    875                     if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
    876                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    877                     } else {
    878                         /* G2 charset for SS2 */
    879                         myData2022->toU2022State.cs[2]=(int8_t)tempState;
    880                     }
    881                     break;
    882                 default:
    883                     if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
    884                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    885                     } else {
    886                         /* G0 charset */
    887                         myData2022->toU2022State.cs[0]=(int8_t)tempState;
    888                     }
    889                     break;
    890                 }
    891             }
    892             break;
    893         case ISO_2022_CN:
    894             {
    895                 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
    896                 switch(tempState) {
    897                 case INVALID_STATE:
    898                     *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    899                     break;
    900                 case SS2_STATE:
    901                     if(myData2022->toU2022State.cs[2]!=0) {
    902                         if(myData2022->toU2022State.g<2) {
    903                             myData2022->toU2022State.prevG=myData2022->toU2022State.g;
    904                         }
    905                         myData2022->toU2022State.g=2;
    906                     } else {
    907                         /* illegal to have SS2 before a matching designator */
    908                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    909                     }
    910                     break;
    911                 case SS3_STATE:
    912                     if(myData2022->toU2022State.cs[3]!=0) {
    913                         if(myData2022->toU2022State.g<2) {
    914                             myData2022->toU2022State.prevG=myData2022->toU2022State.g;
    915                         }
    916                         myData2022->toU2022State.g=3;
    917                     } else {
    918                         /* illegal to have SS3 before a matching designator */
    919                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    920                     }
    921                     break;
    922                 case ISO_IR_165:
    923                     if(myData2022->version==0) {
    924                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    925                         break;
    926                     }
    927                     /*fall through*/
    928                 case GB2312_1:
    929                     /*fall through*/
    930                 case CNS_11643_1:
    931                     myData2022->toU2022State.cs[1]=(int8_t)tempState;
    932                     break;
    933                 case CNS_11643_2:
    934                     myData2022->toU2022State.cs[2]=(int8_t)tempState;
    935                     break;
    936                 default:
    937                     /* other CNS 11643 planes */
    938                     if(myData2022->version==0) {
    939                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    940                     } else {
    941                        myData2022->toU2022State.cs[3]=(int8_t)tempState;
    942                     }
    943                     break;
    944                 }
    945             }
    946             break;
    947         case ISO_2022_KR:
    948             if(offset==0x30){
    949                 /* nothing to be done, just accept this one escape sequence */
    950             } else {
    951                 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    952             }
    953             break;
    954 
    955         default:
    956             *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    957             break;
    958         }
    959     }
    960     if(U_SUCCESS(*err)) {
    961         _this->toULength = 0;
    962     } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) {
    963         if(_this->toULength>1) {
    964             /*
    965              * Ticket 5691: consistent illegal sequences:
    966              * - We include at least the first byte (ESC) in the illegal sequence.
    967              * - If any of the non-initial bytes could be the start of a character,
    968              *   we stop the illegal sequence before the first one of those.
    969              *   In escape sequences, all following bytes are "printable", that is,
    970              *   unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS),
    971              *   they are valid single/lead bytes.
    972              *   For simplicity, we always only report the initial ESC byte as the
    973              *   illegal sequence and back out all other bytes we looked at.
    974              */
    975             /* Back out some bytes. */
    976             int8_t backOutDistance=_this->toULength-1;
    977             int8_t bytesFromThisBuffer=_this->toULength-initialToULength;
    978             if(backOutDistance<=bytesFromThisBuffer) {
    979                 /* same as initialToULength<=1 */
    980                 *source-=backOutDistance;
    981             } else {
    982                 /* Back out bytes from the previous buffer: Need to replay them. */
    983                 _this->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance);
    984                 /* same as -(initialToULength-1) */
    985                 /* preToULength is negative! */
    986                 uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULength);
    987                 *source-=bytesFromThisBuffer;
    988             }
    989             _this->toULength=1;
    990         }
    991     } else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) {
    992         _this->toUCallbackReason = UCNV_UNASSIGNED;
    993     }
    994 }
    995 
    996 /*Checks the characters of the buffer against valid 2022 escape sequences
    997 *if the match we return a pointer to the initial start of the sequence otherwise
    998 *we return sourceLimit
    999 */
   1000 /*for 2022 looks ahead in the stream
   1001  *to determine the longest possible convertible
   1002  *data stream
   1003  */
   1004 static U_INLINE const char*
   1005 getEndOfBuffer_2022(const char** source,
   1006                    const char* sourceLimit,
   1007                    UBool flush){
   1008 
   1009     const char* mySource = *source;
   1010 
   1011 #ifdef U_ENABLE_GENERIC_ISO_2022
   1012     if (*source >= sourceLimit)
   1013         return sourceLimit;
   1014 
   1015     do{
   1016 
   1017         if (*mySource == ESC_2022){
   1018             int8_t i;
   1019             int32_t key = 0;
   1020             int32_t offset;
   1021             UCNV_TableStates_2022 value = VALID_NON_TERMINAL_2022;
   1022 
   1023             /* Kludge: I could not
   1024             * figure out the reason for validating an escape sequence
   1025             * twice - once here and once in changeState_2022().
   1026             * is it possible to have an ESC character in a ISO2022
   1027             * byte stream which is valid in a code page? Is it legal?
   1028             */
   1029             for (i=0;
   1030             (mySource+i < sourceLimit)&&(value == VALID_NON_TERMINAL_2022);
   1031             i++) {
   1032                 value =  getKey_2022(*(mySource+i), &key, &offset);
   1033             }
   1034             if (value > 0 || *mySource==ESC_2022)
   1035                 return mySource;
   1036 
   1037             if ((value == VALID_NON_TERMINAL_2022)&&(!flush) )
   1038                 return sourceLimit;
   1039         }
   1040     }while (++mySource < sourceLimit);
   1041 
   1042     return sourceLimit;
   1043 #else
   1044     while(mySource < sourceLimit && *mySource != ESC_2022) {
   1045         ++mySource;
   1046     }
   1047     return mySource;
   1048 #endif
   1049 }
   1050 
   1051 
   1052 /* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c
   1053  * any future change in _MBCSFromUChar32() function should be reflected here.
   1054  * @return number of bytes in *value; negative number if fallback; 0 if no mapping
   1055  */
   1056 static U_INLINE int32_t
   1057 MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData,
   1058                                          UChar32 c,
   1059                                          uint32_t* value,
   1060                                          UBool useFallback,
   1061                                          int outputType)
   1062 {
   1063     const int32_t *cx;
   1064     const uint16_t *table;
   1065     uint32_t stage2Entry;
   1066     uint32_t myValue;
   1067     int32_t length;
   1068     const uint8_t *p;
   1069     /*
   1070      * TODO(markus): Use and require new, faster MBCS conversion table structures.
   1071      * Use internal version of ucnv_open() that verifies that the new structures are available,
   1072      * else U_INTERNAL_PROGRAM_ERROR.
   1073      */
   1074     /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
   1075     if(c<0x10000 || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
   1076         table=sharedData->mbcs.fromUnicodeTable;
   1077         stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
   1078         /* get the bytes and the length for the output */
   1079         if(outputType==MBCS_OUTPUT_2){
   1080             myValue=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
   1081             if(myValue<=0xff) {
   1082                 length=1;
   1083             } else {
   1084                 length=2;
   1085             }
   1086         } else /* outputType==MBCS_OUTPUT_3 */ {
   1087             p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
   1088             myValue=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
   1089             if(myValue<=0xff) {
   1090                 length=1;
   1091             } else if(myValue<=0xffff) {
   1092                 length=2;
   1093             } else {
   1094                 length=3;
   1095             }
   1096         }
   1097         /* is this code point assigned, or do we use fallbacks? */
   1098         if((stage2Entry&(1<<(16+(c&0xf))))!=0) {
   1099             /* assigned */
   1100             *value=myValue;
   1101             return length;
   1102         } else if(FROM_U_USE_FALLBACK(useFallback, c) && myValue!=0) {
   1103             /*
   1104              * We allow a 0 byte output if the "assigned" bit is set for this entry.
   1105              * There is no way with this data structure for fallback output
   1106              * to be a zero byte.
   1107              */
   1108             *value=myValue;
   1109             return -length;
   1110         }
   1111     }
   1112 
   1113     cx=sharedData->mbcs.extIndexes;
   1114     if(cx!=NULL) {
   1115         return ucnv_extSimpleMatchFromU(cx, c, value, useFallback);
   1116     }
   1117 
   1118     /* unassigned */
   1119     return 0;
   1120 }
   1121 
   1122 /* This inline function replicates code in _MBCSSingleFromUChar32() function in ucnvmbcs.c
   1123  * any future change in _MBCSSingleFromUChar32() function should be reflected here.
   1124  * @param retval pointer to output byte
   1125  * @return 1 roundtrip byte  0 no mapping  -1 fallback byte
   1126  */
   1127 static U_INLINE int32_t
   1128 MBCS_SINGLE_FROM_UCHAR32(UConverterSharedData* sharedData,
   1129                                        UChar32 c,
   1130                                        uint32_t* retval,
   1131                                        UBool useFallback)
   1132 {
   1133     const uint16_t *table;
   1134     int32_t value;
   1135     /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
   1136     if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
   1137         return 0;
   1138     }
   1139     /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
   1140     table=sharedData->mbcs.fromUnicodeTable;
   1141     /* get the byte for the output */
   1142     value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
   1143     /* is this code point assigned, or do we use fallbacks? */
   1144     *retval=(uint32_t)(value&0xff);
   1145     if(value>=0xf00) {
   1146         return 1;  /* roundtrip */
   1147     } else if(useFallback ? value>=0x800 : value>=0xc00) {
   1148         return -1;  /* fallback taken */
   1149     } else {
   1150         return 0;  /* no mapping */
   1151     }
   1152 }
   1153 
   1154 /*
   1155  * Check that the result is a 2-byte value with each byte in the range A1..FE
   1156  * (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte
   1157  * to move it to the ISO 2022 range 21..7E.
   1158  * Return 0 if out of range.
   1159  */
   1160 static U_INLINE uint32_t
   1161 _2022FromGR94DBCS(uint32_t value) {
   1162     if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) &&
   1163         (uint8_t)(value - 0xa1) <= (0xfe - 0xa1)
   1164     ) {
   1165         return value - 0x8080;  /* shift down to 21..7e byte range */
   1166     } else {
   1167         return 0;  /* not valid for ISO 2022 */
   1168     }
   1169 }
   1170 
   #if 0 /* 5691: Call sites now check for validity. They can just += 0x8080 after that. */
   /*
    * Reverse of _2022FromGR94DBCS(): adds 0x8080 to the 2022 code point and
    * returns the sum if both of its bytes are in the range A1..FE.
    * Otherwise the 2022 code point is returned unchanged.
    */
   static U_INLINE uint32_t
   _2022ToGR94DBCS(uint32_t value) {
       const uint32_t shifted = value + 0x8080;
       const uint16_t pairDelta = (uint16_t)(shifted - 0xa1a1);
       const uint8_t trailDelta = (uint8_t)(shifted - 0xa1);

       if(pairDelta > (0xfefe - 0xa1a1) || trailDelta > (0xfe - 0xa1)) {
           return value;
       }
       return shifted;
   }
   #endif
   1188 
   1189 #ifdef U_ENABLE_GENERIC_ISO_2022
   1190 
   1191 /**********************************************************************************
   1192 *  ISO-2022 Converter
   1193 *
   1194 *
   1195 */
   1196 
   1197 static void
   1198 T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args,
   1199                                                            UErrorCode* err){
   1200     const char* mySourceLimit, *realSourceLimit;
   1201     const char* sourceStart;
   1202     const UChar* myTargetStart;
   1203     UConverter* saveThis;
   1204     UConverterDataISO2022* myData;
   1205     int8_t length;
   1206 
   1207     saveThis = args->converter;
   1208     myData=((UConverterDataISO2022*)(saveThis->extraInfo));
   1209 
   1210     realSourceLimit = args->sourceLimit;
   1211     while (args->source < realSourceLimit) {
   1212         if(myData->key == 0) { /* are we in the middle of an escape sequence? */
   1213             /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
   1214             mySourceLimit = getEndOfBuffer_2022(&(args->source), realSourceLimit, args->flush);
   1215 
   1216             if(args->source < mySourceLimit) {
   1217                 if(myData->currentConverter==NULL) {
   1218                     myData->currentConverter = ucnv_open("ASCII",err);
   1219                     if(U_FAILURE(*err)){
   1220                         return;
   1221                     }
   1222 
   1223                     myData->currentConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
   1224                     saveThis->mode = UCNV_SO;
   1225                 }
   1226 
   1227                 /* convert to before the ESC or until the end of the buffer */
   1228                 myData->isFirstBuffer=FALSE;
   1229                 sourceStart = args->source;
   1230                 myTargetStart = args->target;
   1231                 args->converter = myData->currentConverter;
   1232                 ucnv_toUnicode(args->converter,
   1233                     &args->target,
   1234                     args->targetLimit,
   1235                     &args->source,
   1236                     mySourceLimit,
   1237                     args->offsets,
   1238                     (UBool)(args->flush && mySourceLimit == realSourceLimit),
   1239                     err);
   1240                 args->converter = saveThis;
   1241 
   1242                 if (*err == U_BUFFER_OVERFLOW_ERROR) {
   1243                     /* move the overflow buffer */
   1244                     length = saveThis->UCharErrorBufferLength = myData->currentConverter->UCharErrorBufferLength;
   1245                     myData->currentConverter->UCharErrorBufferLength = 0;
   1246                     if(length > 0) {
   1247                         uprv_memcpy(saveThis->UCharErrorBuffer,
   1248                                     myData->currentConverter->UCharErrorBuffer,
   1249                                     length*U_SIZEOF_UCHAR);
   1250                     }
   1251                     return;
   1252                 }
   1253 
   1254                 /*
   1255                  * At least one of:
   1256                  * -Error while converting
   1257                  * -Done with entire buffer
   1258                  * -Need to write offsets or update the current offset
   1259                  *  (leave that up to the code in ucnv.c)
   1260                  *
   1261                  * or else we just stopped at an ESC byte and continue with changeState_2022()
   1262                  */
   1263                 if (U_FAILURE(*err) ||
   1264                     (args->source == realSourceLimit) ||
   1265                     (args->offsets != NULL && (args->target != myTargetStart || args->source != sourceStart) ||
   1266                     (mySourceLimit < realSourceLimit && myData->currentConverter->toULength > 0))
   1267                 ) {
   1268                     /* copy partial or error input for truncated detection and error handling */
   1269                     if(U_FAILURE(*err)) {
   1270                         length = saveThis->invalidCharLength = myData->currentConverter->invalidCharLength;
   1271                         if(length > 0) {
   1272                             uprv_memcpy(saveThis->invalidCharBuffer, myData->currentConverter->invalidCharBuffer, length);
   1273                         }
   1274                     } else {
   1275                         length = saveThis->toULength = myData->currentConverter->toULength;
   1276                         if(length > 0) {
   1277                             uprv_memcpy(saveThis->toUBytes, myData->currentConverter->toUBytes, length);
   1278                             if(args->source < mySourceLimit) {
   1279                                 *err = U_TRUNCATED_CHAR_FOUND; /* truncated input before ESC */
   1280                             }
   1281                         }
   1282                     }
   1283                     return;
   1284                 }
   1285             }
   1286         }
   1287 
   1288         sourceStart = args->source;
   1289         changeState_2022(args->converter,
   1290                &(args->source),
   1291                realSourceLimit,
   1292                ISO_2022,
   1293                err);
   1294         if (U_FAILURE(*err) || (args->source != sourceStart && args->offsets != NULL)) {
   1295             /* let the ucnv.c code update its current offset */
   1296             return;
   1297         }
   1298     }
   1299 }
   1300 
   1301 #endif
   1302 
   1303 /*
   1304  * To Unicode Callback helper function
   1305  */
   1306 static void
   1307 toUnicodeCallback(UConverter *cnv,
   1308                   const uint32_t sourceChar, const uint32_t targetUniChar,
   1309                   UErrorCode* err){
   1310     if(sourceChar>0xff){
   1311         cnv->toUBytes[0] = (uint8_t)(sourceChar>>8);
   1312         cnv->toUBytes[1] = (uint8_t)sourceChar;
   1313         cnv->toULength = 2;
   1314     }
   1315     else{
   1316         cnv->toUBytes[0] =(char) sourceChar;
   1317         cnv->toULength = 1;
   1318     }
   1319 
   1320     if(targetUniChar == (missingCharMarker-1/*0xfffe*/)){
   1321         *err = U_INVALID_CHAR_FOUND;
   1322     }
   1323     else{
   1324         *err = U_ILLEGAL_CHAR_FOUND;
   1325     }
   1326 }
   1327 
   1328 /**************************************ISO-2022-JP*************************************************/
   1329 
   1330 /************************************** IMPORTANT **************************************************
   1331 * The UConverter_fromUnicode_ISO2022_JP converter does not use ucnv_fromUnicode() functions for SBCS,DBCS and
   1332 * MBCS; instead, the values are obtained directly by calling _MBCSFromUChar32().
   1333 * The converter iterates over each Unicode codepoint
   1334 * to obtain the equivalent codepoints from the codepages supported. Since the source buffer is
   1335 * processed one char at a time it would make sense to reduce the extra processing a canned converter
   1336 * would do as far as possible.
   1337 *
   1338 * If the implementation of these macros or structure of sharedData struct change in the future, make
   1339 * sure that ISO-2022 is also changed.
   1340 ***************************************************************************************************
   1341 */
   1342 
   1343 /***************************************************************************************************
   1344 * Rules for ISO-2022-jp encoding
   1345 * (i)   Escape sequences must be fully contained within a line they should not
   1346 *       span new lines or CRs
   1347 * (ii)  If the last character on a line is represented by two bytes then an ASCII or
   1348 *       JIS-Roman character escape sequence should follow before the line terminates
   1349 * (iii) If the first character on the line is represented by two bytes then a two
   1350 *       byte character escape sequence should precede it
   1351 * (iv)  If no escape sequence is encountered then the characters are ASCII
   1352 * (v)   Latin(ISO-8859-1) and Greek(ISO-8859-7) characters must be designated to G2,
   1353 *       and invoked with SS2 (ESC N).
   1354 * (vi)  If there is any G0 designation in text, there must be a switch to
   1355 *       ASCII or to JIS X 0201-Roman before a space character (but not
   1356 *       necessarily before "ESC 4/14 2/0" or "ESC N ' '") or control
   1357 *       characters such as tab or CRLF.
   1358 * (vii) Supported encodings:
   1359 *          ASCII, JISX201, JISX208, JISX212, GB2312, KSC5601, ISO-8859-1,ISO-8859-7
   1360 *
   1361 *  source : RFC-1554
   1362 *
   1363 *          JISX201, JISX208,JISX212 : new .cnv data files created
   1364 *          KSC5601 : alias to ibm-949 mapping table
   1365 *          GB2312 : alias to ibm-1386 mapping table
   1366 *          ISO-8859-1 : Algorithmically implemented as LATIN1 case
   1367 *          ISO-8859-7 : alias to ibm-9409 mapping table
   1368 */
   1369 
   1370 /*
         * Preference order of JP charsets.
         * The fromUnicode code walks this list after the current G0 and G2 charsets
         * and appends every entry that is still allowed by the version's charset mask
         * to its list of candidate charsets.
         */
   1371 static const StateEnum jpCharsetPref[]={
   1372     ASCII,
   1373     JISX201,
   1374     ISO8859_1,
   1375     ISO8859_7,
   1376     JISX208,
   1377     JISX212,
   1378     GB2312,
   1379     KSC5601,
   1380     HWKANA_7BIT
   1381 };
   1382 
   1383 /*
   1384  * The escape sequences must be in order of the enum constants like JISX201  = 3,
   1385  * not in order of jpCharsetPref[]!
         *
         * The table is indexed directly with a charset enum value (escSeqChars[cs]).
         * Each row is a designation escape sequence; the number of bytes to copy from
         * a row is taken from the matching escSeqCharsLen[] entry.
   1386  */
   1387 static const char escSeqChars[][6] ={
   1388     "\x1B\x28\x42",         /* <ESC>(B  ASCII       */
   1389     "\x1B\x2E\x41",         /* <ESC>.A  ISO-8859-1  */
   1390     "\x1B\x2E\x46",         /* <ESC>.F  ISO-8859-7  */
   1391     "\x1B\x28\x4A",         /* <ESC>(J  JISX-201    */
   1392     "\x1B\x24\x42",         /* <ESC>$B  JISX-208    */
   1393     "\x1B\x24\x28\x44",     /* <ESC>$(D JISX-212    */
   1394     "\x1B\x24\x41",         /* <ESC>$A  GB2312      */
   1395     "\x1B\x24\x28\x43",     /* <ESC>$(C KSC5601     */
   1396     "\x1B\x28\x49"          /* <ESC>(I  HWKANA_7BIT */
   1397 
   1398 };
   1399 static  const int8_t escSeqCharsLen[] ={ /* byte lengths of the escSeqChars[] rows, in the same index order */
   1400     3, /* length of <ESC>(B  ASCII       */
   1401     3, /* length of <ESC>.A  ISO-8859-1  */
   1402     3, /* length of <ESC>.F  ISO-8859-7  */
   1403     3, /* length of <ESC>(J  JISX-201    */
   1404     3, /* length of <ESC>$B  JISX-208    */
   1405     4, /* length of <ESC>$(D JISX-212    */
   1406     3, /* length of <ESC>$A  GB2312      */
   1407     4, /* length of <ESC>$(C KSC5601     */
   1408     3  /* length of <ESC>(I  HWKANA_7BIT */
   1409 };
   1410 
   1411 /*
   1412 * The iteration over various code pages works this way:
   1413 * i)   Get the currentState from myConverterData->currentState
   1414 * ii)  Check if the character is mapped to a valid character in the currentState
   1415 *      Yes ->  a) set the initIterState to currentState
   1416 *       b) remain in this state until an invalid character is found
   1417 *      No  ->  a) go to the next code page and find the character
   1418 * iii) Before changing the state, increment the current state and check whether it
   1419 *      is equal to the initIterState
   1420 *      Yes ->  A character that cannot be represented in any of the supported encodings
   1421 *       break and return a U_INVALID_CHAR_FOUND error
   1422 *      No  ->  Continue and find the character in next code page
   1423 *
   1424 *
   1425 * TODO: Implement a priority technique where the users are allowed to set the priority of code pages
   1426 */
   1427 
   /*
    * Map a JIS X 0201 byte 00..7F to Unicode.
    * Only two positions differ from ASCII: 5C maps to U+00A5 and 7E maps to U+203E.
    * Every other input value is returned unchanged.
    */
   static U_INLINE uint32_t
   jisx201ToU(uint32_t value) {
       switch(value) {
       case 0x5c:
           return 0xa5;
       case 0x7e:
           return 0x203e;
       default:
           return value;
       }
   }
   1441 
   /*
    * Map Unicode to a JIS X 0201 byte 00..7F.
    * U+00A5 and U+203E take the places of 5C and 7E; U+005C and U+007E themselves
    * have no mapping. Returns 0xfffe (U+FFFE) for everything that is unmappable.
    */
   static U_INLINE uint32_t
   jisx201FromU(uint32_t value) {
       switch(value) {
       case 0x5c:
       case 0x7e:
           return 0xfffe;  /* these two positions are occupied by other characters */
       case 0xa5:
           return 0x5c;
       case 0x203e:
           return 0x7e;
       default:
           return value <= 0x7f ? value : 0xfffe;
       }
   }
   1456 
   /*
    * Convert a valid Shift-JIS byte pair (lead byte in bits 15..8, trail byte in
    * bits 7..0) to the corresponding pair of JIS X 0208 bytes 21..7E.
    * Returns 0 if the pair lies beyond the JIS X 0208 range (value > 0xEFFC).
    * The input is not validated otherwise.
    */
   static U_INLINE uint32_t
   _2022FromSJIS(uint32_t value) {
       uint32_t lead, result;
       uint8_t trail;

       if(value > 0xEFFC) {
           return 0;  /* beyond JIS X 0208 */
       }

       lead = value & 0xff00;
       trail = (uint8_t)value;

       /* lead bytes up to 9F and lead bytes E0..EF use different offsets */
       lead -= (lead <= 0x9f00) ? 0x7000 : 0xb000;
       result = lead << 1;

       if(trail > 0x9e) {
           /* trail 9F..FC: second row of the pair */
           return result | (uint32_t)(trail - 0x7e);
       }

       /* trail up to 9E: first row of the pair; there is a gap after 7E */
       result -= 0x100;
       return result | (uint32_t)(trail - (trail <= 0x7e ? 0x1f : 0x20));
   }
   1492 
   /*
    * Convert a pair of JIS X 0208 bytes 21..7E (c1 = first, c2 = second) to
    * Shift-JIS and store the two result bytes in bytes[0] and bytes[1].
    * If either byte is outside 21..7E the result is made invalid for Shift-JIS
    * so that the converter catches it. Some invalid inputs already turn into
    * equally invalid Shift-JIS bytes and are therefore not tested explicitly.
    */
   static U_INLINE void
   _2022ToSJIS(uint8_t c1, uint8_t c2, char bytes[2]) {
       uint8_t lead, trail;

       if((c1 & 1) == 0) {
           /* even first byte: second half of the Shift-JIS trail byte range */
           lead = c1;
           if(c2 >= 0x21 && c2 <= 0x7e) {
               trail = (uint8_t)(c2 + 0x7e);
           } else {
               trail = 0;  /* invalid */
           }
       } else {
           /* odd first byte: first half of the trail byte range, with a gap after 7E */
           lead = (uint8_t)(c1 + 1);
           if(c2 > 0x7e) {
               trail = 0;  /* invalid */
           } else {
               trail = (uint8_t)(c2 + (c2 <= 0x5f ? 0x1f : 0x20));
           }
       }

       lead >>= 1;
       if(lead > 0x3f) {
           lead = 0;  /* invalid */
       } else {
           lead = (uint8_t)(lead + (lead <= 0x2f ? 0x70 : 0xb0));
       }

       bytes[0] = (char)lead;
       bytes[1] = (char)trail;
   }
   1529 
   1530 /*
   1531  * JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS)
   1532  * Katakana.
   1533  * Now that we use a Shift-JIS table for JIS X 0208 we need to hardcode these fallbacks
   1534  * because Shift-JIS roundtrips half-width Katakana to single bytes.
   1535  * These were the only fallbacks in ICU's jisx-208.ucm file.
         *
         * The table is indexed with (code point - HWKANA_START) and has one entry per
         * code point in HWKANA_START..HWKANA_END. Each entry is the two-byte value that
         * the fromUnicode code uses directly as the JISX208 fallback output.
   1536  */
   1537 static const uint16_t hwkana_fb[HWKANA_END - HWKANA_START + 1] = {
   1538     0x2123,  /* U+FF61 */
   1539     0x2156,
   1540     0x2157,
   1541     0x2122,
   1542     0x2126,
   1543     0x2572,
   1544     0x2521,
   1545     0x2523,
   1546     0x2525,
   1547     0x2527,
   1548     0x2529,
   1549     0x2563,
   1550     0x2565,
   1551     0x2567,
   1552     0x2543,
   1553     0x213C,  /* U+FF70 */
   1554     0x2522,
   1555     0x2524,
   1556     0x2526,
   1557     0x2528,
   1558     0x252A,
   1559     0x252B,
   1560     0x252D,
   1561     0x252F,
   1562     0x2531,
   1563     0x2533,
   1564     0x2535,
   1565     0x2537,
   1566     0x2539,
   1567     0x253B,
   1568     0x253D,
   1569     0x253F,  /* U+FF80 */
   1570     0x2541,
   1571     0x2544,
   1572     0x2546,
   1573     0x2548,
   1574     0x254A,
   1575     0x254B,
   1576     0x254C,
   1577     0x254D,
   1578     0x254E,
   1579     0x254F,
   1580     0x2552,
   1581     0x2555,
   1582     0x2558,
   1583     0x255B,
   1584     0x255E,
   1585     0x255F,  /* U+FF90 */
   1586     0x2560,
   1587     0x2561,
   1588     0x2562,
   1589     0x2564,
   1590     0x2566,
   1591     0x2568,
   1592     0x2569,
   1593     0x256A,
   1594     0x256B,
   1595     0x256C,
   1596     0x256D,
   1597     0x256F,
   1598     0x2573,
   1599     0x212B,
   1600     0x212C   /* U+FF9F */
   1601 };
   1602 
   1603 static void
   1604 UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err) {
            /*
             * Convert UTF-16 text from args->source to ISO-2022-JP-x bytes in args->target,
             * recording a source index per output byte in args->offsets when it is not NULL.
             *
             * For each code point the function tries the charsets collected in choices[]:
             * HWKANA_7BIT first for versions 3 and 4, then the current G0 charset, the
             * current G2 charset (if any), and finally the remaining charsets allowed for
             * this version in jpCharsetPref[] order. A roundtrip mapping ends the search;
             * a fallback is kept only if no roundtrip is found.
             * It then emits, as needed, SI, the designation escape sequence, SO or ESC N,
             * and finally one or two output bytes.
             *
             * Errors reported through *err:
             *   U_ILLEGAL_CHAR_FOUND    - unmatched surrogate, or a code point matched by
             *                             IS_2022_CONTROL()
             *   U_INVALID_CHAR_FOUND    - none of the candidate charsets maps the code point
             *   U_BUFFER_OVERFLOW_ERROR - the target is full while input remains
             *   (fromUWriteUInt8() may also set *err; the loop stops when it does)
             * For the first two errors the offending code point is left in cnv->fromUChar32.
             * A lead surrogate at the very end of the input is also kept in
             * cnv->fromUChar32, without an error, and picked up again on the next call.
             *
             * When args->flush is set and all input was consumed successfully, the output is
             * brought back to G0 = ASCII (SI and/or the ASCII escape sequence).
             * On return args->source and args->target are advanced to the local positions.
             */
   1605     UConverter *cnv = args->converter;
   1606     UConverterDataISO2022 *converterData;
   1607     ISO2022State *pFromU2022State;
   1608     uint8_t *target = (uint8_t *) args->target;
   1609     const uint8_t *targetLimit = (const uint8_t *) args->targetLimit;
   1610     const UChar* source = args->source;
   1611     const UChar* sourceLimit = args->sourceLimit;
   1612     int32_t* offsets = args->offsets;
   1613     UChar32 sourceChar;
   1614     char buffer[8]; /* staging area for the bytes produced for one code point */
   1615     int32_t len, outLen;
   1616     int8_t choices[10]; /* candidate charsets, in the order in which they are tried */
   1617     int32_t choiceCount;
   1618     uint32_t targetValue = 0;
   1619     UBool useFallback;
   1620 
   1621     int32_t i;
   1622     int8_t cs, g;
   1623 
   1624     /* set up the state */
   1625     converterData     = (UConverterDataISO2022*)cnv->extraInfo;
   1626     pFromU2022State   = &converterData->fromU2022State;
   1627 
   1628     choiceCount = 0; /* 0 means that choices[] must be (re)built before use */
   1629 
   1630     /* check if the last codepoint of previous buffer was a lead surrogate*/
   1631     if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
   1632         goto getTrail;
   1633     }
   1634 
   1635     while(source < sourceLimit) {
   1636         if(target < targetLimit) {
   1637 
   1638             sourceChar  = *(source++);
   1639             /*check if the char is a First surrogate*/
   1640             if(UTF_IS_SURROGATE(sourceChar)) {
   1641                 if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
   1642 getTrail:
   1643                     /*look ahead to find the trail surrogate*/
   1644                     if(source < sourceLimit) {
   1645                         /* test the following code unit */
   1646                         UChar trail=(UChar) *source;
   1647                         if(UTF_IS_SECOND_SURROGATE(trail)) {
   1648                             source++;
   1649                             sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
   1650                             cnv->fromUChar32=0x00;
   1651                             /* convert this supplementary code point */
   1652                             /* exit this condition tree */
   1653                         } else {
   1654                             /* this is an unmatched lead code unit (1st surrogate) */
   1655                             /* callback(illegal) */
   1656                             *err=U_ILLEGAL_CHAR_FOUND;
   1657                             cnv->fromUChar32=sourceChar;
   1658                             break;
   1659                         }
   1660                     } else {
   1661                         /* no more input */
   1662                         cnv->fromUChar32=sourceChar;
   1663                         break;
   1664                     }
   1665                 } else {
   1666                     /* this is an unmatched trail code unit (2nd surrogate) */
   1667                     /* callback(illegal) */
   1668                     *err=U_ILLEGAL_CHAR_FOUND;
   1669                     cnv->fromUChar32=sourceChar;
   1670                     break;
   1671                 }
   1672             }
   1673 
   1674             /* do not convert SO/SI/ESC */
   1675             if(IS_2022_CONTROL(sourceChar)) {
   1676                 /* callback(illegal) */
   1677                 *err=U_ILLEGAL_CHAR_FOUND;
   1678                 cnv->fromUChar32=sourceChar;
   1679                 break;
   1680             }
   1681 
   1682             /* do the conversion */
   1683 
   1684             if(choiceCount == 0) {
   1685                 uint16_t csm;
   1686 
   1687                 /*
   1688                  * The csm variable keeps track of which charsets are allowed
   1689                  * and not used yet while building the choices[].
   1690                  */
   1691                 csm = jpCharsetMasks[converterData->version];
   1692                 choiceCount = 0;
   1693 
   1694                 /* JIS7/8: try single-byte half-width Katakana before JISX208 */
   1695                 if(converterData->version == 3 || converterData->version == 4) {
   1696                     choices[choiceCount++] = (int8_t)HWKANA_7BIT;
   1697                 }
   1698                 /* Do not try single-byte half-width Katakana for other versions. */
   1699                 csm &= ~CSM(HWKANA_7BIT);
   1700 
   1701                 /* try the current G0 charset */
   1702                 choices[choiceCount++] = cs = pFromU2022State->cs[0];
   1703                 csm &= ~CSM(cs);
   1704 
   1705                 /* try the current G2 charset */
   1706                 if((cs = pFromU2022State->cs[2]) != 0) {
   1707                     choices[choiceCount++] = cs;
   1708                     csm &= ~CSM(cs);
   1709                 }
   1710 
   1711                 /* try all the other possible charsets */
   1712                 for(i = 0; i < LENGTHOF(jpCharsetPref); ++i) {
   1713                     cs = (int8_t)jpCharsetPref[i];
   1714                     if(CSM(cs) & csm) {
   1715                         choices[choiceCount++] = cs;
   1716                         csm &= ~CSM(cs);
   1717                     }
   1718                 }
   1719             }
   1720 
   1721             cs = g = 0;
   1722             /*
   1723              * len==0: no mapping found yet
   1724              * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
   1725              * len>0: found a roundtrip result, done
   1726              */
   1727             len = 0;
   1728             /*
   1729              * We will turn off useFallback after finding a fallback,
   1730              * but we still get fallbacks from PUA code points as usual.
   1731              * Therefore, we will also need to check that we don't overwrite
   1732              * an early fallback with a later one.
   1733              */
   1734             useFallback = cnv->useFallback;
   1735 
   1736             for(i = 0; i < choiceCount && len <= 0; ++i) {
   1737                 uint32_t value;
   1738                 int32_t len2;
   1739                 int8_t cs0 = choices[i];
   1740                 switch(cs0) {
   1741                 case ASCII:
   1742                     if(sourceChar <= 0x7f) {
   1743                         targetValue = (uint32_t)sourceChar;
   1744                         len = 1;
   1745                         cs = cs0;
   1746                         g = 0;
   1747                     }
   1748                     break;
   1749                 case ISO8859_1:
   1750                     if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
   1751                         targetValue = (uint32_t)sourceChar - 0x80;
   1752                         len = 1;
   1753                         cs = cs0;
   1754                         g = 2;
   1755                     }
   1756                     break;
   1757                 case HWKANA_7BIT:
   1758                     if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
   1759                         if(converterData->version==3) {
   1760                             /* JIS7: use G1 (SO) */
   1761                             /* Shift U+FF61..U+FF9F to bytes 21..5F. */
   1762                             targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21));
   1763                             len = 1;
   1764                             pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */
   1765                             g = 1;
   1766                         } else if(converterData->version==4) {
   1767                             /* JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below */
   1768                             /* Shift U+FF61..U+FF9F to bytes A1..DF. */
   1769                             targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0xa1));
   1770                             len = 1;
   1771 
   1772                             cs = pFromU2022State->cs[0];
   1773                             if(IS_JP_DBCS(cs)) {
   1774                                 /* switch from a DBCS charset to JISX201 */
   1775                                 cs = (int8_t)JISX201;
   1776                             }
   1777                             /* else stay in the current G0 charset */
   1778                             g = 0;
   1779                         }
   1780                         /* else do not use HWKANA_7BIT with other versions */
   1781                     }
   1782                     break;
   1783                 case JISX201:
   1784                     /* G0 SBCS */
   1785                     value = jisx201FromU(sourceChar);
   1786                     if(value <= 0x7f) {
   1787                         targetValue = value;
   1788                         len = 1;
   1789                         cs = cs0;
   1790                         g = 0;
   1791                         useFallback = FALSE;
   1792                     }
   1793                     break;
   1794                 case JISX208:
   1795                     /* G0 DBCS from Shift-JIS table */
   1796                     len2 = MBCS_FROM_UCHAR32_ISO2022(
   1797                                 converterData->myConverterArray[cs0],
   1798                                 sourceChar, &value,
   1799                                 useFallback, MBCS_OUTPUT_2);
   1800                     if(len2 == 2 || (len2 == -2 && len == 0)) {  /* only accept DBCS: abs(len)==2 */
   1801                         value = _2022FromSJIS(value);
   1802                         if(value != 0) {
   1803                             targetValue = value;
   1804                             len = len2;
   1805                             cs = cs0;
   1806                             g = 0;
   1807                             useFallback = FALSE;
   1808                         }
   1809                     } else if(len == 0 && useFallback &&
   1810                               (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
                                /* hardcoded half-width Katakana fallback, recorded as a fallback (len<0) */
   1811                         targetValue = hwkana_fb[sourceChar - HWKANA_START];
   1812                         len = -2;
   1813                         cs = cs0;
   1814                         g = 0;
   1815                         useFallback = FALSE;
   1816                     }
   1817                     break;
   1818                 case ISO8859_7:
   1819                     /* G0 SBCS forced to 7-bit output */
   1820                     len2 = MBCS_SINGLE_FROM_UCHAR32(
   1821                                 converterData->myConverterArray[cs0],
   1822                                 sourceChar, &value,
   1823                                 useFallback);
   1824                     if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= value && value <= GR96_END) {
   1825                         targetValue = value - 0x80;
   1826                         len = len2;
   1827                         cs = cs0;
   1828                         g = 2;
   1829                         useFallback = FALSE;
   1830                     }
   1831                     break;
   1832                 default:
   1833                     /* G0 DBCS */
   1834                     len2 = MBCS_FROM_UCHAR32_ISO2022(
   1835                                 converterData->myConverterArray[cs0],
   1836                                 sourceChar, &value,
   1837                                 useFallback, MBCS_OUTPUT_2);
   1838                     if(len2 == 2 || (len2 == -2 && len == 0)) {  /* only accept DBCS: abs(len)==2 */
   1839                         if(cs0 == KSC5601) {
   1840                             /*
   1841                              * Check for valid bytes for the encoding scheme.
   1842                              * This is necessary because the sub-converter (windows-949)
   1843                              * has a broader encoding scheme than is valid for 2022.
   1844                              */
   1845                             value = _2022FromGR94DBCS(value);
   1846                             if(value == 0) {
   1847                                 break;
   1848                             }
   1849                         }
   1850                         targetValue = value;
   1851                         len = len2;
   1852                         cs = cs0;
   1853                         g = 0;
   1854                         useFallback = FALSE;
   1855                     }
   1856                     break;
   1857                 }
   1858             }
   1859 
   1860             if(len != 0) {
   1861                 if(len < 0) {
   1862                     len = -len;  /* fallback */
   1863                 }
   1864                 outLen = 0; /* count output bytes */
   1865 
   1866                 /* write SI if necessary (only for JIS7) */
   1867                 if(pFromU2022State->g == 1 && g == 0) {
   1868                     buffer[outLen++] = UCNV_SI;
   1869                     pFromU2022State->g = 0;
   1870                 }
   1871 
   1872                 /* write the designation sequence if necessary */
   1873                 if(cs != pFromU2022State->cs[g]) {
   1874                     int32_t escLen = escSeqCharsLen[cs];
   1875                     uprv_memcpy(buffer + outLen, escSeqChars[cs], escLen);
   1876                     outLen += escLen;
   1877                     pFromU2022State->cs[g] = cs;
   1878 
   1879                     /* invalidate the choices[] */
   1880                     choiceCount = 0;
   1881                 }
   1882 
   1883                 /* write the shift sequence if necessary */
   1884                 if(g != pFromU2022State->g) {
   1885                     switch(g) {
   1886                     /* case 0 handled before writing escapes */
   1887                     case 1:
   1888                         buffer[outLen++] = UCNV_SO;
   1889                         pFromU2022State->g = 1;
   1890                         break;
   1891                     default: /* case 2 */
                                /* ESC N is written for this character only: pFromU2022State->g is left unchanged */
   1892                         buffer[outLen++] = 0x1b;
   1893                         buffer[outLen++] = 0x4e;
   1894                         break;
   1895                     /* no case 3: no SS3 in ISO-2022-JP-x */
   1896                     }
   1897                 }
   1898 
   1899                 /* write the output bytes */
   1900                 if(len == 1) {
   1901                     buffer[outLen++] = (char)targetValue;
   1902                 } else /* len == 2 */ {
   1903                     buffer[outLen++] = (char)(targetValue >> 8);
   1904                     buffer[outLen++] = (char)targetValue;
   1905                 }
   1906             } else {
   1907                 /*
   1908                  * if we cannot find the character after checking all codepages
   1909                  * then this is an error
   1910                  */
   1911                 *err = U_INVALID_CHAR_FOUND;
   1912                 cnv->fromUChar32=sourceChar;
   1913                 break;
   1914             }
   1915 
   1916             if(sourceChar == CR || sourceChar == LF) {
   1917                 /* reset the G2 state at the end of a line (conversion got us into ASCII or JISX201 already) */
   1918                 pFromU2022State->cs[2] = 0;
   1919                 choiceCount = 0;
   1920             }
   1921 
   1922             /* output outLen>0 bytes in buffer[] */
   1923             if(outLen == 1) {
   1924                 *target++ = buffer[0];
   1925                 if(offsets) {
   1926                     *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */
   1927                 }
   1928             } else if(outLen == 2 && (target + 2) <= targetLimit) {
   1929                 *target++ = buffer[0];
   1930                 *target++ = buffer[1];
   1931                 if(offsets) {
   1932                     int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));
   1933                     *offsets++ = sourceIndex;
   1934                     *offsets++ = sourceIndex;
   1935                 }
   1936             } else {
                        /* longer output, or two bytes that do not fit: let fromUWriteUInt8() write them */
   1937                 fromUWriteUInt8(
   1938                     cnv,
   1939                     buffer, outLen,
   1940                     &target, (const char *)targetLimit,
   1941                     &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
   1942                     err);
   1943                 if(U_FAILURE(*err)) {
   1944                     break;
   1945                 }
   1946             }
   1947         } /* end if(target < targetLimit) */
   1948         else{
   1949             *err =U_BUFFER_OVERFLOW_ERROR;
   1950             break;
   1951         }
   1952 
   1953     }/* end while(source < sourceLimit) */
   1954 
   1955     /*
   1956      * the end of the input stream and detection of truncated input
   1957      * are handled by the framework, but for ISO-2022-JP conversion
   1958      * we need to be in ASCII mode at the very end
   1959      *
   1960      * conditions:
   1961      *   successful
   1962      *   in SO mode or not in ASCII mode
   1963      *   end of input and no truncated input
   1964      */
   1965     if( U_SUCCESS(*err) &&
   1966         (pFromU2022State->g!=0 || pFromU2022State->cs[0]!=ASCII) &&
   1967         args->flush && source>=sourceLimit && cnv->fromUChar32==0
   1968     ) {
   1969         int32_t sourceIndex;
   1970 
   1971         outLen = 0;
   1972 
   1973         if(pFromU2022State->g != 0) {
   1974             buffer[outLen++] = UCNV_SI;
   1975             pFromU2022State->g = 0;
   1976         }
   1977 
   1978         if(pFromU2022State->cs[0] != ASCII) {
   1979             int32_t escLen = escSeqCharsLen[ASCII];
   1980             uprv_memcpy(buffer + outLen, escSeqChars[ASCII], escLen);
   1981             outLen += escLen;
   1982             pFromU2022State->cs[0] = (int8_t)ASCII;
   1983         }
   1984 
   1985         /* get the source index of the last input character */
   1986         /*
   1987          * TODO this would be simpler and more reliable if we used a pair
   1988          * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
   1989          * so that we could simply use the prevSourceIndex here;
   1990          * this code gives an incorrect result for the rare case of an unmatched
   1991          * trail surrogate that is alone in the last buffer of the text stream
   1992          */
   1993         sourceIndex=(int32_t)(source-args->source);
   1994         if(sourceIndex>0) {
   1995             --sourceIndex;
   1996             if( U16_IS_TRAIL(args->source[sourceIndex]) &&
   1997                 (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
   1998             ) {
   1999                 --sourceIndex;
   2000             }
   2001         } else {
   2002             sourceIndex=-1;
   2003         }
   2004 
   2005         fromUWriteUInt8(
   2006             cnv,
   2007             buffer, outLen,
   2008             &target, (const char *)targetLimit,
   2009             &offsets, sourceIndex,
   2010             err);
   2011     }
   2012 
   2013     /*save the state and return */
   2014     args->source = source;
   2015     args->target = (char*)target;
   2016 }
   2017 
   2018 /*************** to unicode *******************/
   2019 
   2020 static void
   2021 UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
   2022                                                UErrorCode* err){
            /*
             * Converts ISO-2022-JP family bytes in [args->source, args->sourceLimit)
             * to UTF-16 in [args->target, args->targetLimit). When args->offsets is
             * non-NULL, one source offset is stored per UChar written.
             *
             * State resumed from a previous call:
             *   - myData->key != 0: a partial escape sequence is pending, so
             *     processing jumps straight to the "escape" label.
             *   - converter->toULength == 1: a lone double-byte lead byte is stored in
             *     toUBytes[0]; if there is both input and output room, processing
             *     jumps to "getTrailByte".
             *
             * On every exit args->source and args->target are updated. *err is set
             * here to U_BUFFER_OVERFLOW_ERROR (target full while input remains) or
             * U_ILLEGAL_ESCAPE_SEQUENCE (version 0 only: an escape sequence completed
             * right after an empty segment); changeState_2022() and
             * toUnicodeCallback() also receive err and may set it.
             */
   2023     char tempBuf[2];
   2024     const char *mySource = (char *) args->source;
   2025     UChar *myTarget = args->target;
   2026     const char *mySourceLimit = args->sourceLimit;
   2027     uint32_t targetUniChar = 0x0000;
   2028     uint32_t mySourceChar = 0x0000;
   2029     uint32_t tmpSourceChar = 0x0000;
   2030     UConverterDataISO2022* myData;
   2031     ISO2022State *pToU2022State;
   2032     StateEnum cs;
   2033 
   2034     myData=(UConverterDataISO2022*)(args->converter->extraInfo);
   2035     pToU2022State = &myData->toU2022State;
   2036 
   2037     if(myData->key != 0) {
   2038         /* continue with a partial escape sequence */
   2039         goto escape;
   2040     } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
   2041         /* continue with a partial double-byte character */
   2042         mySourceChar = args->converter->toUBytes[0];
   2043         args->converter->toULength = 0;
   2044         cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
   2045         targetUniChar = missingCharMarker;
   2046         goto getTrailByte;
   2047     }
   2048 
   2049     while(mySource < mySourceLimit){
   2050 
   2051         targetUniChar =missingCharMarker;
   2052 
   2053         if(myTarget < args->targetLimit){
   2054 
   2055             mySourceChar= (unsigned char) *mySource++;
   2056 
   2057             switch(mySourceChar) {
   2058             case UCNV_SI:
   2059                 if(myData->version==3) {
                            /* shift in: back to the G0 set */
   2060                     pToU2022State->g=0;
   2061                     continue;
   2062                 } else {
   2063                     /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
   2064                     myData->isEmptySegment = FALSE;	/* reset this, we have a different error */
                            /* targetUniChar is still missingCharMarker, so the byte reaches the callback below */
   2065                     break;
   2066                 }
   2067 
   2068             case UCNV_SO:
   2069                 if(myData->version==3) {
   2070                     /* JIS7: switch to G1 half-width Katakana */
   2071                     pToU2022State->cs[1] = (int8_t)HWKANA_7BIT;
   2072                     pToU2022State->g=1;
   2073                     continue;
   2074                 } else {
   2075                     /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
   2076                     myData->isEmptySegment = FALSE;	/* reset this, we have a different error */
   2077                     break;
   2078                 }
   2079 
   2080             case ESC_2022:
                        /* un-read the ESC so that changeState_2022() sees the whole sequence */
   2081                 mySource--;
   2082 escape:
   2083                 {
   2084                     const char * mySourceBefore = mySource;
   2085                     int8_t toULengthBefore = args->converter->toULength;
   2086 
   2087                     changeState_2022(args->converter,&(mySource),
   2088                         mySourceLimit, ISO_2022_JP,err);
   2089 
   2090                     /* If in ISO-2022-JP only and we successfully completed an escape sequence, but previous segment was empty, create an error */
   2091                     if(myData->version==0 && myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
   2092                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   2093                         args->converter->toUCallbackReason = UCNV_IRREGULAR;
                                /* error length = bytes pending before the call plus bytes consumed by it */
   2094                         args->converter->toULength = toULengthBefore + (mySource - mySourceBefore);
   2095                     }
   2096                 }
   2097 
   2098                 /* invalid or illegal escape sequence */
   2099                 if(U_FAILURE(*err)){
   2100                     args->target = myTarget;
   2101                     args->source = mySource;
   2102                     myData->isEmptySegment = FALSE;	/* Reset to avoid future spurious errors */
   2103                     return;
   2104                 }
   2105                 /* If we successfully completed an escape sequence, we begin a new segment, empty so far */
   2106                 if(myData->key==0) {
   2107                     myData->isEmptySegment = TRUE;
   2108                 }
   2109                 continue;
   2110 
   2111             /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */
   2112 
   2113             case CR:
   2114                 /*falls through*/
   2115             case LF:
   2116                 /* automatically reset to single-byte mode */
   2117                 if((StateEnum)pToU2022State->cs[0] != ASCII && (StateEnum)pToU2022State->cs[0] != JISX201) {
   2118                     pToU2022State->cs[0] = (int8_t)ASCII;
   2119                 }
   2120                 pToU2022State->cs[2] = 0;
   2121                 pToU2022State->g = 0;
   2122                 /* falls through */
   2123             default:
   2124                 /* convert one or two bytes */
   2125                 myData->isEmptySegment = FALSE;
   2126                 cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
                        /* the uint8_t subtraction is an unsigned range test for 0xa1..0xdf */
   2127                 if( (uint8_t)(mySourceChar - 0xa1) <= (0xdf - 0xa1) && myData->version==4 &&
   2128                     !IS_JP_DBCS(cs)
   2129                 ) {
   2130                     /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */
   2131                     targetUniChar = mySourceChar + (HWKANA_START - 0xa1);
   2132 
   2133                     /* return from a single-shift state to the previous one */
   2134                     if(pToU2022State->g >= 2) {
   2135                         pToU2022State->g=pToU2022State->prevG;
   2136                     }
   2137                 } else switch(cs) {
   2138                 case ASCII:
   2139                     if(mySourceChar <= 0x7f) {
   2140                         targetUniChar = mySourceChar;
   2141                     }
   2142                     break;
   2143                 case ISO8859_1:
   2144                     if(mySourceChar <= 0x7f) {
                                /* the 7-bit byte selects the code point 0x80 higher */
   2145                         targetUniChar = mySourceChar + 0x80;
   2146                     }
   2147                     /* return from a single-shift state to the previous one */
   2148                     pToU2022State->g=pToU2022State->prevG;
   2149                     break;
   2150                 case ISO8859_7:
   2151                     if(mySourceChar <= 0x7f) {
   2152                         /* convert mySourceChar+0x80 to use a normal 8-bit table */
   2153                         targetUniChar =
   2154                             _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(
   2155                                 myData->myConverterArray[cs],
   2156                                 mySourceChar + 0x80);
   2157                     }
   2158                     /* return from a single-shift state to the previous one */
   2159                     pToU2022State->g=pToU2022State->prevG;
   2160                     break;
   2161                 case JISX201:
   2162                     if(mySourceChar <= 0x7f) {
   2163                         targetUniChar = jisx201ToU(mySourceChar);
   2164                     }
   2165                     break;
   2166                 case HWKANA_7BIT:
   2167                     if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) {
   2168                         /* 7-bit halfwidth Katakana */
   2169                         targetUniChar = mySourceChar + (HWKANA_START - 0x21);
   2170                     }
   2171                     break;
   2172                 default:
   2173                     /* G0 DBCS */
   2174                     if(mySource < mySourceLimit) {
   2175                         int leadIsOk, trailIsOk;
   2176                         uint8_t trailByte;
   2177 getTrailByte:
                                /* peek at the trail byte; it is consumed only in the branches that ++mySource */
   2178                         trailByte = (uint8_t)*mySource;
   2179                         /*
   2180                          * Ticket 5691: consistent illegal sequences:
   2181                          * - We include at least the first byte in the illegal sequence.
   2182                          * - If any of the non-initial bytes could be the start of a character,
   2183                          *   we stop the illegal sequence before the first one of those.
   2184                          *
   2185                          * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
   2186                          * an ESC/SO/SI, we report only the first byte as the illegal sequence.
   2187                          * Otherwise we convert or report the pair of bytes.
   2188                          */
   2189                         leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
   2190                         trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
   2191                         if (leadIsOk && trailIsOk) {
   2192                             ++mySource;
   2193                             tmpSourceChar = (mySourceChar << 8) | trailByte;
   2194                             if(cs == JISX208) {
                                        /* JIS X 0208 is looked up via its Shift-JIS form placed in tempBuf */
   2195                                 _2022ToSJIS((uint8_t)mySourceChar, trailByte, tempBuf);
   2196                                 mySourceChar = tmpSourceChar;
   2197                             } else {
   2198                                 /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
   2199                                 mySourceChar = tmpSourceChar;
   2200                                 if (cs == KSC5601) {
   2201                                     tmpSourceChar += 0x8080;  /* = _2022ToGR94DBCS(tmpSourceChar) */
   2202                                 }
   2203                                 tempBuf[0] = (char)(tmpSourceChar >> 8);
   2204                                 tempBuf[1] = (char)(tmpSourceChar);
   2205                             }
   2206                             targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE);
   2207                         } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
   2208                             /* report a pair of illegal bytes if the second byte is not a DBCS starter */
   2209                             ++mySource;
   2210                             /* add another bit so that the code below writes 2 bytes in case of error */
   2211                             mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
   2212                         }
   2213                     } else {
                                /* input ends after the lead byte: save it and resume at getTrailByte next call */
   2214                         args->converter->toUBytes[0] = (uint8_t)mySourceChar;
   2215                         args->converter->toULength = 1;
   2216                         goto endloop;
   2217                     }
   2218                 }  /* End of inner switch */
   2219                 break;
   2220             }  /* End of outer switch */
                    /*
                     * Three outcomes, keyed on targetUniChar:
                     *   below missingCharMarker-1: a BMP code point, written as one UChar
                     *   above missingCharMarker:   a supplementary code point, written as a pair
                     *   otherwise:                 no mapping, handed to toUnicodeCallback()
                     * The offset is the index of the character's first source byte: one byte
                     * back for single-byte input, two when mySourceChar holds a byte pair.
                     */
   2221             if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){
   2222                 if(args->offsets){
   2223                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2224                 }
   2225                 *(myTarget++)=(UChar)targetUniChar;
   2226             }
   2227             else if(targetUniChar > missingCharMarker){
   2228                 /* disassemble the surrogate pair and write to output*/
   2229                 targetUniChar-=0x0010000;
   2230                 *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));
   2231                 if(args->offsets){
   2232                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2233                 }
   2234                 ++myTarget;
   2235                 if(myTarget< args->targetLimit){
   2236                     *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
   2237                     if(args->offsets){
   2238                         args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2239                     }
   2240                     ++myTarget;
   2241                 }else{
                            /*
                             * No room for the trail surrogate: park it in the converter's
                             * UChar overflow buffer.
                             * NOTE(review): *err is not set on this path; the next loop
                             * iteration reports U_BUFFER_OVERFLOW_ERROR only if more input
                             * remains. Presumably the caller drains UCharErrorBuffer on its
                             * own -- confirm.
                             */
   2242                     args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
   2243                                     (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
   2244                 }
   2245 
   2246             }
   2247             else{
   2248                 /* Call the callback function*/
   2249                 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
   2250                 break;
   2251             }
   2252         }
   2253         else{    /* goes with "if(myTarget < args->targetLimit)"  way up near top of function */
   2254             *err =U_BUFFER_OVERFLOW_ERROR;
   2255             break;
   2256         }
   2257     }
   2258 endloop:
            /* publish how far input and output advanced */
   2259     args->target = myTarget;
   2260     args->source = mySource;
   2261 }
   2262 
   2263 
   2264 /***************************************************************
   2265 *   Rules for ISO-2022-KR encoding
   2266 *   i) The KSC5601 designator sequence should appear only once in a file,
   2267 *      at the beginning of a line before any KSC5601 characters. This usually
   2268 *      means that it appears by itself on the first line of the file
   2269 *  ii) There are only 2 shifting sequences SO to shift into double byte mode
   2270 *      and SI to shift into single byte mode
   2271 */
   2272 static void
   2273 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){
   2274 
   2275     UConverter* saveConv = args->converter;
   2276     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022*)saveConv->extraInfo;
   2277     args->converter=myConverterData->currentConverter;
   2278 
   2279     myConverterData->currentConverter->fromUChar32 = saveConv->fromUChar32;
   2280     ucnv_MBCSFromUnicodeWithOffsets(args,err);
   2281     saveConv->fromUChar32 = myConverterData->currentConverter->fromUChar32;
   2282 
   2283     if(*err == U_BUFFER_OVERFLOW_ERROR) {
   2284         if(myConverterData->currentConverter->charErrorBufferLength > 0) {
   2285             uprv_memcpy(
   2286                 saveConv->charErrorBuffer,
   2287                 myConverterData->currentConverter->charErrorBuffer,
   2288                 myConverterData->currentConverter->charErrorBufferLength);
   2289         }
   2290         saveConv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;
   2291         myConverterData->currentConverter->charErrorBufferLength = 0;
   2292     }
   2293     args->converter=saveConv;
   2294 }
   2295 
   2296 static void
   2297 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
            /*
             * Converts UTF-16 in [args->source, args->sourceLimit) to ISO-2022-KR
             * bytes in [args->target, args->targetLimit). When args->offsets is
             * non-NULL, one source offset is appended per output byte.
             *
             * Version 1 converters are forwarded unchanged to the _IBM variant.
             * Otherwise each code unit is looked up with MBCS_FROM_UCHAR32_ISO2022():
             * a single-byte result must be <= 0x7f, a double-byte result must have
             * both bytes in 0xa1..0xfe and is written with 0x80 subtracted from each
             * byte. SO or SI is emitted whenever the output switches between single-
             * and double-byte mode; the mode is kept across calls in
             * converter->fromUnicodeStatus.
             *
             * Set in *err by this function:
             *   U_ILLEGAL_CHAR_FOUND    - SO/SI/ESC in the input, or an unpaired surrogate
             *   U_INVALID_CHAR_FOUND    - no usable mapping (including surrogate pairs)
             *   U_BUFFER_OVERFLOW_ERROR - target full; bytes that did not fit are stored
             *                             in converter->charErrorBuffer
             * In the error cases, and for a lead surrogate at the end of the input
             * (which leaves *err at U_ZERO_ERROR), the code point is saved in
             * converter->fromUChar32.
             */
   2298 
   2299     const UChar *source = args->source;
   2300     const UChar *sourceLimit = args->sourceLimit;
   2301     unsigned char *target = (unsigned char *) args->target;
   2302     unsigned char *targetLimit = (unsigned char *) args->targetLimit;
   2303     int32_t* offsets = args->offsets;
   2304     uint32_t targetByteUnit = 0x0000;
   2305     UChar32 sourceChar = 0x0000;
   2306     UBool isTargetByteDBCS;
   2307     UBool oldIsTargetByteDBCS;
   2308     UConverterDataISO2022 *converterData;
   2309     UConverterSharedData* sharedData;
   2310     UBool useFallback;
   2311     int32_t length =0;
   2312 
   2313     converterData=(UConverterDataISO2022*)args->converter->extraInfo;
   2314     /* if the version is 1 then the user is requesting
   2315      * conversion with ibm-25546 pass the arguments to
   2316      * MBCS converter and return
   2317      */
   2318     if(converterData->version==1){
   2319         UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
   2320         return;
   2321     }
   2322 
   2323     /* initialize data */
   2324     sharedData = converterData->currentConverter->sharedData;
   2325     useFallback = args->converter->useFallback;
   2326     isTargetByteDBCS=(UBool)args->converter->fromUnicodeStatus;
   2327     oldIsTargetByteDBCS = isTargetByteDBCS;
   2328 
            /* redundant: re-reads the same fromUnicodeStatus value assigned just above */
   2329     isTargetByteDBCS   = (UBool) args->converter->fromUnicodeStatus;
            /* a code point saved by an earlier call is pending: resume at the trail-surrogate lookup */
   2330     if((sourceChar = args->converter->fromUChar32)!=0 && target <targetLimit) {
   2331         goto getTrail;
   2332     }
   2333     while(source < sourceLimit){
   2334 
   2335         targetByteUnit = missingCharMarker;
   2336 
   2337         if(target < (unsigned char*) args->targetLimit){
   2338             sourceChar = *source++;
   2339 
   2340             /* do not convert SO/SI/ESC */
   2341             if(IS_2022_CONTROL(sourceChar)) {
   2342                 /* callback(illegal) */
   2343                 *err=U_ILLEGAL_CHAR_FOUND;
   2344                 args->converter->fromUChar32=sourceChar;
   2345                 break;
   2346             }
   2347 
   2348             length = MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,&targetByteUnit,useFallback,MBCS_OUTPUT_2);
   2349             if(length < 0) {
   2350                 length = -length;  /* fallback */
   2351             }
   2352             /* only DBCS or SBCS characters are expected*/
   2353             /* DB characters with high bit set to 1 are expected */
                    /*
                     * Reject: lengths other than 1 or 2, single bytes above 0x7f, and
                     * double bytes where the pair is outside 0xa1a1..0xfefe or the low
                     * byte is outside 0xa1..0xfe (unsigned-subtraction range tests).
                     */
   2354             if( length > 2 || length==0 ||
   2355                 (length == 1 && targetByteUnit > 0x7f) ||
   2356                 (length == 2 &&
   2357                     ((uint16_t)(targetByteUnit - 0xa1a1) > (0xfefe - 0xa1a1) ||
   2358                     (uint8_t)(targetByteUnit - 0xa1) > (0xfe - 0xa1)))
   2359             ) {
   2360                 targetByteUnit=missingCharMarker;
   2361             }
   2362             if (targetByteUnit != missingCharMarker){
   2363 
   2364                 oldIsTargetByteDBCS = isTargetByteDBCS;
   2365                 isTargetByteDBCS = (UBool)(targetByteUnit>0x00FF);
   2366                   /* append the shift sequence */
                          /* no bounds check needed here: target < targetLimit was tested at the top of this iteration */
   2367                 if (oldIsTargetByteDBCS != isTargetByteDBCS ){
   2368 
   2369                     if (isTargetByteDBCS)
   2370                         *target++ = UCNV_SO;
   2371                     else
   2372                         *target++ = UCNV_SI;
   2373                     if(offsets)
   2374                         *(offsets++) = (int32_t)(source - args->source-1);
   2375                 }
   2376                 /* write targetByteUnit to target; bytes that do not fit go to charErrorBuffer */
   2377                 if(targetByteUnit <= 0x00FF){
   2378                     if( target < targetLimit){
   2379                         *(target++) = (unsigned char) targetByteUnit;
   2380                         if(offsets){
   2381                             *(offsets++) = (int32_t)(source - args->source-1);
   2382                         }
   2383 
   2384                     }else{
   2385                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit);
   2386                         *err = U_BUFFER_OVERFLOW_ERROR;
   2387                     }
   2388                 }else{
                            /* double byte: subtract 0x80 from each byte before writing it */
   2389                     if(target < targetLimit){
   2390                         *(target++) =(unsigned char) ((targetByteUnit>>8) -0x80);
   2391                         if(offsets){
   2392                             *(offsets++) = (int32_t)(source - args->source-1);
   2393                         }
   2394                         if(target < targetLimit){
   2395                             *(target++) =(unsigned char) (targetByteUnit -0x80);
   2396                             if(offsets){
   2397                                 *(offsets++) = (int32_t)(source - args->source-1);
   2398                             }
   2399                         }else{
   2400                             args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit -0x80);
   2401                             *err = U_BUFFER_OVERFLOW_ERROR;
   2402                         }
   2403                     }else{
   2404                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) ((targetByteUnit>>8) -0x80);
   2405                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit-0x80);
   2406                         *err = U_BUFFER_OVERFLOW_ERROR;
   2407                     }
   2408                 }
   2409 
   2410             }
   2411             else{
   2412                 /* oops.. the code point is unassigned
   2413                  * set the error and reason
   2414                  */
   2415 
   2416                 /*check if the char is a First surrogate*/
   2417                 if(UTF_IS_SURROGATE(sourceChar)) {
   2418                     if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
   2419 getTrail:
   2420                         /*look ahead to find the trail surrogate*/
   2421                         if(source <  sourceLimit) {
   2422                             /* test the following code unit */
   2423                             UChar trail=(UChar) *source;
   2424                             if(UTF_IS_SECOND_SURROGATE(trail)) {
   2425                                 source++;
   2426                                 sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
                                        /* the combined code point is reported as having no mapping */
   2427                                 *err = U_INVALID_CHAR_FOUND;
   2428                                 /* convert this surrogate code point */
   2429                                 /* exit this condition tree */
   2430                             } else {
   2431                                 /* this is an unmatched lead code unit (1st surrogate) */
   2432                                 /* callback(illegal) */
   2433                                 *err=U_ILLEGAL_CHAR_FOUND;
   2434                             }
   2435                         } else {
   2436                             /* no more input */
                                    /* not an error yet: the lead surrogate is saved in fromUChar32 below */
   2437                             *err = U_ZERO_ERROR;
   2438                         }
   2439                     } else {
   2440                         /* this is an unmatched trail code unit (2nd surrogate) */
   2441                         /* callback(illegal) */
   2442                         *err=U_ILLEGAL_CHAR_FOUND;
   2443                     }
   2444                 } else {
   2445                     /* callback(unassigned) for a BMP code point */
   2446                     *err = U_INVALID_CHAR_FOUND;
   2447                 }
   2448 
   2449                 args->converter->fromUChar32=sourceChar;
   2450                 break;
   2451             }
   2452         } /* end if(target < targetLimit) */
   2453         else{
   2454             *err =U_BUFFER_OVERFLOW_ERROR;
   2455             break;
   2456         }
   2457 
   2458     }/* end while(source < sourceLimit) */
   2459 
   2460     /*
   2461      * the end of the input stream and detection of truncated input
   2462      * are handled by the framework, but for ISO-2022-KR conversion
   2463      * we need to be in ASCII mode at the very end
   2464      *
   2465      * conditions:
   2466      *   successful
   2467      *   not in ASCII mode
   2468      *   end of input and no truncated input
   2469      */
   2470     if( U_SUCCESS(*err) &&
   2471         isTargetByteDBCS &&
   2472         args->flush && source>=sourceLimit && args->converter->fromUChar32==0
   2473     ) {
   2474         int32_t sourceIndex;
   2475 
   2476         /* we are switching to ASCII */
   2477         isTargetByteDBCS=FALSE;
   2478 
   2479         /* get the source index of the last input character */
   2480         /*
   2481          * TODO this would be simpler and more reliable if we used a pair
   2482          * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
   2483          * so that we could simply use the prevSourceIndex here;
   2484          * this code gives an incorrect result for the rare case of an unmatched
   2485          * trail surrogate that is alone in the last buffer of the text stream
   2486          */
   2487         sourceIndex=(int32_t)(source-args->source);
   2488         if(sourceIndex>0) {
   2489             --sourceIndex;
                    /* step back once more if the last unit is a trail surrogate (see the TODO above for the sourceIndex==0 case) */
   2490             if( U16_IS_TRAIL(args->source[sourceIndex]) &&
   2491                 (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
   2492             ) {
   2493                 --sourceIndex;
   2494             }
   2495         } else {
   2496             sourceIndex=-1;
   2497         }
   2498 
                /* write the final shift-in byte via fromUWriteUInt8(), which also receives the limit and err */
   2499         fromUWriteUInt8(
   2500             args->converter,
   2501             SHIFT_IN_STR, 1,
   2502             &target, (const char *)targetLimit,
   2503             &offsets, sourceIndex,
   2504             err);
   2505     }
   2506 
   2507     /*save the state and return */
   2508     args->source = source;
   2509     args->target = (char*)target;
   2510     args->converter->fromUnicodeStatus = (uint32_t)isTargetByteDBCS;
   2511 }
   2512 
   2513 /************************ To Unicode ***************************************/
   2514 
   2515 static void
   2516 UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterToUnicodeArgs *args,
   2517                                                             UErrorCode* err){
            /*
             * ISO-2022-KR toUnicode path that delegates the byte-to-Unicode work to
             * the MBCS sub-converter in myData->currentConverter.
             *
             * The input is processed in runs: getEndOfBuffer_2022() supplies the end
             * of the current run, the run is converted with
             * ucnv_MBCSToUnicodeWithOffsets() through a private copy of the arguments,
             * and what follows the run is handed to changeState_2022().
             * Partial input bytes (toUBytes/toULength) are copied into the
             * sub-converter before each run and back afterwards; on
             * U_BUFFER_OVERFLOW_ERROR the sub-converter's UChar overflow buffer is
             * moved to the public converter.
             *
             * If myData->key != 0 on entry, a partial escape sequence is pending and
             * processing starts at the "escape" label.
             */
   2518     char const* sourceStart;
   2519     UConverterDataISO2022* myData=(UConverterDataISO2022*)(args->converter->extraInfo);
   2520 
   2521     UConverterToUnicodeArgs subArgs;
   2522     int32_t minArgsSize;
   2523 
   2524     /* set up the subconverter arguments */
            /* copy no more bytes than the caller's struct (args->size) or the local struct holds */
   2525     if(args->size<sizeof(UConverterToUnicodeArgs)) {
   2526         minArgsSize = args->size;
   2527     } else {
   2528         minArgsSize = (int32_t)sizeof(UConverterToUnicodeArgs);
   2529     }
   2530 
   2531     uprv_memcpy(&subArgs, args, minArgsSize);
   2532     subArgs.size = (uint16_t)minArgsSize;
   2533     subArgs.converter = myData->currentConverter;
   2534 
   2535     /* remember the original start of the input for offsets */
   2536     sourceStart = args->source;
   2537 
   2538     if(myData->key != 0) {
   2539         /* continue with a partial escape sequence */
   2540         goto escape;
   2541     }
   2542 
   2543     while(U_SUCCESS(*err) && args->source < args->sourceLimit) {
   2544         /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
   2545         subArgs.source = args->source;
   2546         subArgs.sourceLimit = getEndOfBuffer_2022(&(args->source), args->sourceLimit, args->flush);
   2547         if(subArgs.source != subArgs.sourceLimit) {
   2548             /*
   2549              * get the current partial byte sequence
   2550              *
   2551              * it needs to be moved between the public and the subconverter
   2552              * so that the conversion framework, which only sees the public
   2553              * converter, can handle truncated and illegal input etc.
   2554              */
   2555             if(args->converter->toULength > 0) {
   2556                 uprv_memcpy(subArgs.converter->toUBytes, args->converter->toUBytes, args->converter->toULength);
   2557             }
   2558             subArgs.converter->toULength = args->converter->toULength;
   2559 
   2560             /*
   2561              * Convert up to the end of the input, or to before the next escape character.
   2562              * Does not handle conversion extensions because the preToU[] state etc.
   2563              * is not copied.
   2564              */
   2565             ucnv_MBCSToUnicodeWithOffsets(&subArgs, err);
   2566 
   2567             if(args->offsets != NULL && sourceStart != args->source) {
   2568                 /* update offsets to base them on the actual start of the input */
   2569                 int32_t *offsets = args->offsets;
   2570                 UChar *target = args->target;
   2571                 int32_t delta = (int32_t)(args->source - sourceStart);
                        /* one offset per UChar written in this run; negative offsets are left untouched */
   2572                 while(target < subArgs.target) {
   2573                     if(*offsets >= 0) {
   2574                         *offsets += delta;
   2575                     }
   2576                     ++offsets;
   2577                     ++target;
   2578                 }
   2579             }
                    /* publish the sub-converter's progress through the public arguments */
   2580             args->source = subArgs.source;
   2581             args->target = subArgs.target;
   2582             args->offsets = subArgs.offsets;
   2583 
   2584             /* copy input/error/overflow buffers */
   2585             if(subArgs.converter->toULength > 0) {
   2586                 uprv_memcpy(args->converter->toUBytes, subArgs.converter->toUBytes, subArgs.converter->toULength);
   2587             }
   2588             args->converter->toULength = subArgs.converter->toULength;
   2589 
   2590             if(*err == U_BUFFER_OVERFLOW_ERROR) {
   2591                 if(subArgs.converter->UCharErrorBufferLength > 0) {
   2592                     uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer,
   2593                                 subArgs.converter->UCharErrorBufferLength);
   2594                 }
   2595                 args->converter->UCharErrorBufferLength=subArgs.converter->UCharErrorBufferLength;
   2596                 subArgs.converter->UCharErrorBufferLength = 0;
   2597             }
   2598         }
   2599 
                /* stop on any error, or when the whole input has been consumed */
   2600         if (U_FAILURE(*err) || (args->source == args->sourceLimit)) {
   2601             return;
   2602         }
   2603 
   2604 escape:
                /* input remains after the run: let changeState_2022() process it from args->source */
   2605         changeState_2022(args->converter,
   2606                &(args->source),
   2607                args->sourceLimit,
   2608                ISO_2022_KR,
   2609                err);
   2610     }
   2611 }
   2612 
   2613 static void
   2614 UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
   2615                                                             UErrorCode* err){
            /*
             * Converts ISO-2022-KR bytes from args->source into UChars at args->target
             * and, if args->offsets is set, records the source index of each output unit.
             *
             * - Version 1 converters are handed to the _IBM variant.
             * - SI selects single-byte mode (g=0), SO selects double-byte mode (g=1);
             *   an SO directly followed by SI is reported as U_ILLEGAL_ESCAPE_SEQUENCE.
             * - ESC sequences are parsed by changeState_2022(). A partial escape
             *   sequence (myData->key != 0) or a pending lead byte (toULength == 1)
             *   left over from the previous call is resumed before the main loop.
             * - U_BUFFER_OVERFLOW_ERROR is set when the target is full and input remains.
             *
             * Except for the version 1 delegation, args->source and args->target are
             * written back before returning.
             */
   2616     char tempBuf[2];
   2617     const char *mySource = ( char *) args->source;
   2618     UChar *myTarget = args->target;
   2619     const char *mySourceLimit = args->sourceLimit;
   2620     UChar32 targetUniChar = 0x0000;
            /*
             * Must be wider than 16 bits: the illegal-pair path below ORs in 0x10000 so
             * that the error reporting sees a value > 0xff (i.e. two bytes) even when
             * the lead byte is 0x00. A 16-bit UChar silently dropped that marker bit.
             * This also matches the ISO-2022-CN decoder, which uses uint32_t.
             */
   2621     uint32_t mySourceChar = 0x0000;
   2622     UConverterDataISO2022* myData;
   2623     UConverterSharedData* sharedData ;
   2624     UBool useFallback;
   2625 
   2626     myData=(UConverterDataISO2022*)(args->converter->extraInfo);
   2627     if(myData->version==1){
   2628         UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
   2629         return;
   2630     }
   2631 
   2632     /* initialize state */
   2633     sharedData = myData->currentConverter->sharedData;
   2634     useFallback = args->converter->useFallback;
   2635 
   2636     if(myData->key != 0) {
   2637         /* continue with a partial escape sequence */
   2638         goto escape;
   2639     } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
   2640         /* continue with a partial double-byte character */
   2641         mySourceChar = args->converter->toUBytes[0];
   2642         args->converter->toULength = 0;
   2643         goto getTrailByte;
   2644     }
   2645 
   2646     while(mySource< mySourceLimit){
   2647 
   2648         if(myTarget < args->targetLimit){
   2649 
   2650             mySourceChar= (unsigned char) *mySource++;
   2651 
   2652             if(mySourceChar==UCNV_SI){
   2653                 myData->toU2022State.g = 0;
   2654                 if (myData->isEmptySegment) {
   2655                     myData->isEmptySegment = FALSE;	/* we are handling it, reset to avoid future spurious errors */
   2656                     *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   2657                     args->converter->toUCallbackReason = UCNV_IRREGULAR;
   2658                     args->converter->toUBytes[0] = (uint8_t)mySourceChar;
   2659                     args->converter->toULength = 1;
   2660                     args->target = myTarget;
   2661                     args->source = mySource;
   2662                     return;
   2663                 }
   2664                 /*consume the source */
   2665                 continue;
   2666             }else if(mySourceChar==UCNV_SO){
   2667                 myData->toU2022State.g = 1;
   2668                 myData->isEmptySegment = TRUE;	/* Begin a new segment, empty so far */
   2669                 /*consume the source */
   2670                 continue;
   2671             }else if(mySourceChar==ESC_2022){
                        /* un-read the ESC so that changeState_2022() sees the whole sequence */
   2672                 mySource--;
   2673 escape:
   2674                 myData->isEmptySegment = FALSE;	/* Any invalid ESC sequences will be detected separately, so just reset this */
   2675                 changeState_2022(args->converter,&(mySource),
   2676                                 mySourceLimit, ISO_2022_KR, err);
   2677                 if(U_FAILURE(*err)){
   2678                     args->target = myTarget;
   2679                     args->source = mySource;
   2680                     return;
   2681                 }
   2682                 continue;
   2683             }
   2684 
   2685             myData->isEmptySegment = FALSE;	/* Any invalid char errors will be detected separately, so just reset this */
   2686             if(myData->toU2022State.g == 1) {
   2687                 if(mySource < mySourceLimit) {
   2688                     int leadIsOk, trailIsOk;
   2689                     uint8_t trailByte;
   2690 getTrailByte:
   2691                     targetUniChar = missingCharMarker;
   2692                     trailByte = (uint8_t)*mySource;
   2693                     /*
   2694                      * Ticket 5691: consistent illegal sequences:
   2695                      * - We include at least the first byte in the illegal sequence.
   2696                      * - If any of the non-initial bytes could be the start of a character,
   2697                      *   we stop the illegal sequence before the first one of those.
   2698                      *
   2699                      * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
   2700                      * an ESC/SO/SI, we report only the first byte as the illegal sequence.
   2701                      * Otherwise we convert or report the pair of bytes.
   2702                      */
   2703                     leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
   2704                     trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
   2705                     if (leadIsOk && trailIsOk) {
   2706                         ++mySource;
                                /* the table lookup is done with both bytes moved into the 0xa1..0xfe range */
   2707                         tempBuf[0] = (char)(mySourceChar + 0x80);
   2708                         tempBuf[1] = (char)(trailByte + 0x80);
   2709                         targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback);
   2710                         mySourceChar = (mySourceChar << 8) | trailByte;
   2711                     } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
   2712                         /* report a pair of illegal bytes if the second byte is not a DBCS starter */
   2713                         ++mySource;
   2714                         /* add another bit so that the code below writes 2 bytes in case of error */
   2715                         mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
   2716                     }
   2717                 } else {
                            /* lead byte at the very end of the input: remember it for the next call */
   2718                     args->converter->toUBytes[0] = (uint8_t)mySourceChar;
   2719                     args->converter->toULength = 1;
   2720                     break;
   2721                 }
   2722             }
   2723             else if(mySourceChar <= 0x7f) {
   2724                 targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, mySource - 1, 1, useFallback);
   2725             } else {
                        /* bytes above 0x7f are never valid in single-byte mode */
   2726                 targetUniChar = 0xffff;
   2727             }
                    /* values from 0xfffe up are error markers here, not characters */
   2728             if(targetUniChar < 0xfffe){
   2729                 if(args->offsets) {
                            /* index of the character's first byte: one or two bytes back from mySource */
   2730                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2731                 }
   2732                 *(myTarget++)=(UChar)targetUniChar;
   2733             }
   2734             else {
   2735                 /* Call the callback function*/
   2736                 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
   2737                 break;
   2738             }
   2739         }
   2740         else{
   2741             *err =U_BUFFER_OVERFLOW_ERROR;
   2742             break;
   2743         }
   2744     }
   2745     args->target = myTarget;
   2746     args->source = mySource;
   2747 }
   2748 
   2749 /*************************** END ISO2022-KR *********************************/
   2750 
   2751 /*************************** ISO-2022-CN *********************************
   2752 *
   2753 * Rules for ISO-2022-CN Encoding:
   2754 * i)   The designator sequence must appear once on a line before any instance
   2755 *      of character set it designates.
   2756 * ii)  If two lines contain characters from the same character set, both lines
   2757 *      must include the designator sequence.
   2758 * iii) Once the designator sequence is known, a shifting sequence has to be found
   2759 *      to invoke the designated character set.
   2760 * iv)  All lines start in ASCII and end in ASCII.
   2761 * v)   Four shifting sequences are employed for this purpose:
   2762 *
   2763 *      Sequence    ASCII Eq    Charsets
   2764 *      ----------  -------    ---------
   2765 *      SI           <SI>        US-ASCII
   2766 *      SO           <SO>        CNS-11643-1992 Plane 1, GB2312, ISO-IR-165
   2767 *      SS2          <ESC>N      CNS-11643-1992 Plane 2
   2768 *      SS3          <ESC>O      CNS-11643-1992 Planes 3-7
   2769 *
   2770 * vi)
   2771 *      SOdesignator  : ESC "$" ")" finalchar_for_SO
   2772 *      SS2designator : ESC "$" "*" finalchar_for_SS2
   2773 *      SS3designator : ESC "$" "+" finalchar_for_SS3
   2774 *
   2775 *      ESC $ ) A       Indicates the bytes following SO are Chinese
   2776 *       characters as defined in GB 2312-80, until
   2777 *       another SOdesignation appears
   2778 *
   2779 *
   2780 *      ESC $ ) E       Indicates the bytes following SO are as defined
   2781 *       in ISO-IR-165 (for details, see section 2.1),
   2782 *       until another SOdesignation appears
   2783 *
   2784 *      ESC $ ) G       Indicates the bytes following SO are as defined
   2785 *       in CNS 11643-plane-1, until another
   2786 *       SOdesignation appears
   2787 *
   2788 *      ESC $ * H       Indicates the two bytes immediately following
   2789 *       SS2 is a Chinese character as defined in CNS
   2790 *       11643-plane-2, until another SS2designation
   2791 *       appears
   2792 *       (Meaning <ESC>N must precede every 2 byte
   2793 *        sequence.)
   2794 *
   2795 *      ESC $ + I       Indicates the immediate two bytes following SS3
   2796 *       is a Chinese character as defined in CNS
   2797 *       11643-plane-3, until another SS3designation
   2798 *       appears
   2799 *       (Meaning <ESC>O must precede every 2 byte
   2800 *        sequence.)
   2801 *
   2802 *      ESC $ + J       Indicates the immediate two bytes following SS3
   2803 *       is a Chinese character as defined in CNS
   2804 *       11643-plane-4, until another SS3designation
   2805 *       appears
   2806 *       (In English: <ESC>O must precede every 2 byte
   2807 *        sequence.)
   2808 *
   2809 *      ESC $ + K       Indicates the immediate two bytes following SS3
   2810 *       is a Chinese character as defined in CNS
   2811 *       11643-plane-5, until another SS3designation
   2812 *       appears
   2813 *
   2814 *      ESC $ + L       Indicates the immediate two bytes following SS3
   2815 *       is a Chinese character as defined in CNS
   2816 *       11643-plane-6, until another SS3designation
   2817 *       appears
   2818 *
   2819 *      ESC $ + M       Indicates the immediate two bytes following SS3
   2820 *       is a Chinese character as defined in CNS
   2821 *       11643-plane-7, until another SS3designation
   2822 *       appears
   2823 *
   2824 *       As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and
   2825 *       has its own designation information before any Chinese characters
   2826 *       appear
   2827 *
   2828 */
   2829 
   2830 /* Designator sequences spelled out as const byte arrays (4 bytes + NUL) so that they are truly read-only */
   2831 static const char GB_2312_80_STR[]             = { 0x1B, 0x24, 0x29, 0x41, 0x00 }; /* ESC $ ) A */
   2832 static const char ISO_IR_165_STR[]             = { 0x1B, 0x24, 0x29, 0x45, 0x00 }; /* ESC $ ) E */
   2833 static const char CNS_11643_1992_Plane_1_STR[] = { 0x1B, 0x24, 0x29, 0x47, 0x00 }; /* ESC $ ) G */
   2834 static const char CNS_11643_1992_Plane_2_STR[] = { 0x1B, 0x24, 0x2A, 0x48, 0x00 }; /* ESC $ * H */
   2835 static const char CNS_11643_1992_Plane_3_STR[] = { 0x1B, 0x24, 0x2B, 0x49, 0x00 }; /* ESC $ + I */
   2836 static const char CNS_11643_1992_Plane_4_STR[] = { 0x1B, 0x24, 0x2B, 0x4A, 0x00 }; /* ESC $ + J */
   2837 static const char CNS_11643_1992_Plane_5_STR[] = { 0x1B, 0x24, 0x2B, 0x4B, 0x00 }; /* ESC $ + K */
   2838 static const char CNS_11643_1992_Plane_6_STR[] = { 0x1B, 0x24, 0x2B, 0x4C, 0x00 }; /* ESC $ + L */
   2839 static const char CNS_11643_1992_Plane_7_STR[] = { 0x1B, 0x24, 0x2B, 0x4D, 0x00 }; /* ESC $ + M */
   2840 
   2841 /********************** ISO2022-CN Data **************************/
        /*
         * Designator sequences, one per charset. The fromUnicode code copies 4 bytes
         * from an entry, indexing this table with the charset value cs directly when
         * cs < CNS_11643 and with CNS_11643 + (cs - CNS_11643_1) for the CNS 11643
         * planes, so the order of the entries must stay in step with those values.
         */
   2842 static const char* const escSeqCharsCN[10] ={
   2843         SHIFT_IN_STR,           /* ASCII */
   2844         GB_2312_80_STR,             /* ESC $ ) A */
   2845         ISO_IR_165_STR,             /* ESC $ ) E */
   2846         CNS_11643_1992_Plane_1_STR, /* ESC $ ) G */
   2847         CNS_11643_1992_Plane_2_STR, /* ESC $ * H */
   2848         CNS_11643_1992_Plane_3_STR, /* ESC $ + I */
   2849         CNS_11643_1992_Plane_4_STR, /* ESC $ + J */
   2850         CNS_11643_1992_Plane_5_STR, /* ESC $ + K */
   2851         CNS_11643_1992_Plane_6_STR, /* ESC $ + L */
   2852         CNS_11643_1992_Plane_7_STR  /* ESC $ + M */
   2853 };
   2854 
   2855 static void
   2856 UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
            /*
             * Converts UTF-16 from args->source to ISO-2022-CN (version 0) or
             * ISO-2022-CN-EXT (version 1) bytes at args->target, with optional offsets.
             *
             * - U+0000..U+007F are written as ASCII, preceded by SI if a shifted state
             *   is active; SO/SI/ESC themselves are rejected with U_ILLEGAL_CHAR_FOUND.
             *   CR and LF reset the whole fromUnicode state.
             * - Other code points are looked up in up to three candidate charsets
             *   (choices[]); a roundtrip mapping wins over a fallback. A designator
             *   and/or shift sequence is written in front of the two data bytes when
             *   the chosen charset or shift state differs from the current state.
             * - Unpaired surrogates give U_ILLEGAL_CHAR_FOUND, unmappable code points
             *   U_INVALID_CHAR_FOUND; in both cases the code point is stored in
             *   cnv->fromUChar32.
             * - With args->flush at the end of the input, a final SI returns to ASCII.
             *
             * args->source and args->target are written back before returning.
             */
   2857     UConverter *cnv = args->converter;
   2858     UConverterDataISO2022 *converterData;
   2859     ISO2022State *pFromU2022State;
   2860     uint8_t *target = (uint8_t *) args->target;
   2861     const uint8_t *targetLimit = (const uint8_t *) args->targetLimit;
   2862     const UChar* source = args->source;
   2863     const UChar* sourceLimit = args->sourceLimit;
   2864     int32_t* offsets = args->offsets;
   2865     UChar32 sourceChar;
            /* worst case: 4-byte designator + 2-byte single shift (ESC N / ESC O) + 2 data bytes */
   2866     char buffer[8];
   2867     int32_t len;
            /* candidate charsets for non-ASCII input; valid only while choiceCount != 0 */
   2868     int8_t choices[3];
   2869     int32_t choiceCount;
   2870     uint32_t targetValue = 0;
   2871     UBool useFallback;
   2872 
   2873     /* set up the state */
   2874     converterData     = (UConverterDataISO2022*)cnv->extraInfo;
   2875     pFromU2022State   = &converterData->fromU2022State;
   2876 
   2877     choiceCount = 0;
   2878 
   2879     /* check if the last codepoint of previous buffer was a lead surrogate*/
   2880     if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
   2881         goto getTrail;
   2882     }
   2883 
   2884     while( source < sourceLimit){
   2885         if(target < targetLimit){
   2886 
   2887             sourceChar  = *(source++);
   2888             /*check if the char is a First surrogate*/
   2889              if(UTF_IS_SURROGATE(sourceChar)) {
   2890                 if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
   2891 getTrail:
   2892                     /*look ahead to find the trail surrogate*/
   2893                     if(source < sourceLimit) {
   2894                         /* test the following code unit */
   2895                         UChar trail=(UChar) *source;
   2896                         if(UTF_IS_SECOND_SURROGATE(trail)) {
   2897                             source++;
   2898                             sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
   2899                             cnv->fromUChar32=0x00;
   2900                             /* convert this supplementary code point */
   2901                             /* exit this condition tree */
   2902                         } else {
   2903                             /* this is an unmatched lead code unit (1st surrogate) */
   2904                             /* callback(illegal) */
   2905                             *err=U_ILLEGAL_CHAR_FOUND;
   2906                             cnv->fromUChar32=sourceChar;
   2907                             break;
   2908                         }
   2909                     } else {
   2910                         /* no more input */
   2911                         cnv->fromUChar32=sourceChar;
   2912                         break;
   2913                     }
   2914                 } else {
   2915                     /* this is an unmatched trail code unit (2nd surrogate) */
   2916                     /* callback(illegal) */
   2917                     *err=U_ILLEGAL_CHAR_FOUND;
   2918                     cnv->fromUChar32=sourceChar;
   2919                     break;
   2920                 }
   2921             }
   2922 
   2923             /* do the conversion */
   2924             if(sourceChar <= 0x007f ){
   2925                 /* do not convert SO/SI/ESC */
   2926                 if(IS_2022_CONTROL(sourceChar)) {
   2927                     /* callback(illegal) */
   2928                     *err=U_ILLEGAL_CHAR_FOUND;
   2929                     cnv->fromUChar32=sourceChar;
   2930                     break;
   2931                 }
   2932 
   2933                 /* US-ASCII */
   2934                 if(pFromU2022State->g == 0) {
   2935                     buffer[0] = (char)sourceChar;
   2936                     len = 1;
   2937                 } else {
                            /* leave the shifted state first: SI, then the ASCII byte */
   2938                     buffer[0] = UCNV_SI;
   2939                     buffer[1] = (char)sourceChar;
   2940                     len = 2;
   2941                     pFromU2022State->g = 0;
   2942                     choiceCount = 0;
   2943                 }
   2944                 if(sourceChar == CR || sourceChar == LF) {
   2945                     /* reset the state at the end of a line */
   2946                     uprv_memset(pFromU2022State, 0, sizeof(ISO2022State));
   2947                     choiceCount = 0;
   2948                 }
   2949             }
   2950             else{
   2951                 /* convert U+0080..U+10ffff */
   2952                 int32_t i;
   2953                 int8_t cs, g;
   2954 
                        /* (re)build the candidate list; it is kept until something sets choiceCount back to 0 */
   2955                 if(choiceCount == 0) {
   2956                     /* try the current SO/G1 converter first */
   2957                     choices[0] = pFromU2022State->cs[1];
   2958 
   2959                     /* default to GB2312_1 if none is designated yet */
   2960                     if(choices[0] == 0) {
   2961                         choices[0] = GB2312_1;
   2962                     }
   2963 
   2964                     if(converterData->version == 0) {
   2965                         /* ISO-2022-CN */
   2966 
   2967                         /* try the other SO/G1 converter; a CNS_11643_1 lookup may result in any plane */
   2968                         if(choices[0] == GB2312_1) {
   2969                             choices[1] = (int8_t)CNS_11643_1;
   2970                         } else {
   2971                             choices[1] = (int8_t)GB2312_1;
   2972                         }
   2973 
   2974                         choiceCount = 2;
   2975                     } else {
   2976                         /* ISO-2022-CN-EXT */
   2977 
   2978                         /* try one of the other converters */
   2979                         switch(choices[0]) {
   2980                         case GB2312_1:
   2981                             choices[1] = (int8_t)CNS_11643_1;
   2982                             choices[2] = (int8_t)ISO_IR_165;
   2983                             break;
   2984                         case ISO_IR_165:
   2985                             choices[1] = (int8_t)GB2312_1;
   2986                             choices[2] = (int8_t)CNS_11643_1;
   2987                             break;
   2988                         default: /* CNS_11643_x */
   2989                             choices[1] = (int8_t)GB2312_1;
   2990                             choices[2] = (int8_t)ISO_IR_165;
   2991                             break;
   2992                         }
   2993 
   2994                         choiceCount = 3;
   2995                     }
   2996                 }
   2997 
   2998                 cs = g = 0;
   2999                 /*
   3000                  * len==0: no mapping found yet
   3001                  * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
   3002                  * len>0: found a roundtrip result, done
   3003                  */
   3004                 len = 0;
   3005                 /*
   3006                  * We will turn off useFallback after finding a fallback,
   3007                  * but we still get fallbacks from PUA code points as usual.
   3008                  * Therefore, we will also need to check that we don't overwrite
   3009                  * an early fallback with a later one.
   3010                  */
   3011                 useFallback = cnv->useFallback;
   3012 
   3013                 for(i = 0; i < choiceCount && len <= 0; ++i) {
   3014                     int8_t cs0 = choices[i];
   3015                     if(cs0 > 0) {
   3016                         uint32_t value;
   3017                         int32_t len2;
   3018                         if(cs0 >= CNS_11643_0) {
                                    /* all CNS planes share one 3-byte table; the plane is derived from the result below */
   3019                             len2 = MBCS_FROM_UCHAR32_ISO2022(
   3020                                         converterData->myConverterArray[CNS_11643],
   3021                                         sourceChar,
   3022                                         &value,
   3023                                         useFallback,
   3024                                         MBCS_OUTPUT_3);
   3025                             if(len2 == 3 || (len2 == -3 && len == 0)) {
   3026                                 targetValue = value;
                                        /* bits 16 and up of value, minus 0x80, select the plane relative to CNS_11643_0 */
   3027                                 cs = (int8_t)(CNS_11643_0 + (value >> 16) - 0x80);
                                        /* only the low two bytes are written, so the length is recorded as +-2 */
   3028                                 if(len2 >= 0) {
   3029                                     len = 2;
   3030                                 } else {
   3031                                     len = -2;
   3032                                     useFallback = FALSE;
   3033                                 }
   3034                                 if(cs == CNS_11643_1) {
   3035                                     g = 1;
   3036                                 } else if(cs == CNS_11643_2) {
   3037                                     g = 2;
   3038                                 } else /* plane 3..7 */ if(converterData->version == 1) {
   3039                                     g = 3;
   3040                                 } else {
   3041                                     /* ISO-2022-CN (without -EXT) does not support plane 3..7 */
   3042                                     len = 0;
   3043                                 }
   3044                             }
   3045                         } else {
   3046                             /* GB2312_1 or ISO-IR-165 */
   3047                             len2 = MBCS_FROM_UCHAR32_ISO2022(
   3048                                         converterData->myConverterArray[cs0],
   3049                                         sourceChar,
   3050                                         &value,
   3051                                         useFallback,
   3052                                         MBCS_OUTPUT_2);
   3053                             if(len2 == 2 || (len2 == -2 && len == 0)) {
   3054                                 targetValue = value;
   3055                                 len = len2;
   3056                                 cs = cs0;
   3057                                 g = 1;
   3058                                 useFallback = FALSE;
   3059                             }
   3060                         }
   3061                     }
   3062                 }
   3063 
   3064                 if(len != 0) {
   3065                     len = 0; /* count output bytes; it must have been abs(len) == 2 */
   3066 
   3067                     /* write the designation sequence if necessary */
   3068                     if(cs != pFromU2022State->cs[g]) {
   3069                         if(cs < CNS_11643) {
   3070                             uprv_memcpy(buffer, escSeqCharsCN[cs], 4);
   3071                         } else {
   3072                             uprv_memcpy(buffer, escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)], 4);
   3073                         }
   3074                         len = 4;
   3075                         pFromU2022State->cs[g] = cs;
   3076                         if(g == 1) {
   3077                             /* changing the SO/G1 charset invalidates the choices[] */
   3078                             choiceCount = 0;
   3079                         }
   3080                     }
   3081 
   3082                     /* write the shift sequence if necessary */
   3083                     if(g != pFromU2022State->g) {
   3084                         switch(g) {
   3085                         case 1:
   3086                             buffer[len++] = UCNV_SO;
   3087 
   3088                             /* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */
   3089                             pFromU2022State->g = 1;
   3090                             break;
   3091                         case 2:
                                    /* SS2: ESC N */
   3092                             buffer[len++] = 0x1b;
   3093                             buffer[len++] = 0x4e;
   3094                             break;
   3095                         default: /* case 3 */
                                    /* SS3: ESC O */
   3096                             buffer[len++] = 0x1b;
   3097                             buffer[len++] = 0x4f;
   3098                             break;
   3099                         }
   3100                     }
   3101 
   3102                     /* write the two output bytes */
   3103                     buffer[len++] = (char)(targetValue >> 8);
   3104                     buffer[len++] = (char)targetValue;
   3105                 } else {
   3106                     /* if we cannot find the character after checking all codepages
   3107                      * then this is an error
   3108                      */
   3109                     *err = U_INVALID_CHAR_FOUND;
   3110                     cnv->fromUChar32=sourceChar;
   3111                     break;
   3112                 }
   3113             }
   3114 
   3115             /* output len>0 bytes in buffer[] */
                    /* one byte always fits here because target < targetLimit was checked at the top of the loop */
   3116             if(len == 1) {
   3117                 *target++ = buffer[0];
   3118                 if(offsets) {
   3119                     *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */
   3120                 }
   3121             } else if(len == 2 && (target + 2) <= targetLimit) {
   3122                 *target++ = buffer[0];
   3123                 *target++ = buffer[1];
   3124                 if(offsets) {
   3125                     int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));
   3126                     *offsets++ = sourceIndex;
   3127                     *offsets++ = sourceIndex;
   3128                 }
   3129             } else {
                        /*
                         * Longer output, or two bytes that do not fit: let fromUWriteUInt8()
                         * write the bytes and offsets; it reports problems through *err.
                         * NOTE(review): presumably it spills into the converter's overflow
                         * buffer when the target is full - confirm against its definition.
                         */
   3130                 fromUWriteUInt8(
   3131                     cnv,
   3132                     buffer, len,
   3133                     &target, (const char *)targetLimit,
   3134                     &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
   3135                     err);
   3136                 if(U_FAILURE(*err)) {
   3137                     break;
   3138                 }
   3139             }
   3140         } /* end if(target < targetLimit) */
   3141         else{
   3142             *err =U_BUFFER_OVERFLOW_ERROR;
   3143             break;
   3144         }
   3145 
   3146     }/* end while(source < sourceLimit) */
   3147 
   3148     /*
   3149      * the end of the input stream and detection of truncated input
   3150      * are handled by the framework, but for ISO-2022-CN conversion
   3151      * we need to be in ASCII mode at the very end
   3152      *
   3153      * conditions:
   3154      *   successful
   3155      *   not in ASCII mode
   3156      *   end of input and no truncated input
   3157      */
   3158     if( U_SUCCESS(*err) &&
   3159         pFromU2022State->g!=0 &&
   3160         args->flush && source>=sourceLimit && cnv->fromUChar32==0
   3161     ) {
   3162         int32_t sourceIndex;
   3163 
   3164         /* we are switching to ASCII */
   3165         pFromU2022State->g=0;
   3166 
   3167         /* get the source index of the last input character */
   3168         /*
   3169          * TODO this would be simpler and more reliable if we used a pair
   3170          * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
   3171          * so that we could simply use the prevSourceIndex here;
   3172          * this code gives an incorrect result for the rare case of an unmatched
   3173          * trail surrogate that is alone in the last buffer of the text stream
   3174          */
   3175         sourceIndex=(int32_t)(source-args->source);
   3176         if(sourceIndex>0) {
   3177             --sourceIndex;
                    /* step back once more if the last unit is the trail of a surrogate pair */
   3178             if( U16_IS_TRAIL(args->source[sourceIndex]) &&
   3179                 (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
   3180             ) {
   3181                 --sourceIndex;
   3182             }
   3183         } else {
                    /* no input was consumed in this call, so there is no index to attribute the SI to */
   3184             sourceIndex=-1;
   3185         }
   3186 
   3187         fromUWriteUInt8(
   3188             cnv,
   3189             SHIFT_IN_STR, 1,
   3190             &target, (const char *)targetLimit,
   3191             &offsets, sourceIndex,
   3192             err);
   3193     }
   3194 
   3195     /*save the state and return */
   3196     args->source = source;
   3197     args->target = (char*)target;
   3198 }
   3199 
   3200 
   3201 static void
   3202 UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
   3203                                                UErrorCode* err){
   3204     char tempBuf[3];
   3205     const char *mySource = (char *) args->source;
   3206     UChar *myTarget = args->target;
   3207     const char *mySourceLimit = args->sourceLimit;
   3208     uint32_t targetUniChar = 0x0000;
   3209     uint32_t mySourceChar = 0x0000;
   3210     UConverterDataISO2022* myData;
   3211     ISO2022State *pToU2022State;
   3212 
   3213     myData=(UConverterDataISO2022*)(args->converter->extraInfo);
   3214     pToU2022State = &myData->toU2022State;
   3215 
   3216     if(myData->key != 0) {
   3217         /* continue with a partial escape sequence */
   3218         goto escape;
   3219     } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
   3220         /* continue with a partial double-byte character */
   3221         mySourceChar = args->converter->toUBytes[0];
   3222         args->converter->toULength = 0;
   3223         targetUniChar = missingCharMarker;
   3224         goto getTrailByte;
   3225     }
   3226 
   3227     while(mySource < mySourceLimit){
   3228 
   3229         targetUniChar =missingCharMarker;
   3230 
   3231         if(myTarget < args->targetLimit){
   3232 
   3233             mySourceChar= (unsigned char) *mySource++;
   3234 
   3235             switch(mySourceChar){
   3236             case UCNV_SI:
   3237                 pToU2022State->g=0;
   3238                 if (myData->isEmptySegment) {
   3239                     myData->isEmptySegment = FALSE;	/* we are handling it, reset to avoid future spurious errors */
   3240                     *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   3241                     args->converter->toUCallbackReason = UCNV_IRREGULAR;
   3242                     args->converter->toUBytes[0] = mySourceChar;
   3243                     args->converter->toULength = 1;
   3244                     args->target = myTarget;
   3245                     args->source = mySource;
   3246                     return;
   3247                 }
   3248                 continue;
   3249 
   3250             case UCNV_SO:
   3251                 if(pToU2022State->cs[1] != 0) {
   3252                     pToU2022State->g=1;
   3253                     myData->isEmptySegment = TRUE;	/* Begin a new segment, empty so far */
   3254                     continue;
   3255                 } else {
   3256                     /* illegal to have SO before a matching designator */
   3257                     myData->isEmptySegment = FALSE;	/* Handling a different error, reset this to avoid future spurious errs */
   3258                     break;
   3259                 }
   3260 
   3261             case ESC_2022:
   3262                 mySource--;
   3263 escape:
   3264                 {
   3265                     const char * mySourceBefore = mySource;
   3266                     int8_t toULengthBefore = args->converter->toULength;
   3267 
   3268                     changeState_2022(args->converter,&(mySource),
   3269                         mySourceLimit, ISO_2022_CN,err);
   3270 
   3271                     /* After SO there must be at least one character before a designator (designator error handled separately) */
   3272                     if(myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
   3273                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   3274                         args->converter->toUCallbackReason = UCNV_IRREGULAR;
   3275                         args->converter->toULength = toULengthBefore + (mySource - mySourceBefore);
   3276                     }
   3277                 }
   3278 
   3279                 /* invalid or illegal escape sequence */
   3280                 if(U_FAILURE(*err)){
   3281                     args->target = myTarget;
   3282                     args->source = mySource;
   3283                     myData->isEmptySegment = FALSE;	/* Reset to avoid future spurious errors */
   3284                     return;
   3285                 }
   3286                 continue;
   3287 
   3288             /* ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */
   3289 
   3290             case CR:
   3291                 /*falls through*/
   3292             case LF:
   3293                 uprv_memset(pToU2022State, 0, sizeof(ISO2022State));
   3294                 /* falls through */
   3295             default:
   3296                 /* convert one or two bytes */
   3297                 myData->isEmptySegment = FALSE;
   3298                 if(pToU2022State->g != 0) {
   3299                     if(mySource < mySourceLimit) {
   3300                         UConverterSharedData *cnv;
   3301                         StateEnum tempState;
   3302                         int32_t tempBufLen;
   3303                         int leadIsOk, trailIsOk;
   3304                         uint8_t trailByte;
   3305 getTrailByte:
   3306                         trailByte = (uint8_t)*mySource;
   3307                         /*
   3308                          * Ticket 5691: consistent illegal sequences:
   3309                          * - We include at least the first byte in the illegal sequence.
   3310                          * - If any of the non-initial bytes could be the start of a character,
   3311                          *   we stop the illegal sequence before the first one of those.
   3312                          *
   3313                          * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
   3314                          * an ESC/SO/SI, we report only the first byte as the illegal sequence.
   3315                          * Otherwise we convert or report the pair of bytes.
   3316                          */
   3317                         leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
   3318                         trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
   3319                         if (leadIsOk && trailIsOk) {
   3320                             ++mySource;
   3321                             tempState = (StateEnum)pToU2022State->cs[pToU2022State->g];
   3322                             if(tempState >= CNS_11643_0) {
   3323                                 cnv = myData->myConverterArray[CNS_11643];
   3324                                 tempBuf[0] = (char) (0x80+(tempState-CNS_11643_0));
   3325                                 tempBuf[1] = (char) (mySourceChar);
   3326                                 tempBuf[2] = (char) trailByte;
   3327                                 tempBufLen = 3;
   3328 
   3329                             }else{
   3330                                 cnv = myData->myConverterArray[tempState];
   3331                                 tempBuf[0] = (char) (mySourceChar);
   3332                                 tempBuf[1] = (char) trailByte;
   3333                                 tempBufLen = 2;
   3334                             }
   3335                             targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE);
   3336                             mySourceChar = (mySourceChar << 8) | trailByte;
   3337                         } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
   3338                             /* report a pair of illegal bytes if the second byte is not a DBCS starter */
   3339                             ++mySource;
   3340                             /* add another bit so that the code below writes 2 bytes in case of error */
   3341                             mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
   3342                         }
   3343                         if(pToU2022State->g>=2) {
   3344                             /* return from a single-shift state to the previous one */
   3345                             pToU2022State->g=pToU2022State->prevG;
   3346                         }
   3347                     } else {
   3348                         args->converter->toUBytes[0] = (uint8_t)mySourceChar;
   3349                         args->converter->toULength = 1;
   3350                         goto endloop;
   3351                     }
   3352                 }
   3353                 else{
   3354                     if(mySourceChar <= 0x7f) {
   3355                         targetUniChar = (UChar) mySourceChar;
   3356                     }
   3357                 }
   3358                 break;
   3359             }
   3360             if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){
   3361                 if(args->offsets){
   3362                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   3363                 }
   3364                 *(myTarget++)=(UChar)targetUniChar;
   3365             }
   3366             else if(targetUniChar > missingCharMarker){
   3367                 /* disassemble the surrogate pair and write to output*/
   3368                 targetUniChar-=0x0010000;
   3369                 *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));
   3370                 if(args->offsets){
   3371                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   3372                 }
   3373                 ++myTarget;
   3374                 if(myTarget< args->targetLimit){
   3375                     *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
   3376                     if(args->offsets){
   3377                         args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   3378                     }
   3379                     ++myTarget;
   3380                 }else{
   3381                     args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
   3382                                     (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
   3383                 }
   3384 
   3385             }
   3386             else{
   3387                 /* Call the callback function*/
   3388                 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
   3389                 break;
   3390             }
   3391         }
   3392         else{
   3393             *err =U_BUFFER_OVERFLOW_ERROR;
   3394             break;
   3395         }
   3396     }
   3397 endloop:
   3398     args->target = myTarget;
   3399     args->source = mySource;
   3400 }
   3401 
   3402 static void
   3403 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
   3404     UConverter *cnv = args->converter;
   3405     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo;
   3406     ISO2022State *pFromU2022State=&myConverterData->fromU2022State;
   3407     char *p, *subchar;
   3408     char buffer[8];
   3409     int32_t length;
   3410 
   3411     subchar=(char *)cnv->subChars;
   3412     length=cnv->subCharLen; /* assume length==1 for most variants */
   3413 
   3414     p = buffer;
   3415     switch(myConverterData->locale[0]){
   3416     case 'j':
   3417         {
   3418             int8_t cs;
   3419 
   3420             if(pFromU2022State->g == 1) {
   3421                 /* JIS7: switch from G1 to G0 */
   3422                 pFromU2022State->g = 0;
   3423                 *p++ = UCNV_SI;
   3424             }
   3425 
   3426             cs = pFromU2022State->cs[0];
   3427             if(cs != ASCII && cs != JISX201) {
   3428                 /* not in ASCII or JIS X 0201: switch to ASCII */
   3429                 pFromU2022State->cs[0] = (int8_t)ASCII;
   3430                 *p++ = '\x1b';
   3431                 *p++ = '\x28';
   3432                 *p++ = '\x42';
   3433             }
   3434 
   3435             *p++ = subchar[0];
   3436             break;
   3437         }
   3438     case 'c':
   3439         if(pFromU2022State->g != 0) {
   3440             /* not in ASCII mode: switch to ASCII */
   3441             pFromU2022State->g = 0;
   3442             *p++ = UCNV_SI;
   3443         }
   3444         *p++ = subchar[0];
   3445         break;
   3446     case 'k':
   3447         if(myConverterData->version == 0) {
   3448             if(length == 1) {
   3449                 if((UBool)args->converter->fromUnicodeStatus) {
   3450                     /* in DBCS mode: switch to SBCS */
   3451                     args->converter->fromUnicodeStatus = 0;
   3452                     *p++ = UCNV_SI;
   3453                 }
   3454                 *p++ = subchar[0];
   3455             } else /* length == 2*/ {
   3456                 if(!(UBool)args->converter->fromUnicodeStatus) {
   3457                     /* in SBCS mode: switch to DBCS */
   3458                     args->converter->fromUnicodeStatus = 1;
   3459                     *p++ = UCNV_SO;
   3460                 }
   3461                 *p++ = subchar[0];
   3462                 *p++ = subchar[1];
   3463             }
   3464             break;
   3465         } else {
   3466             /* save the subconverter's substitution string */
   3467             uint8_t *currentSubChars = myConverterData->currentConverter->subChars;
   3468             int8_t currentSubCharLen = myConverterData->currentConverter->subCharLen;
   3469 
   3470             /* set our substitution string into the subconverter */
   3471             myConverterData->currentConverter->subChars = (uint8_t *)subchar;
   3472             myConverterData->currentConverter->subCharLen = (int8_t)length;
   3473 
   3474             /* let the subconverter write the subchar, set/retrieve fromUChar32 state */
   3475             args->converter = myConverterData->currentConverter;
   3476             myConverterData->currentConverter->fromUChar32 = cnv->fromUChar32;
   3477             ucnv_cbFromUWriteSub(args, 0, err);
   3478             cnv->fromUChar32 = myConverterData->currentConverter->fromUChar32;
   3479             args->converter = cnv;
   3480 
   3481             /* restore the subconverter's substitution string */
   3482             myConverterData->currentConverter->subChars = currentSubChars;
   3483             myConverterData->currentConverter->subCharLen = currentSubCharLen;
   3484 
   3485             if(*err == U_BUFFER_OVERFLOW_ERROR) {
   3486                 if(myConverterData->currentConverter->charErrorBufferLength > 0) {
   3487                     uprv_memcpy(
   3488                         cnv->charErrorBuffer,
   3489                         myConverterData->currentConverter->charErrorBuffer,
   3490                         myConverterData->currentConverter->charErrorBufferLength);
   3491                 }
   3492                 cnv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;
   3493                 myConverterData->currentConverter->charErrorBufferLength = 0;
   3494             }
   3495             return;
   3496         }
   3497     default:
   3498         /* not expected */
   3499         break;
   3500     }
   3501     ucnv_cbFromUWriteBytes(args,
   3502                            buffer, (int32_t)(p - buffer),
   3503                            offsetIndex, err);
   3504 }
   3505 
/*
 * Structure for cloning an ISO 2022 converter into a single memory block.
 * ucnv_safeClone() of the converter will align the entire cloneStruct,
 * and then ucnv_safeClone() of the sub-converter may additionally align
 * currentConverter inside the cloneStruct, for which we need the deadSpace
 * after currentConverter.
 * This is because UAlignedMemory may be larger than the actually
 * necessary alignment size for the platform.
 * The other cloneStruct fields will not be moved around,
 * and are aligned properly with cloneStruct's alignment.
 *
 * The member order matters: _ISO_2022_SafeClone() hands
 * sizeof(UConverter) + sizeof(UAlignedMemory) bytes starting at
 * currentConverter to ucnv_safeClone(), so deadSpace must directly
 * follow currentConverter.
 */
struct cloneStruct
{
    UConverter cnv;                /* the cloned ISO 2022 converter; _ISO_2022_SafeClone() returns its address */
    UConverter currentConverter;   /* storage offered to ucnv_safeClone() for the sub-converter clone */
    UAlignedMemory deadSpace;      /* slack so the sub-converter clone can be re-aligned inside this block */
    UConverterDataISO2022 mydata;  /* copy of the source's extra data; cnv.extraInfo is pointed at it */
};
   3524 
   3525 
   3526 static UConverter *
   3527 _ISO_2022_SafeClone(
   3528             const UConverter *cnv,
   3529             void *stackBuffer,
   3530             int32_t *pBufferSize,
   3531             UErrorCode *status)
   3532 {
   3533     struct cloneStruct * localClone;
   3534     UConverterDataISO2022 *cnvData;
   3535     int32_t i, size;
   3536 
   3537     if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */
   3538         *pBufferSize = (int32_t)sizeof(struct cloneStruct);
   3539         return NULL;
   3540     }
   3541 
   3542     cnvData = (UConverterDataISO2022 *)cnv->extraInfo;
   3543     localClone = (struct cloneStruct *)stackBuffer;
   3544 
   3545     /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
   3546 
   3547     uprv_memcpy(&localClone->mydata, cnvData, sizeof(UConverterDataISO2022));
   3548     localClone->cnv.extraInfo = &localClone->mydata; /* set pointer to extra data */
   3549     localClone->cnv.isExtraLocal = TRUE;
   3550 
   3551     /* share the subconverters */
   3552 
   3553     if(cnvData->currentConverter != NULL) {
   3554         size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */
   3555         localClone->mydata.currentConverter =
   3556             ucnv_safeClone(cnvData->currentConverter,
   3557                             &localClone->currentConverter,
   3558                             &size, status);
   3559         if(U_FAILURE(*status)) {
   3560             return NULL;
   3561         }
   3562     }
   3563 
   3564     for(i=0; i<UCNV_2022_MAX_CONVERTERS; ++i) {
   3565         if(cnvData->myConverterArray[i] != NULL) {
   3566             ucnv_incrementRefCount(cnvData->myConverterArray[i]);
   3567         }
   3568     }
   3569 
   3570     return &localClone->cnv;
   3571 }
   3572 
   3573 static void
   3574 _ISO_2022_GetUnicodeSet(const UConverter *cnv,
   3575                     const USetAdder *sa,
   3576                     UConverterUnicodeSet which,
   3577                     UErrorCode *pErrorCode)
   3578 {
   3579     int32_t i;
   3580     UConverterDataISO2022* cnvData;
   3581 
   3582     if (U_FAILURE(*pErrorCode)) {
   3583         return;
   3584     }
   3585 #ifdef U_ENABLE_GENERIC_ISO_2022
   3586     if (cnv->sharedData == &_ISO2022Data) {
   3587         /* We use UTF-8 in this case */
   3588         sa->addRange(sa->set, 0, 0xd7FF);
   3589         sa->addRange(sa->set, 0xE000, 0x10FFFF);
   3590         return;
   3591     }
   3592 #endif
   3593 
   3594     cnvData = (UConverterDataISO2022*)cnv->extraInfo;
   3595 
   3596     /* open a set and initialize it with code points that are algorithmically round-tripped */
   3597     switch(cnvData->locale[0]){
   3598     case 'j':
   3599         /* include JIS X 0201 which is hardcoded */
   3600         sa->add(sa->set, 0xa5);
   3601         sa->add(sa->set, 0x203e);
   3602         if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
   3603             /* include Latin-1 for some variants of JP */
   3604             sa->addRange(sa->set, 0, 0xff);
   3605         } else {
   3606             /* include ASCII for JP */
   3607             sa->addRange(sa->set, 0, 0x7f);
   3608         }
   3609         if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
   3610             /*
   3611              * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0
   3612              * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8)
   3613              * use half-width Katakana.
   3614              * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode)
   3615              * half-width Katakana via the ESC ( I sequence.
   3616              * However, we only emit (fromUnicode) half-width Katakana according to the
   3617              * definition of each variant.
   3618              *
   3619              * When including fallbacks,
   3620              * we need to include half-width Katakana Unicode code points for all JP variants because
   3621              * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana).
   3622              */
   3623             /* include half-width Katakana for JP */
   3624             sa->addRange(sa->set, HWKANA_START, HWKANA_END);
   3625         }
   3626         break;
   3627     case 'c':
   3628     case 'z':
   3629         /* include ASCII for CN */
   3630         sa->addRange(sa->set, 0, 0x7f);
   3631         break;
   3632     case 'k':
   3633         /* there is only one converter for KR, and it is not in the myConverterArray[] */
   3634         cnvData->currentConverter->sharedData->impl->getUnicodeSet(
   3635                 cnvData->currentConverter, sa, which, pErrorCode);
   3636         /* the loop over myConverterArray[] will simply not find another converter */
   3637         break;
   3638     default:
   3639         break;
   3640     }
   3641 
   3642 #if 0  /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */
   3643             if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
   3644                 cnvData->version==0 && i==CNS_11643
   3645             ) {
   3646                 /* special handling for non-EXT ISO-2022-CN: add only code points for CNS planes 1 and 2 */
   3647                 ucnv_MBCSGetUnicodeSetForBytes(
   3648                         cnvData->myConverterArray[i],
   3649                         sa, UCNV_ROUNDTRIP_SET,
   3650                         0, 0x81, 0x82,
   3651                         pErrorCode);
   3652             }
   3653 #endif
   3654 
   3655     for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
   3656         UConverterSetFilter filter;
   3657         if(cnvData->myConverterArray[i]!=NULL) {
   3658             if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
   3659                 cnvData->version==0 && i==CNS_11643
   3660             ) {
   3661                 /*
   3662                  * Version-specific for CN:
   3663                  * CN version 0 does not map CNS planes 3..7 although
   3664                  * they are all available in the CNS conversion table;
   3665                  * CN version 1 (-EXT) does map them all.
   3666                  * The two versions create different Unicode sets.
   3667                  */
   3668                 filter=UCNV_SET_FILTER_2022_CN;
   3669             } else if(cnvData->locale[0]=='j' && i==JISX208) {
   3670                 /*
   3671                  * Only add code points that map to Shift-JIS codes
   3672                  * corresponding to JIS X 0208.
   3673                  */
   3674                 filter=UCNV_SET_FILTER_SJIS;
   3675             } else if(i==KSC5601) {
   3676                 /*
   3677                  * Some of the KSC 5601 tables (convrtrs.txt has this aliases on multiple tables)
   3678                  * are broader than GR94.
   3679                  */
   3680                 filter=UCNV_SET_FILTER_GR94DBCS;
   3681             } else {
   3682                 filter=UCNV_SET_FILTER_NONE;
   3683             }
   3684             ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, filter, pErrorCode);
   3685         }
   3686     }
   3687 
   3688     /*
   3689      * ISO 2022 converters must not convert SO/SI/ESC despite what
   3690      * sub-converters do by themselves.
   3691      * Remove these characters from the set.
   3692      */
   3693     sa->remove(sa->set, 0x0e);
   3694     sa->remove(sa->set, 0x0f);
   3695     sa->remove(sa->set, 0x1b);
   3696 
   3697     /* ISO 2022 converters do not convert C1 controls either */
   3698     sa->removeRange(sa->set, 0x80, 0x9f);
   3699 }
   3700 
/*
 * Implementation table for the generic ISO-2022 converter.
 * The initializers are positional: their order has to match the member order
 * of UConverterImpl, which is declared outside this file section
 * (presumably in ucnv_cnv.h -- confirm before adding or moving an entry).
 * The four conversion-function slots are only filled in when
 * U_ENABLE_GENERIC_ISO_2022 is defined; otherwise they are NULL.
 */
static const UConverterImpl _ISO2022Impl={
    UCNV_ISO_2022,

    NULL,
    NULL,

    _ISO2022Open,
    _ISO2022Close,
    _ISO2022Reset,

#ifdef U_ENABLE_GENERIC_ISO_2022
    /* the same toUnicode function fills the first two slots; fromUnicode is done by the UTF-8 functions */
    T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
    T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
    ucnv_fromUnicode_UTF8,
    ucnv_fromUnicode_UTF8_OFFSETS_LOGIC,
#else
    NULL,
    NULL,
    NULL,
    NULL,
#endif
    NULL,

    NULL,
    _ISO2022getName,
    _ISO_2022_WriteSub,
    _ISO_2022_SafeClone,
    _ISO_2022_GetUnicodeSet
};
/*
 * Static description of the generic ISO-2022 converter.
 * Positional initializers for UConverterStaticData (declared elsewhere);
 * entries without a comment are left undocumented here because their meaning
 * cannot be determined from this file section.
 */
static const UConverterStaticData _ISO2022StaticData={
    sizeof(UConverterStaticData),
    "ISO_2022",
    2022,
    UCNV_IBM,
    UCNV_ISO_2022,
    1,
    3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */
    { 0x1a, 0, 0, 0 }, /* presumably the substitution bytes, followed by their length -- confirm */
    1,
    FALSE,
    FALSE,
    0,
    0,
    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
};
/*
 * Shared data object that ties the static data and the implementation table
 * together. Unlike the JP/KR/CN shared data objects it is not declared
 * static; _ISO_2022_GetUnicodeSet() compares cnv->sharedData against its
 * address when U_ENABLE_GENERIC_ISO_2022 is defined.
 */
const UConverterSharedData _ISO2022Data={
    sizeof(UConverterSharedData),
    ~((uint32_t) 0), /* all bits set; NOTE(review): looks like a "never release" reference count -- confirm */
    NULL,
    NULL,
    &_ISO2022StaticData,
    FALSE,
    &_ISO2022Impl,
    0
};
   3756 
   3757 /*************JP****************/
/*
 * Implementation table for the ISO-2022-JP variants.
 * Open/close/reset, getName, writeSub, safeClone and getUnicodeSet are the
 * functions shared by all ISO-2022 variants; only the conversion functions
 * are JP-specific. The initializers are positional and have to follow the
 * member order of UConverterImpl (declared elsewhere -- confirm before
 * adding or moving an entry).
 */
static const UConverterImpl _ISO2022JPImpl={
    UCNV_ISO_2022,

    NULL,
    NULL,

    _ISO2022Open,
    _ISO2022Close,
    _ISO2022Reset,

    /* one toUnicode and one fromUnicode function, each used for two adjacent slots */
    UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,
    UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,
    UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,
    UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,
    NULL,

    NULL,
    _ISO2022getName,
    _ISO_2022_WriteSub,
    _ISO_2022_SafeClone,
    _ISO_2022_GetUnicodeSet
};
/*
 * Static description of the ISO-2022-JP converter.
 * Positional initializers for UConverterStaticData (declared elsewhere).
 */
static const UConverterStaticData _ISO2022JPStaticData={
    sizeof(UConverterStaticData),
    "ISO_2022_JP",
    0,
    UCNV_IBM,
    UCNV_ISO_2022,
    1,
    6, /* max 6 bytes per UChar: 4-byte escape sequence + DBCS */
    { 0x1a, 0, 0, 0 }, /* presumably the substitution bytes, followed by their length -- confirm */
    1,
    FALSE,
    FALSE,
    0,
    0,
    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
};
/* Shared data object that ties the JP static data and implementation table together. */
static const UConverterSharedData _ISO2022JPData={
    sizeof(UConverterSharedData),
    ~((uint32_t) 0), /* all bits set; NOTE(review): looks like a "never release" reference count -- confirm */
    NULL,
    NULL,
    &_ISO2022JPStaticData,
    FALSE,
    &_ISO2022JPImpl,
    0
};
   3806 
   3807 /************* KR ***************/
/*
 * Implementation table for the ISO-2022-KR variants.
 * Open/close/reset, getName, writeSub, safeClone and getUnicodeSet are the
 * functions shared by all ISO-2022 variants; only the conversion functions
 * are KR-specific. The initializers are positional and have to follow the
 * member order of UConverterImpl (declared elsewhere -- confirm before
 * adding or moving an entry).
 */
static const UConverterImpl _ISO2022KRImpl={
    UCNV_ISO_2022,

    NULL,
    NULL,

    _ISO2022Open,
    _ISO2022Close,
    _ISO2022Reset,

    /* one toUnicode and one fromUnicode function, each used for two adjacent slots */
    UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,
    UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,
    UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,
    UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,
    NULL,

    NULL,
    _ISO2022getName,
    _ISO_2022_WriteSub,
    _ISO_2022_SafeClone,
    _ISO_2022_GetUnicodeSet
};
/*
 * Static description of the ISO-2022-KR converter.
 * Positional initializers for UConverterStaticData (declared elsewhere).
 */
static const UConverterStaticData _ISO2022KRStaticData={
    sizeof(UConverterStaticData),
    "ISO_2022_KR",
    0,
    UCNV_IBM,
    UCNV_ISO_2022,
    1,
    3, /* max 3 bytes per UChar: SO+DBCS */
    { 0x1a, 0, 0, 0 }, /* presumably the substitution bytes, followed by their length -- confirm */
    1,
    FALSE,
    FALSE,
    0,
    0,
    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
};
/* Shared data object that ties the KR static data and implementation table together. */
static const UConverterSharedData _ISO2022KRData={
    sizeof(UConverterSharedData),
    ~((uint32_t) 0), /* all bits set; NOTE(review): looks like a "never release" reference count -- confirm */
    NULL,
    NULL,
    &_ISO2022KRStaticData,
    FALSE,
    &_ISO2022KRImpl,
    0
};
   3856 
   3857 /*************** CN ***************/
/*
 * Implementation table for the ISO-2022-CN variants.
 * Open/close/reset, getName, writeSub, safeClone and getUnicodeSet are the
 * functions shared by all ISO-2022 variants; only the conversion functions
 * are CN-specific. The initializers are positional and have to follow the
 * member order of UConverterImpl (declared elsewhere -- confirm before
 * adding or moving an entry).
 */
static const UConverterImpl _ISO2022CNImpl={

    UCNV_ISO_2022,

    NULL,
    NULL,

    _ISO2022Open,
    _ISO2022Close,
    _ISO2022Reset,

    /* one toUnicode and one fromUnicode function, each used for two adjacent slots */
    UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,
    UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,
    UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,
    UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,
    NULL,

    NULL,
    _ISO2022getName,
    _ISO_2022_WriteSub,
    _ISO_2022_SafeClone,
    _ISO_2022_GetUnicodeSet
};
/*
 * Static description of the ISO-2022-CN converter.
 * Positional initializers for UConverterStaticData (declared elsewhere).
 */
static const UConverterStaticData _ISO2022CNStaticData={
    sizeof(UConverterStaticData),
    "ISO_2022_CN",
    0,
    UCNV_IBM,
    UCNV_ISO_2022,
    1,
    8, /* max 8 bytes per UChar: 4-byte CNS designator + 2 bytes for SS2/SS3 + DBCS */
    { 0x1a, 0, 0, 0 }, /* presumably the substitution bytes, followed by their length -- confirm */
    1,
    FALSE,
    FALSE,
    0,
    0,
    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
};
/* Shared data object that ties the CN static data and implementation table together. */
static const UConverterSharedData _ISO2022CNData={
    sizeof(UConverterSharedData),
    ~((uint32_t) 0), /* all bits set; NOTE(review): looks like a "never release" reference count -- confirm */
    NULL,
    NULL,
    &_ISO2022CNStaticData,
    FALSE,
    &_ISO2022CNImpl,
    0
};
   3907 
   3908 
   3909 
   3910 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
   3911