Home | History | Annotate | Download | only in common
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 2000-2012, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 *   file name:  ucnv2022.cpp
      7 *   encoding:   US-ASCII
      8 *   tab size:   8 (not used)
      9 *   indentation:4
     10 *
     11 *   created on: 2000feb03
     12 *   created by: Markus W. Scherer
     13 *
     14 *   Change history:
     15 *
     16 *   06/29/2000  helena  Major rewrite of the callback APIs.
     17 *   08/08/2000  Ram     Included support for ISO-2022-JP-2
     18 *                       Changed implementation of toUnicode
     19 *                       function
     20 *   08/21/2000  Ram     Added support for ISO-2022-KR
     21 *   08/29/2000  Ram     Separated implementation of EBCDIC to
     22 *                       ucnvebdc.c
     23 *   09/20/2000  Ram     Added support for ISO-2022-CN
     24 *                       Added implementations for getNextUChar()
     25 *                       for specific 2022 country variants.
     26 *   10/31/2000  Ram     Implemented offsets logic functions
     27 */
     28 
     29 #include "unicode/utypes.h"
     30 
     31 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
     32 
     33 #include "unicode/ucnv.h"
     34 #include "unicode/uset.h"
     35 #include "unicode/ucnv_err.h"
     36 #include "unicode/ucnv_cb.h"
     37 #include "unicode/utf16.h"
     38 #include "ucnv_imp.h"
     39 #include "ucnv_bld.h"
     40 #include "ucnv_cnv.h"
     41 #include "ucnvmbcs.h"
     42 #include "cstring.h"
     43 #include "cmemory.h"
     44 #include "uassert.h"
     45 
     46 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     47 
     48 #ifdef U_ENABLE_GENERIC_ISO_2022
     49 /*
     50  * I am disabling the generic ISO-2022 converter after proposing to do so on
     51  * the icu mailing list two days ago.
     52  *
     53  * Reasons:
     54  * 1. It does not fully support the ISO-2022/ECMA-35 specification with all of
     55  *    its designation sequences, single shifts with return to the previous state,
     56  *    switch-with-no-return to UTF-16BE or similar, etc.
     57  *    This is unlike the language-specific variants like ISO-2022-JP which
     58  *    require a much smaller repertoire of ISO-2022 features.
     59  *    These variants continue to be supported.
     60  * 2. I believe that no one is really using the generic ISO-2022 converter
     61  *    but rather always one of the language-specific variants.
     62  *    Note that ICU's generic ISO-2022 converter has always output one escape
     63  *    sequence followed by UTF-8 for the whole stream.
     64  * 3. Switching between subcharsets is extremely slow, because each time
     65  *    the previous converter is closed and a new one opened,
     66  *    without any kind of caching, least-recently-used list, etc.
     67  * 4. The code is currently buggy, and given the above it does not seem
     68  *    reasonable to spend the time on maintenance.
     69  * 5. ISO-2022 subcharsets should normally be used with 7-bit byte encodings.
     70  *    This means, for example, that when ISO-8859-7 is designated, the following
     71  *    ISO-2022 bytes 00..7f should be interpreted as ISO-8859-7 bytes 80..ff.
     72  *    The ICU ISO-2022 converter does not handle this - and has no information
     73  *    about which subconverter would have to be shifted vs. which is designed
     74  *    for 7-bit ISO-2022.
     75  *
     76  * Markus Scherer 2003-dec-03
     77  */
     78 #endif
     79 
     80 static const char SHIFT_IN_STR[]  = "\x0F";
     81 // static const char SHIFT_OUT_STR[] = "\x0E";
     82 
     83 #define CR      0x0D
     84 #define LF      0x0A
     85 #define H_TAB   0x09
     86 #define V_TAB   0x0B
     87 #define SPACE   0x20
     88 
     89 enum {
     90     HWKANA_START=0xff61,
     91     HWKANA_END=0xff9f
     92 };
     93 
     94 /*
     95  * 94-character sets with native byte values A1..FE are encoded in ISO 2022
     96  * as bytes 21..7E. (Subtract 0x80.)
     97  * 96-character sets with native byte values A0..FF are encoded in ISO 2022
     98  * as bytes 20..7F. (Subtract 0x80.)
     99  * Do not encode C1 control codes with native bytes 80..9F
    100  * as bytes 00..1F (C0 control codes).
    101  */
    102 enum {
    103     GR94_START=0xa1,
    104     GR94_END=0xfe,
    105     GR96_START=0xa0,
    106     GR96_END=0xff
    107 };
    108 
    109 /*
    110  * ISO 2022 control codes must not be converted from Unicode
    111  * because they would mess up the byte stream.
    112  * The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b
    113  * corresponding to SO, SI, and ESC.
    114  */
    115 #define IS_2022_CONTROL(c) (((c)<0x20) && (((uint32_t)1<<(c))&0x0800c000)!=0)
    116 
    117 /* for ISO-2022-JP and -CN implementations */
    118 typedef enum  {
    119         /* shared values */
    120         INVALID_STATE=-1,
    121         ASCII = 0,
    122 
    123         SS2_STATE=0x10,
    124         SS3_STATE,
    125 
    126         /* JP */
    127         ISO8859_1 = 1 ,
    128         ISO8859_7 = 2 ,
    129         JISX201  = 3,
    130         JISX208 = 4,
    131         JISX212 = 5,
    132         GB2312  =6,
    133         KSC5601 =7,
    134         HWKANA_7BIT=8,    /* Halfwidth Katakana 7 bit */
    135 
    136         /* CN */
    137         /* the first few enum constants must keep their values because they correspond to myConverterArray[] */
    138         GB2312_1=1,
    139         ISO_IR_165=2,
    140         CNS_11643=3,
    141 
    142         /*
    143          * these are used in StateEnum and ISO2022State variables,
    144          * but CNS_11643 must be used to index into myConverterArray[]
    145          */
    146         CNS_11643_0=0x20,
    147         CNS_11643_1,
    148         CNS_11643_2,
    149         CNS_11643_3,
    150         CNS_11643_4,
    151         CNS_11643_5,
    152         CNS_11643_6,
    153         CNS_11643_7
    154 } StateEnum;
    155 
    156 /* is the StateEnum charset value for a DBCS charset? */
    /* Matches JISX208, JISX212, GB2312 and KSC5601 (StateEnum values 4..7). Note: cs is evaluated twice. */
    157 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)
    158 
    /* Charset mask: a single bit at bit position cs; the jpCharsetMasks[] entries are ORs of these. */
    159 #define CSM(cs) ((uint16_t)1<<(cs))
    160 
    161 /*
    162  * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence
    163  * to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x
    164  *
    165  * Note: The converter uses some leniency:
    166  * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in
    167  *   all versions, not just JIS7 and JIS8.
    168  * - ICU does not distinguish between different versions of JIS X 0208.
    169  */
    170 enum { MAX_JA_VERSION=4 };
    171 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
    172     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
    173     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
    174     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
    175     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
    176     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)
    177 };
    178 
    179 typedef enum {
    180         ASCII1=0,
    181         LATIN1,
    182         SBCS,
    183         DBCS,
    184         MBCS,
    185         HWKANA
    186 }Cnv2022Type;
    187 
    188 typedef struct ISO2022State {
    189     int8_t cs[4];       /* charset number for SI (G0)/SO (G1)/SS2 (G2)/SS3 (G3) */
    190     int8_t g;           /* 0..3 for G0..G3 (SI/SO/SS2/SS3) */
    191     int8_t prevG;       /* g before single shift (SS2 or SS3) */
    192 } ISO2022State;
    193 
    /* _ISO2022Open() ANDs the open options with this mask to obtain the variant version number. */
    194 #define UCNV_OPTIONS_VERSION_MASK 0xf
    /* Capacity of UConverterDataISO2022.myConverterArray[]. */
    195 #define UCNV_2022_MAX_CONVERTERS 10
    196 
    /*
     * Per-converter state, allocated and zero-filled by _ISO2022Open() and
     * stored in UConverter.extraInfo.
     */
    197 typedef struct{
    198     UConverterSharedData *myConverterArray[UCNV_2022_MAX_CONVERTERS]; /* shared data of the sub-charsets, indexed by StateEnum values (e.g. JISX208, CNS_11643) */
    199     UConverter *currentConverter;   /* opened via ucnv_open() for the ko variant in _ISO2022Open() */
    200     Cnv2022Type currentType;        /* set to ASCII1 in _ISO2022Open() */
    201     ISO2022State toU2022State, fromU2022State; /* NOTE(review): presumably one state per conversion direction - confirm */
    202     uint32_t key;                   /* NOTE(review): presumably the escape-sequence key built as described for the state tables - confirm */
    203     uint32_t version;               /* variant version, derived from the open options */
    204 #ifdef U_ENABLE_GENERIC_ISO_2022
    205     UBool isFirstBuffer;
    206 #endif
    207     UBool isEmptySegment;
    208     char name[30];                  /* converter name, e.g. "ISO_2022,locale=ja,version=0" */
    209     char locale[3];                 /* "ja", "ko" or "cn", set in _ISO2022Open() */
    210 }UConverterDataISO2022;
    211 
    212 /* Protos */
    213 /* ISO-2022 ----------------------------------------------------------------- */
    214 
    215 /*Forward declaration */
    216 U_CFUNC void
    217 ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs * args,
    218                       UErrorCode * err);
    219 U_CFUNC void
    220 ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs * args,
    221                                     UErrorCode * err);
    222 
    223 #define ESC_2022 0x1B /*ESC*/
    224 
    225 typedef enum
    226 {
    227         INVALID_2022 = -1, /*Doesn't correspond to a valid iso 2022 escape sequence*/
    228         VALID_NON_TERMINAL_2022 = 0, /*so far corresponds to a valid iso 2022 escape sequence*/
    229         VALID_TERMINAL_2022 = 1, /*corresponds to a valid iso 2022 escape sequence*/
    230         VALID_MAYBE_TERMINAL_2022 = 2 /*so far matches one iso 2022 escape sequence, but by adding more characters might match another escape sequence*/
    231 } UCNV_TableStates_2022;
    232 
    233 /*
    234 * The way these state transition arrays work is:
    235 * ex : ESC$B is the sequence for JISX208
    236 *      a) First Iteration: char is ESC
    237 *          i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index
    238 *             int x = normalize_esq_chars_2022[27] which is equal to 1
    239 *         ii) Search for this value in escSeqStateTable_Key_2022[]
    240 *             value of x is stored at escSeqStateTable_Key_2022[0]
    241 *        iii) Save this index as offset
    242 *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
    243 *             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
    244 *     b) Switch on this state and continue to next char
    245 *          i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index
    246 *             which is normalize_esq_chars_2022[36] == 4
    247 *         ii) x is currently 1(from above)
    248 *               x<<=5 -- x is now 32
    249 *               x+=normalize_esq_chars_2022[36]
    250 *               now x is 36
    251 *        iii) Search for this value in escSeqStateTable_Key_2022[]
    252 *             value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2
    253 *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
    254 *             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
    255 *     c) Switch on this state and continue to next char
    256 *        i)  Get the value of B from normalize_esq_chars_2022[] with int value of B as index
    257 *        ii) x is currently 36 (from above)
    258 *            x<<=5 -- x is now 1152
    259 *            x+=normalize_esq_chars_2022[66]
    260 *            now x is 1161
    261 *       iii) Search for this value in escSeqStateTable_Key_2022[]
    262 *            value of x is stored at escSeqStateTable_Key_2022[24], so offset is 24
    263 *        iv) Get state of this sequence from escSeqStateTable_Value_2022[24]
    264 *            escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022
    265 *         v) Get the converter name from escSeqStateTable_Result_2022[24] which is "JISX-208"
    266 */
    267 
    268 
    269 /*Below are the 3 arrays depicting a state transition table*/
    270 static const int8_t normalize_esq_chars_2022[256] = {
    271 /*       0      1       2       3       4      5       6        7       8       9           */
    272 
    273          0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    274         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    275         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,1      ,0      ,0
    276         ,0     ,0      ,0      ,0      ,0      ,0      ,4      ,7      ,29      ,0
    277         ,2     ,24     ,26     ,27     ,0      ,3      ,23     ,6      ,0      ,0
    278         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    279         ,0     ,0      ,0      ,0      ,5      ,8      ,9      ,10     ,11     ,12
    280         ,13    ,14     ,15     ,16     ,17     ,18     ,19     ,20     ,25     ,28
    281         ,0     ,0      ,21     ,0      ,0      ,0      ,0      ,0      ,0      ,0
    282         ,22    ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    283         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    284         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    285         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    286         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    287         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    288         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    289         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    290         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    291         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    292         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    293         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    294         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    295         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    296         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    297         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    298         ,0     ,0      ,0      ,0      ,0      ,0
    299 };
    300 
    301 #ifdef U_ENABLE_GENERIC_ISO_2022
    302 /*
    303  * When the generic ISO-2022 converter is completely removed, not just disabled
    304  * per #ifdef, then the following state table and the associated tables that are
    305  * dimensioned with MAX_STATES_2022 should be trimmed.
    306  *
    307  * Especially, VALID_MAYBE_TERMINAL_2022 will not be used any more, and all of
    308  * the associated escape sequences starting with ESC ( B should be removed.
    309  * This includes the ones with key values 1097 and all of the ones above 1000000.
    310  *
    311  * For the latter, the tables can simply be truncated.
    312  * For the former, since the tables must be kept parallel, it is probably best
    313  * to simply duplicate an adjacent table cell, parallel in all tables.
    314  *
    315  * It may make sense to restructure the tables, especially by using small search
    316  * tables for the variants instead of indexing them parallel to the table here.
    317  */
    318 #endif
    319 
    /* Number of entries in each of the parallel escSeqStateTable_*_2022[] tables. */
    320 #define MAX_STATES_2022 74
    /*
     * Keys of all known (partial and complete) escape sequences, listed in
     * ascending order. A key is built one byte at a time as
     * key = (key << 5) + normalize_esq_chars_2022[byte].
     * The index at which a key is found here is the index of its entry in
     * escSeqStateTable_Value_2022[] (and escSeqStateTable_Result_2022[] when
     * that table is compiled in).
     */
    321 static const int32_t escSeqStateTable_Key_2022[MAX_STATES_2022] = {
    322 /*   0           1           2           3           4           5           6           7           8           9           */
    323 
    324      1          ,34         ,36         ,39         ,55         ,57         ,60         ,61         ,1093       ,1096
    325     ,1097       ,1098       ,1099       ,1100       ,1101       ,1102       ,1103       ,1104       ,1105       ,1106
    326     ,1109       ,1154       ,1157       ,1160       ,1161       ,1176       ,1178       ,1179       ,1254       ,1257
    327     ,1768       ,1773       ,1957       ,35105      ,36933      ,36936      ,36937      ,36938      ,36939      ,36940
    328     ,36942      ,36943      ,36944      ,36945      ,36946      ,36947      ,36948      ,37640      ,37642      ,37644
    329     ,37646      ,37711      ,37744      ,37745      ,37746      ,37747      ,37748      ,40133      ,40136      ,40138
    330     ,40139      ,40140      ,40141      ,1123363    ,35947624   ,35947625   ,35947626   ,35947627   ,35947629   ,35947630
    331     ,35947631   ,35947635   ,35947636   ,35947638
    332 };
    333 
    334 #ifdef U_ENABLE_GENERIC_ISO_2022
    335 
    /*
     * Converter name for each entry of escSeqStateTable_Key_2022[], at the same
     * index. Only compiled when U_ENABLE_GENERIC_ISO_2022 is defined.
     * Entries whose escSeqStateTable_Value_2022[] state is
     * VALID_NON_TERMINAL_2022 are NULL.
     */
    336 static const char* const escSeqStateTable_Result_2022[MAX_STATES_2022] = {
    337  /*  0                      1                        2                      3                   4                   5                        6                      7                       8                       9    */
    338 
    339      NULL                   ,NULL                   ,NULL                   ,NULL               ,NULL               ,NULL                   ,NULL                   ,NULL                   ,"latin1"               ,"latin1"
    340     ,"latin1"               ,"ibm-865"              ,"ibm-865"              ,"ibm-865"          ,"ibm-865"          ,"ibm-865"              ,"ibm-865"              ,"JISX0201"             ,"JISX0201"             ,"latin1"
    341     ,"latin1"               ,NULL                   ,"JISX-208"             ,"ibm-5478"         ,"JISX-208"         ,NULL                   ,NULL                   ,NULL                   ,NULL                   ,"UTF8"
    342     ,"ISO-8859-1"           ,"ISO-8859-7"           ,"JIS-X-208"            ,NULL               ,"ibm-955"          ,"ibm-367"              ,"ibm-952"              ,"ibm-949"              ,"JISX-212"             ,"ibm-1383"
    343     ,"ibm-952"              ,"ibm-964"              ,"ibm-964"              ,"ibm-964"          ,"ibm-964"          ,"ibm-964"              ,"ibm-964"              ,"ibm-5478"         ,"ibm-949"              ,"ISO-IR-165"
    344     ,"CNS-11643-1992,1"     ,"CNS-11643-1992,2"     ,"CNS-11643-1992,3"     ,"CNS-11643-1992,4" ,"CNS-11643-1992,5" ,"CNS-11643-1992,6"     ,"CNS-11643-1992,7"     ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian"
    345     ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,NULL               ,"latin1"           ,"ibm-912"              ,"ibm-913"              ,"ibm-914"              ,"ibm-813"              ,"ibm-1089"
    346     ,"ibm-920"              ,"ibm-915"              ,"ibm-915"              ,"latin1"
    347 };
    348 
    349 #endif
    350 
    /*
     * UCNV_TableStates_2022 state for each entry of escSeqStateTable_Key_2022[],
     * at the same index: whether the sequence with that key is complete
     * (VALID_TERMINAL_2022), still needs more bytes (VALID_NON_TERMINAL_2022),
     * or is complete but may also be extended (VALID_MAYBE_TERMINAL_2022,
     * index 10 only). Must be kept parallel to the key table.
     */
    351 static const int8_t escSeqStateTable_Value_2022[MAX_STATES_2022] = {
    352 /*          0                           1                         2                             3                           4                           5                               6                        7                          8                           9       */
    353      VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022     ,VALID_NON_TERMINAL_2022   ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    354     ,VALID_MAYBE_TERMINAL_2022  ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    355     ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022
    356     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    357     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    358     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    359     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    360     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    361 };
    362 
    363 
    364 /* Type def for refactoring changeState_2022 code*/
    365 typedef enum{
    366 #ifdef U_ENABLE_GENERIC_ISO_2022
    367     ISO_2022=0,
    368 #endif
    369     ISO_2022_JP=1,
    370     ISO_2022_KR=2,
    371     ISO_2022_CN=3
    372 } Variant2022;
    373 
    374 /*********** ISO 2022 Converter Protos ***********/
    375 static void
    376 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode);
    377 
    378 static void
    379  _ISO2022Close(UConverter *converter);
    380 
    381 static void
    382 _ISO2022Reset(UConverter *converter, UConverterResetChoice choice);
    383 
    384 static const char*
    385 _ISO2022getName(const UConverter* cnv);
    386 
    387 static void
    388 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err);
    389 
    390 static UConverter *
    391 _ISO_2022_SafeClone(const UConverter *cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status);
    392 
    393 #ifdef U_ENABLE_GENERIC_ISO_2022
    394 static void
    395 T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, UErrorCode* err);
    396 #endif
    397 
    398 namespace {
    399 
    400 /*const UConverterSharedData _ISO2022Data;*/
    401 extern const UConverterSharedData _ISO2022JPData;
    402 extern const UConverterSharedData _ISO2022KRData;
    403 extern const UConverterSharedData _ISO2022CNData;
    404 
    405 }  // namespace
    406 
    407 /*************** Converter implementations ******************/
    408 
    409 /* The purpose of this function is to get around gcc compiler warnings. */
    410 static inline void
    411 fromUWriteUInt8(UConverter *cnv,
    412                  const char *bytes, int32_t length,
    413                  uint8_t **target, const char *targetLimit,
    414                  int32_t **offsets,
    415                  int32_t sourceIndex,
    416                  UErrorCode *pErrorCode)
    417 {
    418     char *targetChars = (char *)*target;
    419     ucnv_fromUWriteBytes(cnv, bytes, length, &targetChars, targetLimit,
    420                          offsets, sourceIndex, pErrorCode);
    421     *target = (uint8_t*)targetChars;
    422 
    423 }
    424 
    425 static inline void
    426 setInitialStateToUnicodeKR(UConverter* /*converter*/, UConverterDataISO2022 *myConverterData){
    427     if(myConverterData->version == 1) {
    428         UConverter *cnv = myConverterData->currentConverter;
    429 
    430         cnv->toUnicodeStatus=0;     /* offset */
    431         cnv->mode=0;                /* state */
    432         cnv->toULength=0;           /* byteIndex */
    433     }
    434 }
    435 
    436 static inline void
    437 setInitialStateFromUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConverterData){
    438    /* in ISO-2022-KR the designator sequence appears only once
    439     * in a file so we append it only once
    440     */
    441     if( converter->charErrorBufferLength==0){
    442 
    443         converter->charErrorBufferLength = 4;
    444         converter->charErrorBuffer[0] = 0x1b;
    445         converter->charErrorBuffer[1] = 0x24;
    446         converter->charErrorBuffer[2] = 0x29;
    447         converter->charErrorBuffer[3] = 0x43;
    448     }
    449     if(myConverterData->version == 1) {
    450         UConverter *cnv = myConverterData->currentConverter;
    451 
    452         cnv->fromUChar32=0;
    453         cnv->fromUnicodeStatus=1;   /* prevLength */
    454     }
    455 }
    456 
    457 static void
    458 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
    459 
    460     char myLocale[6]={' ',' ',' ',' ',' ',' '};
    461 
    462     cnv->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022));
    463     if(cnv->extraInfo != NULL) {
    464         UConverterNamePieces stackPieces;
    465         UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER;
    466         UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo;
    467         uint32_t version;
    468 
    469         stackArgs.onlyTestIsLoadable = pArgs->onlyTestIsLoadable;
    470 
    471         uprv_memset(myConverterData, 0, sizeof(UConverterDataISO2022));
    472         myConverterData->currentType = ASCII1;
    473         cnv->fromUnicodeStatus =FALSE;
    474         if(pArgs->locale){
    475             uprv_strncpy(myLocale, pArgs->locale, sizeof(myLocale));
    476         }
    477         version = pArgs->options & UCNV_OPTIONS_VERSION_MASK;
    478         myConverterData->version = version;
    479 
    480         /* BEGIN android-changed */
    481         /* The "jk" locale ID was made up for KDDI ISO-2022-JP. */
    482         /* The "js" locale ID was made up for SoftBank ISO-2022-JP. */
    483         if((myLocale[0]=='j' &&
    484             (myLocale[1]=='a'|| myLocale[1]=='p' || myLocale[1]=='k' ||
    485              myLocale[1]=='s') &&
    486             (myLocale[2]=='_' || myLocale[2]=='\0')))
    487         {
    488             size_t len=0;
    489             /* open the required converters and cache them */
    490             if(version>MAX_JA_VERSION) {
    491                 /* prevent indexing beyond jpCharsetMasks[] */
    492                 myConverterData->version = version = 0;
    493             }
    494             if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
    495                 myConverterData->myConverterArray[ISO8859_7] =
    496                     ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode);
    497             }
    498             if (myLocale[1]=='k') {  /* Use KDDI's version. */
    499                 myConverterData->myConverterArray[JISX208] =
    500                     ucnv_loadSharedData("kddi-jisx-208-2007", &stackPieces, &stackArgs, errorCode);
    501             } else if (myLocale[1]=='s') {  /* Use SoftBank's version. */
    502                 myConverterData->myConverterArray[JISX208] =
    503                     ucnv_loadSharedData("softbank-jisx-208-2007", &stackPieces, &stackArgs, errorCode);
    504             } else {
    505                 myConverterData->myConverterArray[JISX208] =
    506                     ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, errorCode);
    507             }
    508             /* END android-changed */
    509 
    510             if(jpCharsetMasks[version]&CSM(JISX212)) {
    511                 myConverterData->myConverterArray[JISX212] =
    512                     ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode);
    513             }
    514             if(jpCharsetMasks[version]&CSM(GB2312)) {
    515                 myConverterData->myConverterArray[GB2312] =
    516                     /* BEGIN android-changed */
    517                     ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode); /* gb_2312_80-1 */
    518                     /* END android-changed */
    519             }
    520             if(jpCharsetMasks[version]&CSM(KSC5601)) {
    521                 myConverterData->myConverterArray[KSC5601] =
    522                     ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode);
    523             }
    524 
    525             /* set the function pointers to appropriate funtions */
    526             cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);
    527             uprv_strcpy(myConverterData->locale,"ja");
    528 
    529             (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=");
    530             len = uprv_strlen(myConverterData->name);
    531             myConverterData->name[len]=(char)(myConverterData->version+(int)'0');
    532             myConverterData->name[len+1]='\0';
    533         }
    534         else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
    535             (myLocale[2]=='_' || myLocale[2]=='\0'))
    536         {
    537             const char *cnvName;
    538             if(version==1) {
    539                 cnvName="icu-internal-25546";
    540             } else {
    541                 /* BEGIN android-changed */
    542                 cnvName="ksc_5601";
    543                 /* END android-changed */
    544                 myConverterData->version=version=0;
    545             }
    546             if(pArgs->onlyTestIsLoadable) {
    547                 ucnv_canCreateConverter(cnvName, errorCode);  /* errorCode carries result */
    548                 uprv_free(cnv->extraInfo);
    549                 cnv->extraInfo=NULL;
    550                 return;
    551             } else {
    552                 myConverterData->currentConverter=ucnv_open(cnvName, errorCode);
    553                 if (U_FAILURE(*errorCode)) {
    554                     _ISO2022Close(cnv);
    555                     return;
    556                 }
    557 
    558                 if(version==1) {
    559                     (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=1");
    560                     uprv_memcpy(cnv->subChars, myConverterData->currentConverter->subChars, 4);
    561                     cnv->subCharLen = myConverterData->currentConverter->subCharLen;
    562                 }else{
    563                     (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=0");
    564                 }
    565 
    566                 /* initialize the state variables */
    567                 setInitialStateToUnicodeKR(cnv, myConverterData);
    568                 setInitialStateFromUnicodeKR(cnv, myConverterData);
    569 
    570                 /* set the function pointers to appropriate funtions */
    571                 cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData;
    572                 uprv_strcpy(myConverterData->locale,"ko");
    573             }
    574         }
    575         else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& myLocale[1]=='n'))&&
    576             (myLocale[2]=='_' || myLocale[2]=='\0'))
    577         {
    578 
    579             /* open the required converters and cache them */
    580             /* BEGIN android-changed */
    581             myConverterData->myConverterArray[GB2312_1] =
    582                 ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode);
    583             if(version==1) {
    584                 myConverterData->myConverterArray[ISO_IR_165] =
    585                     ucnv_loadSharedData("noop-iso-ir-165", &stackPieces, &stackArgs, errorCode);
    586             }
    587             myConverterData->myConverterArray[CNS_11643] =
    588                 ucnv_loadSharedData("noop-cns-11643", &stackPieces, &stackArgs, errorCode);
    589             /* END android-changed */
    590 
    591 
    592             /* set the function pointers to appropriate functions */
    593             cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData;
    594             uprv_strcpy(myConverterData->locale,"cn");
    595 
    596             if (version==0){
    597                 myConverterData->version = 0;
    598                 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=0");
    599             }else if (version==1){
    600                 myConverterData->version = 1;
    601                 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=1");
    602             }else {
    603                 myConverterData->version = 2;
    604                 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2");
    605             }
    606         }
    607         else{
    608 #ifdef U_ENABLE_GENERIC_ISO_2022
    609             myConverterData->isFirstBuffer = TRUE;
    610 
    611             /* append the UTF-8 escape sequence */
    612             cnv->charErrorBufferLength = 3;
    613             cnv->charErrorBuffer[0] = 0x1b;
    614             cnv->charErrorBuffer[1] = 0x25;
    615             cnv->charErrorBuffer[2] = 0x42;
    616 
    617             cnv->sharedData=(UConverterSharedData*)&_ISO2022Data;
    618             /* initialize the state variables */
    619             uprv_strcpy(myConverterData->name,"ISO_2022");
    620 #else
    621             *errorCode = U_UNSUPPORTED_ERROR;
    622             return;
    623 #endif
    624         }
    625 
    626         cnv->maxBytesPerUChar=cnv->sharedData->staticData->maxBytesPerChar;
    627 
    628         if(U_FAILURE(*errorCode) || pArgs->onlyTestIsLoadable) {
    629             _ISO2022Close(cnv);
    630         }
    631     } else {
    632         *errorCode = U_MEMORY_ALLOCATION_ERROR;
    633     }
    634 }
    635 
    636 
    637 static void
    638 _ISO2022Close(UConverter *converter) {
    639     UConverterDataISO2022* myData =(UConverterDataISO2022 *) (converter->extraInfo);
    640     UConverterSharedData **array = myData->myConverterArray;
    641     int32_t i;
    642 
    643     if (converter->extraInfo != NULL) {
    644         /*close the array of converter pointers and free the memory*/
    645         for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
    646             if(array[i]!=NULL) {
    647                 ucnv_unloadSharedDataIfReady(array[i]);
    648             }
    649         }
    650 
    651         ucnv_close(myData->currentConverter);
    652 
    653         if(!converter->isExtraLocal){
    654             uprv_free (converter->extraInfo);
    655             converter->extraInfo = NULL;
    656         }
    657     }
    658 }
    659 
    660 static void
    661 _ISO2022Reset(UConverter *converter, UConverterResetChoice choice) {
    662     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) (converter->extraInfo);
    663     if(choice<=UCNV_RESET_TO_UNICODE) {
    664         uprv_memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State));
    665         myConverterData->key = 0;
    666         myConverterData->isEmptySegment = FALSE;
    667     }
    668     if(choice!=UCNV_RESET_TO_UNICODE) {
    669         uprv_memset(&myConverterData->fromU2022State, 0, sizeof(ISO2022State));
    670     }
    671 #ifdef U_ENABLE_GENERIC_ISO_2022
    672     if(myConverterData->locale[0] == 0){
    673         if(choice<=UCNV_RESET_TO_UNICODE) {
    674             myConverterData->isFirstBuffer = TRUE;
    675             myConverterData->key = 0;
    676             if (converter->mode == UCNV_SO){
    677                 ucnv_close (myConverterData->currentConverter);
    678                 myConverterData->currentConverter=NULL;
    679             }
    680             converter->mode = UCNV_SI;
    681         }
    682         if(choice!=UCNV_RESET_TO_UNICODE) {
    683             /* re-append UTF-8 escape sequence */
    684             converter->charErrorBufferLength = 3;
    685             converter->charErrorBuffer[0] = 0x1b;
    686             converter->charErrorBuffer[1] = 0x28;
    687             converter->charErrorBuffer[2] = 0x42;
    688         }
    689     }
    690     else
    691 #endif
    692     {
    693         /* reset the state variables */
    694         if(myConverterData->locale[0] == 'k'){
    695             if(choice<=UCNV_RESET_TO_UNICODE) {
    696                 setInitialStateToUnicodeKR(converter, myConverterData);
    697             }
    698             if(choice!=UCNV_RESET_TO_UNICODE) {
    699                 setInitialStateFromUnicodeKR(converter, myConverterData);
    700             }
    701         }
    702     }
    703 }
    704 
    705 static const char*
    706 _ISO2022getName(const UConverter* cnv){
    707     if(cnv->extraInfo){
    708         UConverterDataISO2022* myData= (UConverterDataISO2022*)cnv->extraInfo;
    709         return myData->name;
    710     }
    711     return NULL;
    712 }
    713 
    714 
    715 /*************** to unicode *******************/
    716 /****************************************************************************
    717  * Recognized escape sequences are
    718  * <ESC>(B  ASCII
    719  * <ESC>.A  ISO-8859-1
    720  * <ESC>.F  ISO-8859-7
    721  * <ESC>(J  JISX-201
    722  * <ESC>(I  JISX-201
    723  * <ESC>$B  JISX-208
    724  * <ESC>$@  JISX-208
    725  * <ESC>$(D JISX-212
    726  * <ESC>$A  GB2312
    727  * <ESC>$(C KSC5601
         *
         * The table below has MAX_STATES_2022 entries, laid out ten per row (the
         * header row numbers the columns 0..9). changeState_2022() indexes it with
         * the table offset that getKey_2022() reports for a completed escape
         * sequence and casts the entry to StateEnum for the ISO_2022_JP variant.
         * An INVALID_STATE entry makes changeState_2022() report
         * U_UNSUPPORTED_ESCAPE_SEQUENCE.
    728  */
    729 static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= {
    730 /*      0                1               2               3               4               5               6               7               8               9    */
    731     INVALID_STATE   ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,SS2_STATE      ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    732     ,ASCII          ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,JISX201        ,HWKANA_7BIT    ,JISX201        ,INVALID_STATE
    733     ,INVALID_STATE  ,INVALID_STATE  ,JISX208        ,GB2312         ,JISX208        ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    734     ,ISO8859_1      ,ISO8859_7      ,JISX208        ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,KSC5601        ,JISX212        ,INVALID_STATE
    735     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    736     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    737     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    738     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    739 };
    740 
    741 /*************** to unicode *******************/
        /*
         * ISO-2022-CN counterpart of nextStateToUnicodeJP: MAX_STATES_2022 entries,
         * ten per row (the header row numbers the columns 0..9).
         * changeState_2022() indexes it with the table offset that getKey_2022()
         * reports for a completed escape sequence and casts the entry to StateEnum
         * for the ISO_2022_CN variant; INVALID_STATE entries are reported there as
         * U_UNSUPPORTED_ESCAPE_SEQUENCE.
         */
    742 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
    743 /*      0                1               2               3               4               5               6               7               8               9    */
    744      INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,SS2_STATE      ,SS3_STATE      ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    745     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    746     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    747     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    748     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,GB2312_1       ,INVALID_STATE  ,ISO_IR_165
    749     ,CNS_11643_1    ,CNS_11643_2    ,CNS_11643_3    ,CNS_11643_4    ,CNS_11643_5    ,CNS_11643_6    ,CNS_11643_7    ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    750     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    751     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    752 };
    753 
    754 
    755 static UCNV_TableStates_2022
    756 getKey_2022(char c,int32_t* key,int32_t* offset){
    757     int32_t togo;
    758     int32_t low = 0;
    759     int32_t hi = MAX_STATES_2022;
    760     int32_t oldmid=0;
    761 
    762     togo = normalize_esq_chars_2022[(uint8_t)c];
    763     if(togo == 0) {
    764         /* not a valid character anywhere in an escape sequence */
    765         *key = 0;
    766         *offset = 0;
    767         return INVALID_2022;
    768     }
    769     togo = (*key << 5) + togo;
    770 
    771     while (hi != low)  /*binary search*/{
    772 
    773         register int32_t mid = (hi+low) >> 1; /*Finds median*/
    774 
    775         if (mid == oldmid)
    776             break;
    777 
    778         if (escSeqStateTable_Key_2022[mid] > togo){
    779             hi = mid;
    780         }
    781         else if (escSeqStateTable_Key_2022[mid] < togo){
    782             low = mid;
    783         }
    784         else /*we found it*/{
    785             *key = togo;
    786             *offset = mid;
    787             return (UCNV_TableStates_2022)escSeqStateTable_Value_2022[mid];
    788         }
    789         oldmid = mid;
    790 
    791     }
    792 
    793     *key = 0;
    794     *offset = 0;
    795     return INVALID_2022;
    796 }
    797 
    798 /*runs through a state machine to determine the escape sequence - codepage correspondence
         *
         * Reads bytes from *source (advancing it, never past sourceLimit), records each
         * byte in _this->toUBytes[] and feeds it to getKey_2022(), starting from the key
         * saved in the converter data. The resulting key is stored back in the converter
         * data before returning.
         *
         * Outcomes:
         * - input exhausted in the middle of a sequence: returns with the non-zero key
         *   saved and *err untouched;
         * - complete sequence: the variant-specific branch (selected by var) updates
         *   toU2022State (cs[], g, prevG) or sets *err;
         * - on success _this->toULength is reset to 0;
         * - U_ILLEGAL_ESCAPE_SEQUENCE: everything except the first recorded byte is
         *   backed out, either by rewinding *source or, for bytes that were recorded
         *   before this call, by copying them to _this->preToU with a negative
         *   preToULength;
         * - U_UNSUPPORTED_ESCAPE_SEQUENCE: _this->toUCallbackReason is set to
         *   UCNV_UNASSIGNED.
         *
         * NOTE(review): the ISO_2022 case under U_ENABLE_GENERIC_ISO_2022 uses
         * myUConverter, which is not declared inside this function - confirm whether
         * that configuration still builds.
    799  */
    800 static void
    801 changeState_2022(UConverter* _this,
    802                 const char** source,
    803                 const char* sourceLimit,
    804                 Variant2022 var,
    805                 UErrorCode* err){
    806     UCNV_TableStates_2022 value;
    807     UConverterDataISO2022* myData2022 = ((UConverterDataISO2022*)_this->extraInfo);
    808     uint32_t key = myData2022->key;
    809     int32_t offset = 0;
            /* number of bytes that were already recorded in toUBytes[] when this call started */
    810     int8_t initialToULength = _this->toULength;
    811     char c;
    812 
    813     value = VALID_NON_TERMINAL_2022;
            /* consume one byte at a time until the sequence terminates, fails or the input ends */
    814     while (*source < sourceLimit) {
    815         c = *(*source)++;
    816         _this->toUBytes[_this->toULength++]=(uint8_t)c;
    817         value = getKey_2022(c,(int32_t *) &key, &offset);
    818 
    819         switch (value){
    820 
    821         case VALID_NON_TERMINAL_2022 :
    822             /* continue with the loop */
    823             break;
    824 
    825         case VALID_TERMINAL_2022:
    826             key = 0;
    827             goto DONE;
    828 
    829         case INVALID_2022:
    830             goto DONE;
    831 
    832         case VALID_MAYBE_TERMINAL_2022:
    833 #ifdef U_ENABLE_GENERIC_ISO_2022
    834             /* ESC ( B is ambiguous only for ISO_2022 itself */
    835             if(var == ISO_2022) {
    836                 /* discard toUBytes[] for ESC ( B because this sequence is correct and complete */
    837                 _this->toULength = 0;
    838 
    839                 /* TODO need to indicate that ESC ( B was seen; if failure, then need to replay from source or from MBCS-style replay */
    840 
    841                 /* continue with the loop */
    842                 value = VALID_NON_TERMINAL_2022;
    843                 break;
    844             } else
    845 #endif
    846             {
    847                 /* not ISO_2022 itself, finish here */
    848                 value = VALID_TERMINAL_2022;
    849                 key = 0;
    850                 goto DONE;
    851             }
    852         }
    853     }
    854 
    855 DONE:
            /* save the key: it is the starting key of the next call (see the initialization of key above) */
    856     myData2022->key = key;
    857 
    858     if (value == VALID_NON_TERMINAL_2022) {
    859         /* indicate that the escape sequence is incomplete: key!=0 */
    860         return;
    861     } else if (value == INVALID_2022 ) {
    862         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    863     } else /* value == VALID_TERMINAL_2022 */ {
    864         switch(var){
    865 #ifdef U_ENABLE_GENERIC_ISO_2022
    866         case ISO_2022:
    867         {
    868             const char *chosenConverterName = escSeqStateTable_Result_2022[offset];
    869             if(chosenConverterName == NULL) {
    870                 /* SS2 or SS3 */
    871                 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    872                 _this->toUCallbackReason = UCNV_UNASSIGNED;
    873                 return;
    874             }
    875 
    876             _this->mode = UCNV_SI;
    877             ucnv_close(myData2022->currentConverter);
    878             myData2022->currentConverter = myUConverter = ucnv_open(chosenConverterName, err);
    879             if(U_SUCCESS(*err)) {
    880                 myUConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
    881                 _this->mode = UCNV_SO;
    882             }
    883             break;
    884         }
    885 #endif
    886         case ISO_2022_JP:
    887             {
    888                 StateEnum tempState=(StateEnum)nextStateToUnicodeJP[offset];
    889                 switch(tempState) {
    890                 case INVALID_STATE:
    891                     *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    892                     break;
    893                 case SS2_STATE:
    894                     if(myData2022->toU2022State.cs[2]!=0) {
                                /* remember the current g in prevG only when it is below 2 */
    895                         if(myData2022->toU2022State.g<2) {
    896                             myData2022->toU2022State.prevG=myData2022->toU2022State.g;
    897                         }
    898                         myData2022->toU2022State.g=2;
    899                     } else {
    900                         /* illegal to have SS2 before a matching designator */
    901                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    902                     }
    903                     break;
    904                 /* case SS3_STATE: not used in ISO-2022-JP-x */
    905                 case ISO8859_1:
    906                 case ISO8859_7:
    907                     if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
    908                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    909                     } else {
    910                         /* G2 charset for SS2 */
    911                         myData2022->toU2022State.cs[2]=(int8_t)tempState;
    912                     }
    913                     break;
    914                 default:
    915                     if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
    916                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    917                     } else {
    918                         /* G0 charset */
    919                         myData2022->toU2022State.cs[0]=(int8_t)tempState;
    920                     }
    921                     break;
    922                 }
    923             }
    924             break;
    925         case ISO_2022_CN:
    926             {
    927                 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
    928                 switch(tempState) {
    929                 case INVALID_STATE:
    930                     *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    931                     break;
    932                 case SS2_STATE:
    933                     if(myData2022->toU2022State.cs[2]!=0) {
    934                         if(myData2022->toU2022State.g<2) {
    935                             myData2022->toU2022State.prevG=myData2022->toU2022State.g;
    936                         }
    937                         myData2022->toU2022State.g=2;
    938                     } else {
    939                         /* illegal to have SS2 before a matching designator */
    940                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    941                     }
    942                     break;
    943                 case SS3_STATE:
    944                     if(myData2022->toU2022State.cs[3]!=0) {
    945                         if(myData2022->toU2022State.g<2) {
    946                             myData2022->toU2022State.prevG=myData2022->toU2022State.g;
    947                         }
    948                         myData2022->toU2022State.g=3;
    949                     } else {
    950                         /* illegal to have SS3 before a matching designator */
    951                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    952                     }
    953                     break;
    954                 case ISO_IR_165:
    955                     if(myData2022->version==0) {
    956                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    957                         break;
    958                     }
    959                     /*fall through*/
    960                 case GB2312_1:
    961                     /*fall through*/
    962                 case CNS_11643_1:
                            /* these three charsets are recorded in cs[1] */
    963                     myData2022->toU2022State.cs[1]=(int8_t)tempState;
    964                     break;
    965                 case CNS_11643_2:
    966                     myData2022->toU2022State.cs[2]=(int8_t)tempState;
    967                     break;
    968                 default:
    969                     /* other CNS 11643 planes */
    970                     if(myData2022->version==0) {
    971                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    972                     } else {
    973                        myData2022->toU2022State.cs[3]=(int8_t)tempState;
    974                     }
    975                     break;
    976                 }
    977             }
    978             break;
    979         case ISO_2022_KR:
                    /* only the table entry at offset 0x30 is accepted for this variant */
    980             if(offset==0x30){
    981                 /* nothing to be done, just accept this one escape sequence */
    982             } else {
    983                 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    984             }
    985             break;
    986 
    987         default:
    988             *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    989             break;
    990         }
    991     }
    992     if(U_SUCCESS(*err)) {
                /* the sequence was accepted: discard the bytes recorded for it */
    993         _this->toULength = 0;
    994     } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) {
    995         if(_this->toULength>1) {
    996             /*
    997              * Ticket 5691: consistent illegal sequences:
    998              * - We include at least the first byte (ESC) in the illegal sequence.
    999              * - If any of the non-initial bytes could be the start of a character,
   1000              *   we stop the illegal sequence before the first one of those.
   1001              *   In escape sequences, all following bytes are "printable", that is,
   1002              *   unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS),
   1003              *   they are valid single/lead bytes.
   1004              *   For simplicity, we always only report the initial ESC byte as the
   1005              *   illegal sequence and back out all other bytes we looked at.
   1006              */
   1007             /* Back out some bytes. */
   1008             int8_t backOutDistance=_this->toULength-1;
   1009             int8_t bytesFromThisBuffer=_this->toULength-initialToULength;
   1010             if(backOutDistance<=bytesFromThisBuffer) {
   1011                 /* same as initialToULength<=1 */
   1012                 *source-=backOutDistance;
   1013             } else {
   1014                 /* Back out bytes from the previous buffer: Need to replay them. */
   1015                 _this->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance);
   1016                 /* same as -(initialToULength-1) */
   1017                 /* preToULength is negative! */
   1018                 uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULength);
   1019                 *source-=bytesFromThisBuffer;
   1020             }
   1021             _this->toULength=1;
   1022         }
   1023     } else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) {
   1024         _this->toUCallbackReason = UCNV_UNASSIGNED;
   1025     }
   1026 }
   1027 
   1028 /*Checks the characters of the buffer against valid 2022 escape sequences
   1029 *if the match we return a pointer to the initial start of the sequence otherwise
   1030 *we return sourceLimit
   1031 */
   1032 /*for 2022 looks ahead in the stream
   1033  *to determine the longest possible convertible
   1034  *data stream
   1035  */
   1036 static inline const char*
   1037 getEndOfBuffer_2022(const char** source,
   1038                    const char* sourceLimit,
   1039                    UBool /*flush*/){
   1040 
   1041     const char* mySource = *source;
   1042 
   1043 #ifdef U_ENABLE_GENERIC_ISO_2022
   1044     if (*source >= sourceLimit)
   1045         return sourceLimit;
   1046 
   1047     do{
   1048 
   1049         if (*mySource == ESC_2022){
   1050             int8_t i;
   1051             int32_t key = 0;
   1052             int32_t offset;
   1053             UCNV_TableStates_2022 value = VALID_NON_TERMINAL_2022;
   1054 
   1055             /* Kludge: I could not
   1056             * figure out the reason for validating an escape sequence
   1057             * twice - once here and once in changeState_2022().
   1058             * is it possible to have an ESC character in a ISO2022
   1059             * byte stream which is valid in a code page? Is it legal?
   1060             */
   1061             for (i=0;
   1062             (mySource+i < sourceLimit)&&(value == VALID_NON_TERMINAL_2022);
   1063             i++) {
   1064                 value =  getKey_2022(*(mySource+i), &key, &offset);
   1065             }
   1066             if (value > 0 || *mySource==ESC_2022)
   1067                 return mySource;
   1068 
   1069             if ((value == VALID_NON_TERMINAL_2022)&&(!flush) )
   1070                 return sourceLimit;
   1071         }
   1072     }while (++mySource < sourceLimit);
   1073 
   1074     return sourceLimit;
   1075 #else
   1076     while(mySource < sourceLimit && *mySource != ESC_2022) {
   1077         ++mySource;
   1078     }
   1079     return mySource;
   1080 #endif
   1081 }
   1082 
   1083 
   1084 /* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c
   1085  * any future change in _MBCSFromUChar32() function should be reflected here.
   1086  * @return number of bytes in *value; negative number if fallback; 0 if no mapping
   1087  */
   1088 static inline int32_t
   1089 MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData,
   1090                                          UChar32 c,
   1091                                          uint32_t* value,
   1092                                          UBool useFallback,
   1093                                          int outputType)
   1094 {
   1095     const int32_t *cx;
   1096     const uint16_t *table;
   1097     uint32_t stage2Entry;
   1098     uint32_t myValue;
   1099     int32_t length;
   1100     const uint8_t *p;
   1101     /*
   1102      * TODO(markus): Use and require new, faster MBCS conversion table structures.
   1103      * Use internal version of ucnv_open() that verifies that the new structures are available,
   1104      * else U_INTERNAL_PROGRAM_ERROR.
   1105      */
   1106     /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
   1107     if(c<0x10000 || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
   1108         table=sharedData->mbcs.fromUnicodeTable;
   1109         stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
   1110         /* get the bytes and the length for the output */
   1111         if(outputType==MBCS_OUTPUT_2){
   1112             myValue=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
   1113             if(myValue<=0xff) {
   1114                 length=1;
   1115             } else {
   1116                 length=2;
   1117             }
   1118         } else /* outputType==MBCS_OUTPUT_3 */ {
   1119             p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
   1120             myValue=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
   1121             if(myValue<=0xff) {
   1122                 length=1;
   1123             } else if(myValue<=0xffff) {
   1124                 length=2;
   1125             } else {
   1126                 length=3;
   1127             }
   1128         }
   1129         /* is this code point assigned, or do we use fallbacks? */
   1130         if((stage2Entry&(1<<(16+(c&0xf))))!=0) {
   1131             /* assigned */
   1132             *value=myValue;
   1133             return length;
   1134         } else if(FROM_U_USE_FALLBACK(useFallback, c) && myValue!=0) {
   1135             /*
   1136              * We allow a 0 byte output if the "assigned" bit is set for this entry.
   1137              * There is no way with this data structure for fallback output
   1138              * to be a zero byte.
   1139              */
   1140             *value=myValue;
   1141             return -length;
   1142         }
   1143     }
   1144 
   1145     cx=sharedData->mbcs.extIndexes;
   1146     if(cx!=NULL) {
   1147         return ucnv_extSimpleMatchFromU(cx, c, value, useFallback);
   1148     }
   1149 
   1150     /* unassigned */
   1151     return 0;
   1152 }
   1153 
   1154 /* This inline function replicates code in _MBCSSingleFromUChar32() function in ucnvmbcs.c
   1155  * any future change in _MBCSSingleFromUChar32() function should be reflected here.
   1156  * @param retval pointer to output byte
   1157  * @return 1 roundtrip byte  0 no mapping  -1 fallback byte
   1158  */
   1159 static inline int32_t
   1160 MBCS_SINGLE_FROM_UCHAR32(UConverterSharedData* sharedData,
   1161                                        UChar32 c,
   1162                                        uint32_t* retval,
   1163                                        UBool useFallback)
   1164 {
   1165     const uint16_t *table;
   1166     int32_t value;
   1167     /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
   1168     if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
   1169         return 0;
   1170     }
   1171     /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
   1172     table=sharedData->mbcs.fromUnicodeTable;
   1173     /* get the byte for the output */
   1174     value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
   1175     /* is this code point assigned, or do we use fallbacks? */
   1176     *retval=(uint32_t)(value&0xff);
   1177     if(value>=0xf00) {
   1178         return 1;  /* roundtrip */
   1179     } else if(useFallback ? value>=0x800 : value>=0xc00) {
   1180         return -1;  /* fallback taken */
   1181     } else {
   1182         return 0;  /* no mapping */
   1183     }
   1184 }
   1185 
   /*
    * Check that the result is a 2-byte value with each byte in the range A1..FE
    * (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte
    * to move it to the ISO 2022 range 21..7E.
    * Return 0 if out of range. That includes values wider than two bytes:
    * they are rejected explicitly, because the narrowing casts below would
    * otherwise only look at their low 16 bits.
    */
   static inline uint32_t
   _2022FromGR94DBCS(uint32_t value) {
       if( value <= 0xffff &&
           (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) &&
           (uint8_t)(value - 0xa1) <= (0xfe - 0xa1)
       ) {
           return value - 0x8080;  /* shift down to 21..7e byte range */
       } else {
           return 0;  /* not valid for ISO 2022 */
       }
   }
   1202 
   1203 #if 0 /* 5691: Call sites now check for validity. They can just += 0x8080 after that. */
   1204 /*
   1205  * This method does the reverse of _2022FromGR94DBCS(). Given the 2022 code point, it returns the
   1206  * 2 byte value that is in the range A1..FE for each byte. Otherwise it returns the 2022 code point
   1207  * unchanged.
         *
         * In detail: 0x8080 is added to value, and the sum is returned only if it passes
         * the same A1..FE range checks that _2022FromGR94DBCS() applies; any other input
         * is returned exactly as it was passed in.
         *
         * This function is compiled out by the enclosing #if 0.
   1208  */
   1209 static inline uint32_t
   1210 _2022ToGR94DBCS(uint32_t value) {
   1211     uint32_t returnValue = value + 0x8080;
   1212     if( (uint16_t)(returnValue - 0xa1a1) <= (0xfefe - 0xa1a1) &&
   1213         (uint8_t)(returnValue - 0xa1) <= (0xfe - 0xa1)) {
   1214         return returnValue;
   1215     } else {
   1216         return value;
   1217     }
   1218 }
   1219 #endif
   1220 
   1221 #ifdef U_ENABLE_GENERIC_ISO_2022
   1222 
   1223 /**********************************************************************************
   1224 *  ISO-2022 Converter
   1225 *
   1226 *
   1227 */
   1228 
   1229 static void
   1230 T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args,
   1231                                                            UErrorCode* err){
   1232     const char* mySourceLimit, *realSourceLimit;
   1233     const char* sourceStart;
   1234     const UChar* myTargetStart;
   1235     UConverter* saveThis;
   1236     UConverterDataISO2022* myData;
   1237     int8_t length;
   1238 
   1239     saveThis = args->converter;
   1240     myData=((UConverterDataISO2022*)(saveThis->extraInfo));
   1241 
   1242     realSourceLimit = args->sourceLimit;
   1243     while (args->source < realSourceLimit) {
   1244         if(myData->key == 0) { /* are we in the middle of an escape sequence? */
   1245             /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
   1246             mySourceLimit = getEndOfBuffer_2022(&(args->source), realSourceLimit, args->flush);
   1247 
   1248             if(args->source < mySourceLimit) {
   1249                 if(myData->currentConverter==NULL) {
   1250                     myData->currentConverter = ucnv_open("ASCII",err);
   1251                     if(U_FAILURE(*err)){
   1252                         return;
   1253                     }
   1254 
   1255                     myData->currentConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
   1256                     saveThis->mode = UCNV_SO;
   1257                 }
   1258 
   1259                 /* convert to before the ESC or until the end of the buffer */
   1260                 myData->isFirstBuffer=FALSE;
   1261                 sourceStart = args->source;
   1262                 myTargetStart = args->target;
   1263                 args->converter = myData->currentConverter;
   1264                 ucnv_toUnicode(args->converter,
   1265                     &args->target,
   1266                     args->targetLimit,
   1267                     &args->source,
   1268                     mySourceLimit,
   1269                     args->offsets,
   1270                     (UBool)(args->flush && mySourceLimit == realSourceLimit),
   1271                     err);
   1272                 args->converter = saveThis;
   1273 
   1274                 if (*err == U_BUFFER_OVERFLOW_ERROR) {
   1275                     /* move the overflow buffer */
   1276                     length = saveThis->UCharErrorBufferLength = myData->currentConverter->UCharErrorBufferLength;
   1277                     myData->currentConverter->UCharErrorBufferLength = 0;
   1278                     if(length > 0) {
   1279                         uprv_memcpy(saveThis->UCharErrorBuffer,
   1280                                     myData->currentConverter->UCharErrorBuffer,
   1281                                     length*U_SIZEOF_UCHAR);
   1282                     }
   1283                     return;
   1284                 }
   1285 
   1286                 /*
   1287                  * At least one of:
   1288                  * -Error while converting
   1289                  * -Done with entire buffer
   1290                  * -Need to write offsets or update the current offset
   1291                  *  (leave that up to the code in ucnv.c)
   1292                  *
   1293                  * or else we just stopped at an ESC byte and continue with changeState_2022()
   1294                  */
   1295                 if (U_FAILURE(*err) ||
   1296                     (args->source == realSourceLimit) ||
   1297                     (args->offsets != NULL && (args->target != myTargetStart || args->source != sourceStart) ||
   1298                     (mySourceLimit < realSourceLimit && myData->currentConverter->toULength > 0))
   1299                 ) {
   1300                     /* copy partial or error input for truncated detection and error handling */
   1301                     if(U_FAILURE(*err)) {
   1302                         length = saveThis->invalidCharLength = myData->currentConverter->invalidCharLength;
   1303                         if(length > 0) {
   1304                             uprv_memcpy(saveThis->invalidCharBuffer, myData->currentConverter->invalidCharBuffer, length);
   1305                         }
   1306                     } else {
   1307                         length = saveThis->toULength = myData->currentConverter->toULength;
   1308                         if(length > 0) {
   1309                             uprv_memcpy(saveThis->toUBytes, myData->currentConverter->toUBytes, length);
   1310                             if(args->source < mySourceLimit) {
   1311                                 *err = U_TRUNCATED_CHAR_FOUND; /* truncated input before ESC */
   1312                             }
   1313                         }
   1314                     }
   1315                     return;
   1316                 }
   1317             }
   1318         }
   1319 
   1320         sourceStart = args->source;
   1321         changeState_2022(args->converter,
   1322                &(args->source),
   1323                realSourceLimit,
   1324                ISO_2022,
   1325                err);
   1326         if (U_FAILURE(*err) || (args->source != sourceStart && args->offsets != NULL)) {
   1327             /* let the ucnv.c code update its current offset */
   1328             return;
   1329         }
   1330     }
   1331 }
   1332 
   1333 #endif
   1334 
   1335 /*
   1336  * To Unicode Callback helper function
   1337  */
   1338 static void
   1339 toUnicodeCallback(UConverter *cnv,
   1340                   const uint32_t sourceChar, const uint32_t targetUniChar,
   1341                   UErrorCode* err){
   1342     if(sourceChar>0xff){
   1343         cnv->toUBytes[0] = (uint8_t)(sourceChar>>8);
   1344         cnv->toUBytes[1] = (uint8_t)sourceChar;
   1345         cnv->toULength = 2;
   1346     }
   1347     else{
   1348         cnv->toUBytes[0] =(char) sourceChar;
   1349         cnv->toULength = 1;
   1350     }
   1351 
   1352     if(targetUniChar == (missingCharMarker-1/*0xfffe*/)){
   1353         *err = U_INVALID_CHAR_FOUND;
   1354     }
   1355     else{
   1356         *err = U_ILLEGAL_CHAR_FOUND;
   1357     }
   1358 }
   1359 
   1360 /**************************************ISO-2022-JP*************************************************/
   1361 
   1362 /************************************** IMPORTANT **************************************************
   1363 * The UConverter_fromUnicode_ISO2022_JP converter does not use ucnv_fromUnicode() functions for SBCS,DBCS and
   1364 * MBCS; instead, the values are obtained directly by calling _MBCSFromUChar32().
   1365 * The converter iterates over each Unicode codepoint
   1366 * to obtain the equivalent codepoints from the codepages supported. Since the source buffer is
   1367 * processed one char at a time it would make sense to reduce the extra processing a canned converter
   1368 * would do as far as possible.
   1369 *
   1370 * If the implementation of these macros or structure of sharedData struct change in the future, make
   1371 * sure that ISO-2022 is also changed.
   1372 ***************************************************************************************************
   1373 */
   1374 
   1375 /***************************************************************************************************
   1376 * Rules for ISO-2022-jp encoding
   1377 * (i)   Escape sequences must be fully contained within a line they should not
   1378 *       span new lines or CRs
   1379 * (ii)  If the last character on a line is represented by two bytes then an ASCII or
   1380 *       JIS-Roman character escape sequence should follow before the line terminates
   1381 * (iii) If the first character on the line is represented by two bytes then a two
   1382 *       byte character escape sequence should precede it
   1383 * (iv)  If no escape sequence is encountered then the characters are ASCII
   1384 * (v)   Latin(ISO-8859-1) and Greek(ISO-8859-7) characters must be designated to G2,
   1385 *       and invoked with SS2 (ESC N).
   1386 * (vi)  If there is any G0 designation in text, there must be a switch to
   1387 *       ASCII or to JIS X 0201-Roman before a space character (but not
   1388 *       necessarily before "ESC 4/14 2/0" or "ESC N ' '") or control
   1389 *       characters such as tab or CRLF.
   1390 * (vii) Supported encodings:
   1391 *          ASCII, JISX201, JISX208, JISX212, GB2312, KSC5601, ISO-8859-1,ISO-8859-7
   1392 *
   1393 *  source : RFC-1554
   1394 *
   1395 *          JISX201, JISX208,JISX212 : new .cnv data files created
   1396 *          KSC5601 : alias to ibm-949 mapping table
   1397 *          GB2312 : alias to ibm-1386 mapping table
   1398 *          ISO-8859-1 : Algorithmic implemented as LATIN1 case
   1399 *          ISO-8859-7 : alias to ibm-9409 mapping table
   1400 */
   1401 
   1402 /* preference order of JP charsets */
   1403 static const StateEnum jpCharsetPref[]={
   1404     ASCII,
   1405     JISX201,
   1406     ISO8859_1,
   1407     ISO8859_7,
   1408     JISX208,
   1409     JISX212,
   1410     GB2312,
   1411     KSC5601,
   1412     HWKANA_7BIT
   1413 };
   1414 
   1415 /*
   1416  * The escape sequences must be in order of the enum constants like JISX201  = 3,
   1417  * not in order of jpCharsetPref[]!
   1418  */
   1419 static const char escSeqChars[][6] ={
   1420     "\x1B\x28\x42",         /* <ESC>(B  ASCII       */
   1421     "\x1B\x2E\x41",         /* <ESC>.A  ISO-8859-1  */
   1422     "\x1B\x2E\x46",         /* <ESC>.F  ISO-8859-7  */
   1423     "\x1B\x28\x4A",         /* <ESC>(J  JISX-201    */
   1424     "\x1B\x24\x42",         /* <ESC>$B  JISX-208    */
   1425     "\x1B\x24\x28\x44",     /* <ESC>$(D JISX-212    */
   1426     "\x1B\x24\x41",         /* <ESC>$A  GB2312      */
   1427     "\x1B\x24\x28\x43",     /* <ESC>$(C KSC5601     */
   1428     "\x1B\x28\x49"          /* <ESC>(I  HWKANA_7BIT */
   1429 
   1430 };
   1431 static  const int8_t escSeqCharsLen[] ={
   1432     3, /* length of <ESC>(B  ASCII       */
   1433     3, /* length of <ESC>.A  ISO-8859-1  */
   1434     3, /* length of <ESC>.F  ISO-8859-7  */
   1435     3, /* length of <ESC>(J  JISX-201    */
   1436     3, /* length of <ESC>$B  JISX-208    */
   1437     4, /* length of <ESC>$(D JISX-212    */
   1438     3, /* length of <ESC>$A  GB2312      */
   1439     4, /* length of <ESC>$(C KSC5601     */
   1440     3  /* length of <ESC>(I  HWKANA_7BIT */
   1441 };
   1442 
   1443 /*
   1444 * The iteration over various code pages works this way:
   1445 * i)   Get the currentState from myConverterData->currentState
   1446 * ii)  Check if the character is mapped to a valid character in the currentState
   1447 *      Yes ->  a) set the initIterState to currentState
   1448 *       b) remain in this state until an invalid character is found
   1449 *      No  ->  a) go to the next code page and find the character
   1450 * iii) Before changing the state, increment the current state and check if it
   1451 *      is equal to the initIterState
   1452 *      Yes ->  A character that cannot be represented in any of the supported encodings
   1453 *       break and return a U_INVALID_CHARACTER error
   1454 *      No  ->  Continue and find the character in next code page
   1455 *
   1456 *
   1457 * TODO: Implement a priority technique where the users are allowed to set the priority of code pages
   1458 */
   1459 
   1460 /* Map 00..7F to Unicode according to JIS X 0201. */
   1461 static inline uint32_t
   1462 jisx201ToU(uint32_t value) {
   1463     if(value < 0x5c) {
   1464         return value;
   1465     } else if(value == 0x5c) {
   1466         return 0xa5;
   1467     } else if(value == 0x7e) {
   1468         return 0x203e;
   1469     } else /* value <= 0x7f */ {
   1470         return value;
   1471     }
   1472 }
   1473 
   1474 /* Map Unicode to 00..7F according to JIS X 0201. Return U+FFFE if unmappable. */
   1475 static inline uint32_t
   1476 jisx201FromU(uint32_t value) {
   1477     if(value<=0x7f) {
   1478         if(value!=0x5c && value!=0x7e) {
   1479             return value;
   1480         }
   1481     } else if(value==0xa5) {
   1482         return 0x5c;
   1483     } else if(value==0x203e) {
   1484         return 0x7e;
   1485     }
   1486     return 0xfffe;
   1487 }
   1488 
   1489 /*
   1490  * Take a valid Shift-JIS byte pair, check that it is in the range corresponding
   1491  * to JIS X 0208, and convert it to a pair of 21..7E bytes.
   1492  * Return 0 if the byte pair is out of range.
   1493  */
   1494 static inline uint32_t
   1495 _2022FromSJIS(uint32_t value) {
   1496     uint8_t trail;
   1497 
   1498     if(value > 0xEFFC) {
   1499         return 0;  /* beyond JIS X 0208 */
   1500     }
   1501 
   1502     trail = (uint8_t)value;
   1503 
   1504     value &= 0xff00;  /* lead byte */
   1505     if(value <= 0x9f00) {
   1506         value -= 0x7000;
   1507     } else /* 0xe000 <= value <= 0xef00 */ {
   1508         value -= 0xb000;
   1509     }
   1510     value <<= 1;
   1511 
   1512     if(trail <= 0x9e) {
   1513         value -= 0x100;
   1514         if(trail <= 0x7e) {
   1515             value |= trail - 0x1f;
   1516         } else {
   1517             value |= trail - 0x20;
   1518         }
   1519     } else /* trail <= 0xfc */ {
   1520         value |= trail - 0x7e;
   1521     }
   1522     return value;
   1523 }
   1524 
   1525 /*
   1526  * Convert a pair of JIS X 0208 21..7E bytes to Shift-JIS.
   1527  * If either byte is outside 21..7E make sure that the result is not valid
   1528  * for Shift-JIS so that the converter catches it.
   1529  * Some invalid byte values already turn into equally invalid Shift-JIS
   1530  * byte values and need not be tested explicitly.
   1531  */
   1532 static inline void
   1533 _2022ToSJIS(uint8_t c1, uint8_t c2, char bytes[2]) {
   1534     if(c1&1) {
   1535         ++c1;
   1536         if(c2 <= 0x5f) {
   1537             c2 += 0x1f;
   1538         } else if(c2 <= 0x7e) {
   1539             c2 += 0x20;
   1540         } else {
   1541             c2 = 0;  /* invalid */
   1542         }
   1543     } else {
   1544         if((uint8_t)(c2-0x21) <= ((0x7e)-0x21)) {
   1545             c2 += 0x7e;
   1546         } else {
   1547             c2 = 0;  /* invalid */
   1548         }
   1549     }
   1550     c1 >>= 1;
   1551     if(c1 <= 0x2f) {
   1552         c1 += 0x70;
   1553     } else if(c1 <= 0x3f) {
   1554         c1 += 0xb0;
   1555     } else {
   1556         c1 = 0;  /* invalid */
   1557     }
   1558     bytes[0] = (char)c1;
   1559     bytes[1] = (char)c2;
   1560 }
   1561 
   1562 /*
   1563  * JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS)
   1564  * Katakana.
   1565  * Now that we use a Shift-JIS table for JIS X 0208 we need to hardcode these fallbacks
   1566  * because Shift-JIS roundtrips half-width Katakana to single bytes.
   1567  * These were the only fallbacks in ICU's jisx-208.ucm file.
   1568  */
   1569 static const uint16_t hwkana_fb[HWKANA_END - HWKANA_START + 1] = {
   1570     0x2123,  /* U+FF61 */
   1571     0x2156,
   1572     0x2157,
   1573     0x2122,
   1574     0x2126,
   1575     0x2572,
   1576     0x2521,
   1577     0x2523,
   1578     0x2525,
   1579     0x2527,
   1580     0x2529,
   1581     0x2563,
   1582     0x2565,
   1583     0x2567,
   1584     0x2543,
   1585     0x213C,  /* U+FF70 */
   1586     0x2522,
   1587     0x2524,
   1588     0x2526,
   1589     0x2528,
   1590     0x252A,
   1591     0x252B,
   1592     0x252D,
   1593     0x252F,
   1594     0x2531,
   1595     0x2533,
   1596     0x2535,
   1597     0x2537,
   1598     0x2539,
   1599     0x253B,
   1600     0x253D,
   1601     0x253F,  /* U+FF80 */
   1602     0x2541,
   1603     0x2544,
   1604     0x2546,
   1605     0x2548,
   1606     0x254A,
   1607     0x254B,
   1608     0x254C,
   1609     0x254D,
   1610     0x254E,
   1611     0x254F,
   1612     0x2552,
   1613     0x2555,
   1614     0x2558,
   1615     0x255B,
   1616     0x255E,
   1617     0x255F,  /* U+FF90 */
   1618     0x2560,
   1619     0x2561,
   1620     0x2562,
   1621     0x2564,
   1622     0x2566,
   1623     0x2568,
   1624     0x2569,
   1625     0x256A,
   1626     0x256B,
   1627     0x256C,
   1628     0x256D,
   1629     0x256F,
   1630     0x2573,
   1631     0x212B,
   1632     0x212C   /* U+FF9F */
   1633 };
   1634 
   1635 static void
   1636 UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err) {
   1637     UConverter *cnv = args->converter;
   1638     UConverterDataISO2022 *converterData;
   1639     ISO2022State *pFromU2022State;
   1640     uint8_t *target = (uint8_t *) args->target;
   1641     const uint8_t *targetLimit = (const uint8_t *) args->targetLimit;
   1642     const UChar* source = args->source;
   1643     const UChar* sourceLimit = args->sourceLimit;
   1644     int32_t* offsets = args->offsets;
   1645     UChar32 sourceChar;
   1646     char buffer[8];
   1647     int32_t len, outLen;
   1648     int8_t choices[10];
   1649     int32_t choiceCount;
   1650     uint32_t targetValue = 0;
   1651     UBool useFallback;
   1652 
   1653     int32_t i;
   1654     int8_t cs, g;
   1655 
   1656     /* set up the state */
   1657     converterData     = (UConverterDataISO2022*)cnv->extraInfo;
   1658     pFromU2022State   = &converterData->fromU2022State;
   1659 
   1660     choiceCount = 0;
   1661 
   1662     /* check if the last codepoint of previous buffer was a lead surrogate*/
   1663     if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
   1664         goto getTrail;
   1665     }
   1666 
   1667     while(source < sourceLimit) {
   1668         if(target < targetLimit) {
   1669 
   1670             sourceChar  = *(source++);
   1671             /*check if the char is a First surrogate*/
   1672             if(U16_IS_SURROGATE(sourceChar)) {
   1673                 if(U16_IS_SURROGATE_LEAD(sourceChar)) {
   1674 getTrail:
   1675                     /*look ahead to find the trail surrogate*/
   1676                     if(source < sourceLimit) {
   1677                         /* test the following code unit */
   1678                         UChar trail=(UChar) *source;
   1679                         if(U16_IS_TRAIL(trail)) {
   1680                             source++;
   1681                             sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
   1682                             cnv->fromUChar32=0x00;
   1683                             /* convert this supplementary code point */
   1684                             /* exit this condition tree */
   1685                         } else {
   1686                             /* this is an unmatched lead code unit (1st surrogate) */
   1687                             /* callback(illegal) */
   1688                             *err=U_ILLEGAL_CHAR_FOUND;
   1689                             cnv->fromUChar32=sourceChar;
   1690                             break;
   1691                         }
   1692                     } else {
   1693                         /* no more input */
   1694                         cnv->fromUChar32=sourceChar;
   1695                         break;
   1696                     }
   1697                 } else {
   1698                     /* this is an unmatched trail code unit (2nd surrogate) */
   1699                     /* callback(illegal) */
   1700                     *err=U_ILLEGAL_CHAR_FOUND;
   1701                     cnv->fromUChar32=sourceChar;
   1702                     break;
   1703                 }
   1704             }
   1705 
   1706             /* do not convert SO/SI/ESC */
   1707             if(IS_2022_CONTROL(sourceChar)) {
   1708                 /* callback(illegal) */
   1709                 *err=U_ILLEGAL_CHAR_FOUND;
   1710                 cnv->fromUChar32=sourceChar;
   1711                 break;
   1712             }
   1713 
   1714             /* do the conversion */
   1715 
   1716             if(choiceCount == 0) {
   1717                 uint16_t csm;
   1718 
   1719                 /*
   1720                  * The csm variable keeps track of which charsets are allowed
   1721                  * and not used yet while building the choices[].
   1722                  */
   1723                 csm = jpCharsetMasks[converterData->version];
   1724                 choiceCount = 0;
   1725 
   1726                 /* JIS7/8: try single-byte half-width Katakana before JISX208 */
   1727                 if(converterData->version == 3 || converterData->version == 4) {
   1728                     choices[choiceCount++] = (int8_t)HWKANA_7BIT;
   1729                 }
   1730                 /* Do not try single-byte half-width Katakana for other versions. */
   1731                 csm &= ~CSM(HWKANA_7BIT);
   1732 
   1733                 /* try the current G0 charset */
   1734                 choices[choiceCount++] = cs = pFromU2022State->cs[0];
   1735                 csm &= ~CSM(cs);
   1736 
   1737                 /* try the current G2 charset */
   1738                 if((cs = pFromU2022State->cs[2]) != 0) {
   1739                     choices[choiceCount++] = cs;
   1740                     csm &= ~CSM(cs);
   1741                 }
   1742 
   1743                 /* try all the other possible charsets */
   1744                 for(i = 0; i < LENGTHOF(jpCharsetPref); ++i) {
   1745                     cs = (int8_t)jpCharsetPref[i];
   1746                     if(CSM(cs) & csm) {
   1747                         choices[choiceCount++] = cs;
   1748                         csm &= ~CSM(cs);
   1749                     }
   1750                 }
   1751             }
   1752 
   1753             cs = g = 0;
   1754             /*
   1755              * len==0: no mapping found yet
   1756              * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
   1757              * len>0: found a roundtrip result, done
   1758              */
   1759             len = 0;
   1760             /*
   1761              * We will turn off useFallback after finding a fallback,
   1762              * but we still get fallbacks from PUA code points as usual.
   1763              * Therefore, we will also need to check that we don't overwrite
   1764              * an early fallback with a later one.
   1765              */
   1766             useFallback = cnv->useFallback;
   1767 
   1768             for(i = 0; i < choiceCount && len <= 0; ++i) {
   1769                 uint32_t value;
   1770                 int32_t len2;
   1771                 int8_t cs0 = choices[i];
   1772                 switch(cs0) {
   1773                 case ASCII:
   1774                     if(sourceChar <= 0x7f) {
   1775                         targetValue = (uint32_t)sourceChar;
   1776                         len = 1;
   1777                         cs = cs0;
   1778                         g = 0;
   1779                     }
   1780                     break;
   1781                 case ISO8859_1:
   1782                     if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
   1783                         targetValue = (uint32_t)sourceChar - 0x80;
   1784                         len = 1;
   1785                         cs = cs0;
   1786                         g = 2;
   1787                     }
   1788                     break;
   1789                 case HWKANA_7BIT:
   1790                     if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
   1791                         if(converterData->version==3) {
   1792                             /* JIS7: use G1 (SO) */
   1793                             /* Shift U+FF61..U+FF9F to bytes 21..5F. */
   1794                             targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21));
   1795                             len = 1;
   1796                             pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */
   1797                             g = 1;
   1798                         } else if(converterData->version==4) {
   1799                             /* JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below */
   1800                             /* Shift U+FF61..U+FF9F to bytes A1..DF. */
   1801                             targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0xa1));
   1802                             len = 1;
   1803 
   1804                             cs = pFromU2022State->cs[0];
   1805                             if(IS_JP_DBCS(cs)) {
   1806                                 /* switch from a DBCS charset to JISX201 */
   1807                                 cs = (int8_t)JISX201;
   1808                             }
   1809                             /* else stay in the current G0 charset */
   1810                             g = 0;
   1811                         }
   1812                         /* else do not use HWKANA_7BIT with other versions */
   1813                     }
   1814                     break;
   1815                 case JISX201:
   1816                     /* G0 SBCS */
   1817                     value = jisx201FromU(sourceChar);
   1818                     if(value <= 0x7f) {
   1819                         targetValue = value;
   1820                         len = 1;
   1821                         cs = cs0;
   1822                         g = 0;
   1823                         useFallback = FALSE;
   1824                     }
   1825                     break;
   1826                 case JISX208:
   1827                     /* G0 DBCS from Shift-JIS table */
   1828                     len2 = MBCS_FROM_UCHAR32_ISO2022(
   1829                                 converterData->myConverterArray[cs0],
   1830                                 sourceChar, &value,
   1831                                 useFallback, MBCS_OUTPUT_2);
   1832                     if(len2 == 2 || (len2 == -2 && len == 0)) {  /* only accept DBCS: abs(len)==2 */
   1833                         value = _2022FromSJIS(value);
   1834                         if(value != 0) {
   1835                             targetValue = value;
   1836                             len = len2;
   1837                             cs = cs0;
   1838                             g = 0;
   1839                             useFallback = FALSE;
   1840                         }
   1841                     } else if(len == 0 && useFallback &&
   1842                               (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
   1843                         targetValue = hwkana_fb[sourceChar - HWKANA_START];
   1844                         len = -2;
   1845                         cs = cs0;
   1846                         g = 0;
   1847                         useFallback = FALSE;
   1848                     }
   1849                     break;
   1850                 case ISO8859_7:
   1851                     /* G0 SBCS forced to 7-bit output */
   1852                     len2 = MBCS_SINGLE_FROM_UCHAR32(
   1853                                 converterData->myConverterArray[cs0],
   1854                                 sourceChar, &value,
   1855                                 useFallback);
   1856                     if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= value && value <= GR96_END) {
   1857                         targetValue = value - 0x80;
   1858                         len = len2;
   1859                         cs = cs0;
   1860                         g = 2;
   1861                         useFallback = FALSE;
   1862                     }
   1863                     break;
   1864                 default:
   1865                     /* G0 DBCS */
   1866                     len2 = MBCS_FROM_UCHAR32_ISO2022(
   1867                                 converterData->myConverterArray[cs0],
   1868                                 sourceChar, &value,
   1869                                 useFallback, MBCS_OUTPUT_2);
   1870                     if(len2 == 2 || (len2 == -2 && len == 0)) {  /* only accept DBCS: abs(len)==2 */
   1871                         if(cs0 == KSC5601) {
   1872                             /*
   1873                              * Check for valid bytes for the encoding scheme.
   1874                              * This is necessary because the sub-converter (windows-949)
   1875                              * has a broader encoding scheme than is valid for 2022.
   1876                              */
   1877                             value = _2022FromGR94DBCS(value);
   1878                             if(value == 0) {
   1879                                 break;
   1880                             }
   1881                         }
   1882                         targetValue = value;
   1883                         len = len2;
   1884                         cs = cs0;
   1885                         g = 0;
   1886                         useFallback = FALSE;
   1887                     }
   1888                     break;
   1889                 }
   1890             }
   1891 
   1892             if(len != 0) {
   1893                 if(len < 0) {
   1894                     len = -len;  /* fallback */
   1895                 }
   1896                 outLen = 0; /* count output bytes */
   1897 
   1898                 /* write SI if necessary (only for JIS7) */
   1899                 if(pFromU2022State->g == 1 && g == 0) {
   1900                     buffer[outLen++] = UCNV_SI;
   1901                     pFromU2022State->g = 0;
   1902                 }
   1903 
   1904                 /* write the designation sequence if necessary */
   1905                 if(cs != pFromU2022State->cs[g]) {
   1906                     int32_t escLen = escSeqCharsLen[cs];
   1907                     uprv_memcpy(buffer + outLen, escSeqChars[cs], escLen);
   1908                     outLen += escLen;
   1909                     pFromU2022State->cs[g] = cs;
   1910 
   1911                     /* invalidate the choices[] */
   1912                     choiceCount = 0;
   1913                 }
   1914 
   1915                 /* write the shift sequence if necessary */
   1916                 if(g != pFromU2022State->g) {
   1917                     switch(g) {
   1918                     /* case 0 handled before writing escapes */
   1919                     case 1:
   1920                         buffer[outLen++] = UCNV_SO;
   1921                         pFromU2022State->g = 1;
   1922                         break;
   1923                     default: /* case 2 */
   1924                         buffer[outLen++] = 0x1b;
   1925                         buffer[outLen++] = 0x4e;
   1926                         break;
   1927                     /* no case 3: no SS3 in ISO-2022-JP-x */
   1928                     }
   1929                 }
   1930 
   1931                 /* write the output bytes */
   1932                 if(len == 1) {
   1933                     buffer[outLen++] = (char)targetValue;
   1934                 } else /* len == 2 */ {
   1935                     buffer[outLen++] = (char)(targetValue >> 8);
   1936                     buffer[outLen++] = (char)targetValue;
   1937                 }
   1938             } else {
   1939                 /*
   1940                  * if we cannot find the character after checking all codepages
   1941                  * then this is an error
   1942                  */
   1943                 *err = U_INVALID_CHAR_FOUND;
   1944                 cnv->fromUChar32=sourceChar;
   1945                 break;
   1946             }
   1947 
   1948             if(sourceChar == CR || sourceChar == LF) {
   1949                 /* reset the G2 state at the end of a line (conversion got us into ASCII or JISX201 already) */
   1950                 pFromU2022State->cs[2] = 0;
   1951                 choiceCount = 0;
   1952             }
   1953 
   1954             /* output outLen>0 bytes in buffer[] */
   1955             if(outLen == 1) {
   1956                 *target++ = buffer[0];
   1957                 if(offsets) {
   1958                     *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */
   1959                 }
   1960             } else if(outLen == 2 && (target + 2) <= targetLimit) {
   1961                 *target++ = buffer[0];
   1962                 *target++ = buffer[1];
   1963                 if(offsets) {
   1964                     int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));
   1965                     *offsets++ = sourceIndex;
   1966                     *offsets++ = sourceIndex;
   1967                 }
   1968             } else {
   1969                 fromUWriteUInt8(
   1970                     cnv,
   1971                     buffer, outLen,
   1972                     &target, (const char *)targetLimit,
   1973                     &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
   1974                     err);
   1975                 if(U_FAILURE(*err)) {
   1976                     break;
   1977                 }
   1978             }
   1979         } /* end if(myTargetIndex<myTargetLength) */
   1980         else{
   1981             *err =U_BUFFER_OVERFLOW_ERROR;
   1982             break;
   1983         }
   1984 
   1985     }/* end while(mySourceIndex<mySourceLength) */
   1986 
   1987     /*
   1988      * the end of the input stream and detection of truncated input
   1989      * are handled by the framework, but for ISO-2022-JP conversion
   1990      * we need to be in ASCII mode at the very end
   1991      *
   1992      * conditions:
   1993      *   successful
   1994      *   in SO mode or not in ASCII mode
   1995      *   end of input and no truncated input
   1996      */
   1997     if( U_SUCCESS(*err) &&
   1998         (pFromU2022State->g!=0 || pFromU2022State->cs[0]!=ASCII) &&
   1999         args->flush && source>=sourceLimit && cnv->fromUChar32==0
   2000     ) {
   2001         int32_t sourceIndex;
   2002 
   2003         outLen = 0;
   2004 
   2005         if(pFromU2022State->g != 0) {
   2006             buffer[outLen++] = UCNV_SI;
   2007             pFromU2022State->g = 0;
   2008         }
   2009 
   2010         if(pFromU2022State->cs[0] != ASCII) {
   2011             int32_t escLen = escSeqCharsLen[ASCII];
   2012             uprv_memcpy(buffer + outLen, escSeqChars[ASCII], escLen);
   2013             outLen += escLen;
   2014             pFromU2022State->cs[0] = (int8_t)ASCII;
   2015         }
   2016 
   2017         /* get the source index of the last input character */
   2018         /*
   2019          * TODO this would be simpler and more reliable if we used a pair
   2020          * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
   2021          * so that we could simply use the prevSourceIndex here;
   2022          * this code gives an incorrect result for the rare case of an unmatched
   2023          * trail surrogate that is alone in the last buffer of the text stream
   2024          */
   2025         sourceIndex=(int32_t)(source-args->source);
   2026         if(sourceIndex>0) {
   2027             --sourceIndex;
   2028             if( U16_IS_TRAIL(args->source[sourceIndex]) &&
   2029                 (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
   2030             ) {
   2031                 --sourceIndex;
   2032             }
   2033         } else {
   2034             sourceIndex=-1;
   2035         }
   2036 
   2037         fromUWriteUInt8(
   2038             cnv,
   2039             buffer, outLen,
   2040             &target, (const char *)targetLimit,
   2041             &offsets, sourceIndex,
   2042             err);
   2043     }
   2044 
   2045     /*save the state and return */
   2046     args->source = source;
   2047     args->target = (char*)target;
   2048 }
   2049 
   2050 /*************** to unicode *******************/
   2051 
   2052 static void
   2053 UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
   2054                                                UErrorCode* err){
            /*
             * Converts ISO-2022-JP family bytes in [args->source, args->sourceLimit)
             * to UTF-16 in args->target, writing one entry into args->offsets per
             * output unit when args->offsets is non-NULL.
             *
             * State carried between calls, as used below:
             * - myData->key != 0: an escape sequence is still incomplete; resume at "escape".
             * - converter->toULength == 1: toUBytes[0] holds the lead byte of a
             *   double-byte character whose trail byte was not yet available.
             * - pToU2022State->cs[] / g / prevG: the designated charsets and the
             *   currently (and previously) active G set.
             * - myData->isEmptySegment: TRUE right after a completed escape sequence,
             *   cleared as soon as any other byte is processed.
             *
             * On every exit args->source and args->target are updated to the
             * positions reached. *err is set to U_BUFFER_OVERFLOW_ERROR when input
             * remains but the target is full; other failures come from
             * changeState_2022() or toUnicodeCallback() through err.
             */
   2055     char tempBuf[2];
   2056     const char *mySource = (char *) args->source;
   2057     UChar *myTarget = args->target;
   2058     const char *mySourceLimit = args->sourceLimit;
   2059     uint32_t targetUniChar = 0x0000;
   2060     uint32_t mySourceChar = 0x0000;
   2061     uint32_t tmpSourceChar = 0x0000;
   2062     UConverterDataISO2022* myData;
   2063     ISO2022State *pToU2022State;
   2064     StateEnum cs;
   2065 
   2066     myData=(UConverterDataISO2022*)(args->converter->extraInfo);
   2067     pToU2022State = &myData->toU2022State;
   2068 
   2069     if(myData->key != 0) {
   2070         /* continue with a partial escape sequence */
   2071         goto escape;
   2072     } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
   2073         /* continue with a partial double-byte character */
                /*
                 * Only taken when there is both a byte to read and room to write;
                 * otherwise the stored lead byte stays in toUBytes and the main
                 * loop below handles the empty-input / full-target case.
                 */
   2074         mySourceChar = args->converter->toUBytes[0];
   2075         args->converter->toULength = 0;
   2076         cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
                /* the loop head normally resets this; we jump past it, so do it here */
   2077         targetUniChar = missingCharMarker;
   2078         goto getTrailByte;
   2079     }
   2080 
   2081     while(mySource < mySourceLimit){
   2082 
   2083         targetUniChar =missingCharMarker;
   2084 
   2085         if(myTarget < args->targetLimit){
   2086 
   2087             mySourceChar= (unsigned char) *mySource++;
   2088 
   2089             switch(mySourceChar) {
   2090             case UCNV_SI:
   2091                 if(myData->version==3) {
   2092                     pToU2022State->g=0;
   2093                     continue;
   2094                 } else {
   2095                     /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
                            /* break leaves targetUniChar==missingCharMarker, so the byte goes to toUnicodeCallback() below */
   2096                     myData->isEmptySegment = FALSE;	/* reset this, we have a different error */
   2097                     break;
   2098                 }
   2099 
   2100             case UCNV_SO:
   2101                 if(myData->version==3) {
   2102                     /* JIS7: switch to G1 half-width Katakana */
   2103                     pToU2022State->cs[1] = (int8_t)HWKANA_7BIT;
   2104                     pToU2022State->g=1;
   2105                     continue;
   2106                 } else {
   2107                     /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
   2108                     myData->isEmptySegment = FALSE;	/* reset this, we have a different error */
   2109                     break;
   2110                 }
   2111 
   2112             case ESC_2022:
                        /* back up so that mySource points at the ESC byte again before calling changeState_2022() */
   2113                 mySource--;
   2114 escape:
   2115                 {
                            /* remembered so that the length of the consumed escape sequence can be computed below */
   2116                     const char * mySourceBefore = mySource;
   2117                     int8_t toULengthBefore = args->converter->toULength;
   2118 
   2119                     changeState_2022(args->converter,&(mySource),
   2120                         mySourceLimit, ISO_2022_JP,err);
   2121 
   2122                     /* If in ISO-2022-JP only and we successfully completed an escape sequence, but previous segment was empty, create an error */
   2123                     if(myData->version==0 && myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
   2124                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   2125                         args->converter->toUCallbackReason = UCNV_IRREGULAR;
                                /* previously buffered bytes plus the bytes consumed from this buffer */
   2126                         args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore));
   2127                     }
   2128                 }
   2129 
   2130                 /* invalid or illegal escape sequence */
   2131                 if(U_FAILURE(*err)){
   2132                     args->target = myTarget;
   2133                     args->source = mySource;
   2134                     myData->isEmptySegment = FALSE;	/* Reset to avoid future spurious errors */
   2135                     return;
   2136                 }
   2137                 /* If we successfully completed an escape sequence, we begin a new segment, empty so far */
   2138                 if(myData->key==0) {
   2139                     myData->isEmptySegment = TRUE;
   2140                 }
   2141                 continue;
   2142 
   2143             /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */
   2144 
   2145             case CR:
   2146                 /*falls through*/
   2147             case LF:
   2148                 /* automatically reset to single-byte mode */
                        /* G0 is forced to ASCII unless it is already ASCII or JISX201; G2 and the active set are cleared */
   2149                 if((StateEnum)pToU2022State->cs[0] != ASCII && (StateEnum)pToU2022State->cs[0] != JISX201) {
   2150                     pToU2022State->cs[0] = (int8_t)ASCII;
   2151                 }
   2152                 pToU2022State->cs[2] = 0;
   2153                 pToU2022State->g = 0;
   2154                 /* falls through */
   2155             default:
   2156                 /* convert one or two bytes */
   2157                 myData->isEmptySegment = FALSE;
   2158                 cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
                        /* (uint8_t)(x - lo) <= (hi - lo) is a single-comparison range test for lo..hi */
   2159                 if( (uint8_t)(mySourceChar - 0xa1) <= (0xdf - 0xa1) && myData->version==4 &&
   2160                     !IS_JP_DBCS(cs)
   2161                 ) {
   2162                     /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */
   2163                     targetUniChar = mySourceChar + (HWKANA_START - 0xa1);
   2164 
   2165                     /* return from a single-shift state to the previous one */
   2166                     if(pToU2022State->g >= 2) {
   2167                         pToU2022State->g=pToU2022State->prevG;
   2168                     }
   2169                 } else switch(cs) {
   2170                 case ASCII:
   2171                     if(mySourceChar <= 0x7f) {
   2172                         targetUniChar = mySourceChar;
   2173                     }
   2174                     break;
   2175                 case ISO8859_1:
   2176                     if(mySourceChar <= 0x7f) {
   2177                         targetUniChar = mySourceChar + 0x80;
   2178                     }
   2179                     /* return from a single-shift state to the previous one */
   2180                     pToU2022State->g=pToU2022State->prevG;
   2181                     break;
   2182                 case ISO8859_7:
   2183                     if(mySourceChar <= 0x7f) {
   2184                         /* convert mySourceChar+0x80 to use a normal 8-bit table */
   2185                         targetUniChar =
   2186                             _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(
   2187                                 myData->myConverterArray[cs],
   2188                                 mySourceChar + 0x80);
   2189                     }
   2190                     /* return from a single-shift state to the previous one */
   2191                     pToU2022State->g=pToU2022State->prevG;
   2192                     break;
   2193                 case JISX201:
   2194                     if(mySourceChar <= 0x7f) {
   2195                         targetUniChar = jisx201ToU(mySourceChar);
   2196                     }
   2197                     break;
   2198                 case HWKANA_7BIT:
   2199                     if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) {
   2200                         /* 7-bit halfwidth Katakana */
   2201                         targetUniChar = mySourceChar + (HWKANA_START - 0x21);
   2202                     }
   2203                     break;
   2204                 default:
   2205                     /* G0 DBCS */
   2206                     if(mySource < mySourceLimit) {
   2207                         int leadIsOk, trailIsOk;
   2208                         uint8_t trailByte;
   2209 getTrailByte:
                                /* peek only; mySource is advanced below once we know the trail byte is consumed */
   2210                         trailByte = (uint8_t)*mySource;
   2211                         /*
   2212                          * Ticket 5691: consistent illegal sequences:
   2213                          * - We include at least the first byte in the illegal sequence.
   2214                          * - If any of the non-initial bytes could be the start of a character,
   2215                          *   we stop the illegal sequence before the first one of those.
   2216                          *
   2217                          * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
   2218                          * an ESC/SO/SI, we report only the first byte as the illegal sequence.
   2219                          * Otherwise we convert or report the pair of bytes.
   2220                          */
   2221                         leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
   2222                         trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
   2223                         if (leadIsOk && trailIsOk) {
   2224                             ++mySource;
   2225                             tmpSourceChar = (mySourceChar << 8) | trailByte;
   2226                             if(cs == JISX208) {
                                        /* the JISX208 table is looked up with bytes produced by _2022ToSJIS() */
   2227                                 _2022ToSJIS((uint8_t)mySourceChar, trailByte, tempBuf);
   2228                                 mySourceChar = tmpSourceChar;
   2229                             } else {
   2230                                 /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
   2231                                 mySourceChar = tmpSourceChar;
   2232                                 if (cs == KSC5601) {
   2233                                     tmpSourceChar += 0x8080;  /* = _2022ToGR94DBCS(tmpSourceChar) */
   2234                                 }
   2235                                 tempBuf[0] = (char)(tmpSourceChar >> 8);
   2236                                 tempBuf[1] = (char)(tmpSourceChar);
   2237                             }
   2238                             targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE);
   2239                         } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
   2240                             /* report a pair of illegal bytes if the second byte is not a DBCS starter */
   2241                             ++mySource;
   2242                             /* add another bit so that the code below writes 2 bytes in case of error */
   2243                             mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
   2244                         }
                                /*
                                 * Remaining case: the trail byte is left unread and only the
                                 * lead byte is reported, since targetUniChar is still missingCharMarker.
                                 */
   2245                     } else {
                                /* input ended after the lead byte: stash it and resume at getTrailByte on the next call */
   2246                         args->converter->toUBytes[0] = (uint8_t)mySourceChar;
   2247                         args->converter->toULength = 1;
   2248                         goto endloop;
   2249                     }
   2250                 }  /* End of inner switch */
   2251                 break;
   2252             }  /* End of outer switch */
                    /*
                     * Offsets below: mySourceChar > 0xff means it was widened to
                     * (lead<<8)|trail above, i.e. two source bytes were consumed.
                     */
   2253             if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){
   2254                 if(args->offsets){
   2255                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2256                 }
   2257                 *(myTarget++)=(UChar)targetUniChar;
   2258             }
   2259             else if(targetUniChar > missingCharMarker){
   2260                 /* disassemble the surrogate pair and write to output*/
   2261                 targetUniChar-=0x0010000;
   2262                 *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));
   2263                 if(args->offsets){
   2264                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2265                 }
   2266                 ++myTarget;
   2267                 if(myTarget< args->targetLimit){
   2268                     *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
   2269                     if(args->offsets){
   2270                         args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2271                     }
   2272                     ++myTarget;
   2273                 }else{
                            /*
                             * No room for the trail surrogate: park it in the converter's
                             * UCharErrorBuffer. *err is not changed here.
                             * NOTE(review): presumably the calling framework drains
                             * UCharErrorBuffer on the next call - confirm.
                             */
   2274                     args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
   2275                                     (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
   2276                 }
   2277 
   2278             }
   2279             else{
   2280                 /* Call the callback function*/
   2281                 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
   2282                 break;
   2283             }
   2284         }
   2285         else{    /* goes with "if(myTarget < args->targetLimit)"  way up near top of function */
   2286             *err =U_BUFFER_OVERFLOW_ERROR;
   2287             break;
   2288         }
   2289     }
   2290 endloop:
            /* save the positions reached so the caller can continue from here */
   2291     args->target = myTarget;
   2292     args->source = mySource;
   2293 }
   2294 
   2295 
   2296 /***************************************************************
   2297 *   Rules for ISO-2022-KR encoding
   2298 *   i) The KSC5601 designator sequence should appear only once in a file,
   2299 *      at the beginning of a line before any KSC5601 characters. This usually
   2300 *      means that it appears by itself on the first line of the file
   2301 *  ii) There are only 2 shifting sequences SO to shift into double byte mode
   2302 *      and SI to shift into single byte mode
   2303 */
   2304 static void
   2305 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){
            /*
             * From-Unicode path that delegates the whole conversion to
             * ucnv_MBCSFromUnicodeWithOffsets(), run on the sub-converter stored in
             * myConverterData->currentConverter.
             *
             * args->converter is temporarily replaced by the sub-converter and
             * restored before returning. The pending code point (fromUChar32) is
             * copied into the sub-converter before the call and back afterwards,
             * and on U_BUFFER_OVERFLOW_ERROR the sub-converter's charErrorBuffer
             * contents are moved to the public converter.
             */
   2306 
   2307     UConverter* saveConv = args->converter;
   2308     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022*)saveConv->extraInfo;
   2309     args->converter=myConverterData->currentConverter;
   2310 
            /* hand the pending code point to the sub-converter, and take back whatever it leaves pending */
   2311     myConverterData->currentConverter->fromUChar32 = saveConv->fromUChar32;
   2312     ucnv_MBCSFromUnicodeWithOffsets(args,err);
   2313     saveConv->fromUChar32 = myConverterData->currentConverter->fromUChar32;
   2314 
   2315     if(*err == U_BUFFER_OVERFLOW_ERROR) {
                /* move overflow bytes to the public converter and empty the sub-converter's buffer */
   2316         if(myConverterData->currentConverter->charErrorBufferLength > 0) {
   2317             uprv_memcpy(
   2318                 saveConv->charErrorBuffer,
   2319                 myConverterData->currentConverter->charErrorBuffer,
   2320                 myConverterData->currentConverter->charErrorBufferLength);
   2321         }
   2322         saveConv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;
   2323         myConverterData->currentConverter->charErrorBufferLength = 0;
   2324     }
            /* restore the caller-visible converter */
   2325     args->converter=saveConv;
   2326 }
   2327 
   2328 static void
   2329 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
            /*
             * Converts UTF-16 in [args->source, args->sourceLimit) to ISO-2022-KR
             * bytes in args->target, writing one entry into args->offsets per
             * output byte when args->offsets is non-NULL.
             *
             * State carried between calls, as used below:
             * - converter->fromUnicodeStatus: non-zero while the output is in
             *   double-byte mode (an SO has been written and no SI since).
             * - converter->fromUChar32: a code unit/point left pending by a
             *   previous call (set on every error exit and when input ends
             *   after a lead surrogate).
             *
             * Errors reported through err: U_ILLEGAL_CHAR_FOUND for SO/SI/ESC in
             * the input and for unpaired surrogates, U_INVALID_CHAR_FOUND for
             * characters that cannot be converted, U_BUFFER_OVERFLOW_ERROR
             * when the target is full.
             */
   2330 
   2331     const UChar *source = args->source;
   2332     const UChar *sourceLimit = args->sourceLimit;
   2333     unsigned char *target = (unsigned char *) args->target;
   2334     unsigned char *targetLimit = (unsigned char *) args->targetLimit;
   2335     int32_t* offsets = args->offsets;
   2336     uint32_t targetByteUnit = 0x0000;
   2337     UChar32 sourceChar = 0x0000;
   2338     UBool isTargetByteDBCS;
   2339     UBool oldIsTargetByteDBCS;
   2340     UConverterDataISO2022 *converterData;
   2341     UConverterSharedData* sharedData;
   2342     UBool useFallback;
   2343     int32_t length =0;
   2344 
   2345     converterData=(UConverterDataISO2022*)args->converter->extraInfo;
   2346     /* if the version is 1 then the user is requesting
   2347      * conversion with ibm-25546 pass the arguments to
   2348      * MBCS converter and return
   2349      */
   2350     if(converterData->version==1){
   2351         UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
   2352         return;
   2353     }
   2354 
   2355     /* initialize data */
   2356     sharedData = converterData->currentConverter->sharedData;
   2357     useFallback = args->converter->useFallback;
   2358     isTargetByteDBCS=(UBool)args->converter->fromUnicodeStatus;
   2359     oldIsTargetByteDBCS = isTargetByteDBCS;
   2360 
            /* NOTE(review): same value as assigned two statements above; this second assignment is redundant */
   2361     isTargetByteDBCS   = (UBool) args->converter->fromUnicodeStatus;
            /* resume with a code unit left pending by the previous call, but only if there is room to write */
   2362     if((sourceChar = args->converter->fromUChar32)!=0 && target <targetLimit) {
   2363         goto getTrail;
   2364     }
   2365     while(source < sourceLimit){
   2366 
   2367         targetByteUnit = missingCharMarker;
   2368 
   2369         if(target < (unsigned char*) args->targetLimit){
   2370             sourceChar = *source++;
   2371 
   2372             /* do not convert SO/SI/ESC */
   2373             if(IS_2022_CONTROL(sourceChar)) {
   2374                 /* callback(illegal) */
   2375                 *err=U_ILLEGAL_CHAR_FOUND;
   2376                 args->converter->fromUChar32=sourceChar;
   2377                 break;
   2378             }
   2379 
   2380             length = MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,&targetByteUnit,useFallback,MBCS_OUTPUT_2);
   2381             if(length < 0) {
   2382                 length = -length;  /* fallback */
   2383             }
   2384             /* only DBCS or SBCS characters are expected*/
   2385             /* DB characters with high bit set to 1 are expected */
                    /*
                     * Accepted results: one byte <= 0x7f, or two bytes that are each
                     * in 0xa1..0xfe (the 16-bit range test bounds the value to
                     * 0xa1a1..0xfefe, the 8-bit test bounds the low byte).
                     */
   2386             if( length > 2 || length==0 ||
   2387                 (length == 1 && targetByteUnit > 0x7f) ||
   2388                 (length == 2 &&
   2389                     ((uint16_t)(targetByteUnit - 0xa1a1) > (0xfefe - 0xa1a1) ||
   2390                     (uint8_t)(targetByteUnit - 0xa1) > (0xfe - 0xa1)))
   2391             ) {
   2392                 targetByteUnit=missingCharMarker;
   2393             }
   2394             if (targetByteUnit != missingCharMarker){
   2395 
   2396                 oldIsTargetByteDBCS = isTargetByteDBCS;
   2397                 isTargetByteDBCS = (UBool)(targetByteUnit>0x00FF);
   2398                   /* append the shift sequence */
                        /* written without a bounds check: the loop head already verified target < targetLimit */
   2399                 if (oldIsTargetByteDBCS != isTargetByteDBCS ){
   2400 
   2401                     if (isTargetByteDBCS)
   2402                         *target++ = UCNV_SO;
   2403                     else
   2404                         *target++ = UCNV_SI;
   2405                     if(offsets)
   2406                         *(offsets++) = (int32_t)(source - args->source-1);
   2407                 }
   2408                 /* write the targetUniChar  to target */
                        /* bytes that no longer fit go to charErrorBuffer and U_BUFFER_OVERFLOW_ERROR is set */
   2409                 if(targetByteUnit <= 0x00FF){
   2410                     if( target < targetLimit){
   2411                         *(target++) = (unsigned char) targetByteUnit;
   2412                         if(offsets){
   2413                             *(offsets++) = (int32_t)(source - args->source-1);
   2414                         }
   2415 
   2416                     }else{
   2417                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit);
   2418                         *err = U_BUFFER_OVERFLOW_ERROR;
   2419                     }
   2420                 }else{
                            /* subtracting 0x80 maps each validated 0xa1..0xfe byte to 0x21..0x7e for output */
   2421                     if(target < targetLimit){
   2422                         *(target++) =(unsigned char) ((targetByteUnit>>8) -0x80);
   2423                         if(offsets){
   2424                             *(offsets++) = (int32_t)(source - args->source-1);
   2425                         }
   2426                         if(target < targetLimit){
   2427                             *(target++) =(unsigned char) (targetByteUnit -0x80);
   2428                             if(offsets){
   2429                                 *(offsets++) = (int32_t)(source - args->source-1);
   2430                             }
   2431                         }else{
   2432                             args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit -0x80);
   2433                             *err = U_BUFFER_OVERFLOW_ERROR;
   2434                         }
   2435                     }else{
   2436                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) ((targetByteUnit>>8) -0x80);
   2437                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit-0x80);
   2438                         *err = U_BUFFER_OVERFLOW_ERROR;
   2439                     }
   2440                 }
   2441 
   2442             }
   2443             else{
   2444                 /* oops.. the code point is unassigned
   2445                  * set the error and reason
   2446                  */
                        /* every path through this branch stores sourceChar in fromUChar32 and leaves the loop */
   2447 
   2448                 /*check if the char is a First surrogate*/
   2449                 if(U16_IS_SURROGATE(sourceChar)) {
   2450                     if(U16_IS_SURROGATE_LEAD(sourceChar)) {
   2451 getTrail:
   2452                         /*look ahead to find the trail surrogate*/
   2453                         if(source <  sourceLimit) {
   2454                             /* test the following code unit */
   2455                             UChar trail=(UChar) *source;
   2456                             if(U16_IS_TRAIL(trail)) {
   2457                                 source++;
   2458                                 sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
                                        /* the combined supplementary code point is reported as unassigned */
   2459                                 *err = U_INVALID_CHAR_FOUND;
   2460                                 /* convert this surrogate code point */
   2461                                 /* exit this condition tree */
   2462                             } else {
   2463                                 /* this is an unmatched lead code unit (1st surrogate) */
   2464                                 /* callback(illegal) */
   2465                                 *err=U_ILLEGAL_CHAR_FOUND;
   2466                             }
   2467                         } else {
   2468                             /* no more input */
                                    /* not an error: the lead surrogate is kept in fromUChar32 for the next call */
   2469                             *err = U_ZERO_ERROR;
   2470                         }
   2471                     } else {
   2472                         /* this is an unmatched trail code unit (2nd surrogate) */
   2473                         /* callback(illegal) */
   2474                         *err=U_ILLEGAL_CHAR_FOUND;
   2475                     }
   2476                 } else {
   2477                     /* callback(unassigned) for a BMP code point */
   2478                     *err = U_INVALID_CHAR_FOUND;
   2479                 }
   2480 
   2481                 args->converter->fromUChar32=sourceChar;
   2482                 break;
   2483             }
   2484         } /* end if(myTargetIndex<myTargetLength) */
   2485         else{
   2486             *err =U_BUFFER_OVERFLOW_ERROR;
   2487             break;
   2488         }
   2489 
   2490     }/* end while(mySourceIndex<mySourceLength) */
   2491 
   2492     /*
   2493      * the end of the input stream and detection of truncated input
   2494      * are handled by the framework, but for ISO-2022-KR conversion
   2495      * we need to be in ASCII mode at the very end
   2496      *
   2497      * conditions:
   2498      *   successful
   2499      *   not in ASCII mode
   2500      *   end of input and no truncated input
   2501      */
   2502     if( U_SUCCESS(*err) &&
   2503         isTargetByteDBCS &&
   2504         args->flush && source>=sourceLimit && args->converter->fromUChar32==0
   2505     ) {
   2506         int32_t sourceIndex;
   2507 
   2508         /* we are switching to ASCII */
   2509         isTargetByteDBCS=FALSE;
   2510 
   2511         /* get the source index of the last input character */
   2512         /*
   2513          * TODO this would be simpler and more reliable if we used a pair
   2514          * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
   2515          * so that we could simply use the prevSourceIndex here;
   2516          * this code gives an incorrect result for the rare case of an unmatched
   2517          * trail surrogate that is alone in the last buffer of the text stream
   2518          */
   2519         sourceIndex=(int32_t)(source-args->source);
   2520         if(sourceIndex>0) {
   2521             --sourceIndex;
                    /* step back one more unit when the last unit looks like the trail half of a pair */
   2522             if( U16_IS_TRAIL(args->source[sourceIndex]) &&
   2523                 (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
   2524             ) {
   2525                 --sourceIndex;
   2526             }
   2527         } else {
                    /* nothing was consumed from this buffer: use -1 as the offset */
   2528             sourceIndex=-1;
   2529         }
   2530 
                /* emit the single SHIFT_IN_STR byte through fromUWriteUInt8(), which receives targetLimit and err */
   2531         fromUWriteUInt8(
   2532             args->converter,
   2533             SHIFT_IN_STR, 1,
   2534             &target, (const char *)targetLimit,
   2535             &offsets, sourceIndex,
   2536             err);
   2537     }
   2538 
   2539     /*save the state and return */
   2540     args->source = source;
   2541     args->target = (char*)target;
   2542     args->converter->fromUnicodeStatus = (uint32_t)isTargetByteDBCS;
   2543 }
   2544 
   2545 /************************ To Unicode ***************************************/
   2546 
   2547 static void
   2548 UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterToUnicodeArgs *args,
   2549                                                             UErrorCode* err){
            /*
             * To-Unicode path that converts the input in segments: each run of
             * bytes up to the position returned by getEndOfBuffer_2022() is handed
             * to ucnv_MBCSToUnicodeWithOffsets() on myData->currentConverter, and
             * whatever follows is passed to changeState_2022().
             *
             * Partial input bytes (toUBytes/toULength) are copied into the
             * sub-converter before each segment and back afterwards; on
             * U_BUFFER_OVERFLOW_ERROR the sub-converter's UCharErrorBuffer is moved
             * to the public converter. args->source, args->target and
             * args->offsets are advanced as segments are converted.
             */
   2550     char const* sourceStart;
   2551     UConverterDataISO2022* myData=(UConverterDataISO2022*)(args->converter->extraInfo);
   2552 
   2553     UConverterToUnicodeArgs subArgs;
   2554     int32_t minArgsSize;
   2555 
   2556     /* set up the subconverter arguments */
            /* copy no more than the smaller of the caller's declared struct size and our own */
   2557     if(args->size<sizeof(UConverterToUnicodeArgs)) {
   2558         minArgsSize = args->size;
   2559     } else {
   2560         minArgsSize = (int32_t)sizeof(UConverterToUnicodeArgs);
   2561     }
   2562 
   2563     uprv_memcpy(&subArgs, args, minArgsSize);
   2564     subArgs.size = (uint16_t)minArgsSize;
   2565     subArgs.converter = myData->currentConverter;
   2566 
   2567     /* remember the original start of the input for offsets */
   2568     sourceStart = args->source;
   2569 
   2570     if(myData->key != 0) {
   2571         /* continue with a partial escape sequence */
   2572         goto escape;
   2573     }
   2574 
   2575     while(U_SUCCESS(*err) && args->source < args->sourceLimit) {
   2576         /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
   2577         subArgs.source = args->source;
   2578         subArgs.sourceLimit = getEndOfBuffer_2022(&(args->source), args->sourceLimit, args->flush);
                /* an empty segment (source already at the segment end) skips straight to the escape handling */
   2579         if(subArgs.source != subArgs.sourceLimit) {
   2580             /*
   2581              * get the current partial byte sequence
   2582              *
   2583              * it needs to be moved between the public and the subconverter
   2584              * so that the conversion framework, which only sees the public
   2585              * converter, can handle truncated and illegal input etc.
   2586              */
   2587             if(args->converter->toULength > 0) {
   2588                 uprv_memcpy(subArgs.converter->toUBytes, args->converter->toUBytes, args->converter->toULength);
   2589             }
   2590             subArgs.converter->toULength = args->converter->toULength;
   2591 
   2592             /*
   2593              * Convert up to the end of the input, or to before the next escape character.
   2594              * Does not handle conversion extensions because the preToU[] state etc.
   2595              * is not copied.
   2596              */
   2597             ucnv_MBCSToUnicodeWithOffsets(&subArgs, err);
   2598 
   2599             if(args->offsets != NULL && sourceStart != args->source) {
   2600                 /* update offsets to base them on the actual start of the input */
                        /*
                         * Walks the units written by this segment (args->target up to
                         * subArgs.target) and shifts each non-negative offset by the
                         * distance of this segment from sourceStart.
                         */
   2601                 int32_t *offsets = args->offsets;
   2602                 UChar *target = args->target;
   2603                 int32_t delta = (int32_t)(args->source - sourceStart);
   2604                 while(target < subArgs.target) {
   2605                     if(*offsets >= 0) {
   2606                         *offsets += delta;
   2607                     }
   2608                     ++offsets;
   2609                     ++target;
   2610                 }
   2611             }
                    /* publish the sub-converter's progress to the caller-visible arguments */
   2612             args->source = subArgs.source;
   2613             args->target = subArgs.target;
   2614             args->offsets = subArgs.offsets;
   2615 
   2616             /* copy input/error/overflow buffers */
   2617             if(subArgs.converter->toULength > 0) {
   2618                 uprv_memcpy(args->converter->toUBytes, subArgs.converter->toUBytes, subArgs.converter->toULength);
   2619             }
   2620             args->converter->toULength = subArgs.converter->toULength;
   2621 
   2622             if(*err == U_BUFFER_OVERFLOW_ERROR) {
   2623                 if(subArgs.converter->UCharErrorBufferLength > 0) {
   2624                     uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer,
   2625                                 subArgs.converter->UCharErrorBufferLength);
   2626                 }
   2627                 args->converter->UCharErrorBufferLength=subArgs.converter->UCharErrorBufferLength;
   2628                 subArgs.converter->UCharErrorBufferLength = 0;
   2629             }
   2630         }
   2631 
                /* stop on any error, or when the whole input has been consumed */
   2632         if (U_FAILURE(*err) || (args->source == args->sourceLimit)) {
   2633             return;
   2634         }
   2635 
   2636 escape:
                /* also the entry point when a previous call ended inside an escape sequence (myData->key != 0) */
   2637         changeState_2022(args->converter,
   2638                &(args->source),
   2639                args->sourceLimit,
   2640                ISO_2022_KR,
   2641                err);
   2642     }
   2643 }
   2644 
   2645 static void
   2646 UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
   2647                                                             UErrorCode* err){
   2648     char tempBuf[2];
   2649     const char *mySource = ( char *) args->source;
   2650     UChar *myTarget = args->target;
   2651     const char *mySourceLimit = args->sourceLimit;
   2652     UChar32 targetUniChar = 0x0000;
   2653     UChar mySourceChar = 0x0000;
   2654     UConverterDataISO2022* myData;
   2655     UConverterSharedData* sharedData ;
   2656     UBool useFallback;
   2657 
   2658     myData=(UConverterDataISO2022*)(args->converter->extraInfo);
   2659     if(myData->version==1){
   2660         UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
   2661         return;
   2662     }
   2663 
   2664     /* initialize state */
   2665     sharedData = myData->currentConverter->sharedData;
   2666     useFallback = args->converter->useFallback;
   2667 
   2668     if(myData->key != 0) {
   2669         /* continue with a partial escape sequence */
   2670         goto escape;
   2671     } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
   2672         /* continue with a partial double-byte character */
   2673         mySourceChar = args->converter->toUBytes[0];
   2674         args->converter->toULength = 0;
   2675         goto getTrailByte;
   2676     }
   2677 
   2678     while(mySource< mySourceLimit){
   2679 
   2680         if(myTarget < args->targetLimit){
   2681 
   2682             mySourceChar= (unsigned char) *mySource++;
   2683 
   2684             if(mySourceChar==UCNV_SI){
   2685                 myData->toU2022State.g = 0;
   2686                 if (myData->isEmptySegment) {
   2687                     myData->isEmptySegment = FALSE;	/* we are handling it, reset to avoid future spurious errors */
   2688                     *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   2689                     args->converter->toUCallbackReason = UCNV_IRREGULAR;
   2690                     args->converter->toUBytes[0] = (uint8_t)mySourceChar;
   2691                     args->converter->toULength = 1;
   2692                     args->target = myTarget;
   2693                     args->source = mySource;
   2694                     return;
   2695                 }
   2696                 /*consume the source */
   2697                 continue;
   2698             }else if(mySourceChar==UCNV_SO){
   2699                 myData->toU2022State.g = 1;
   2700                 myData->isEmptySegment = TRUE;	/* Begin a new segment, empty so far */
   2701                 /*consume the source */
   2702                 continue;
   2703             }else if(mySourceChar==ESC_2022){
   2704                 mySource--;
   2705 escape:
   2706                 myData->isEmptySegment = FALSE;	/* Any invalid ESC sequences will be detected separately, so just reset this */
   2707                 changeState_2022(args->converter,&(mySource),
   2708                                 mySourceLimit, ISO_2022_KR, err);
   2709                 if(U_FAILURE(*err)){
   2710                     args->target = myTarget;
   2711                     args->source = mySource;
   2712                     return;
   2713                 }
   2714                 continue;
   2715             }
   2716 
   2717             myData->isEmptySegment = FALSE;	/* Any invalid char errors will be detected separately, so just reset this */
   2718             if(myData->toU2022State.g == 1) {
   2719                 if(mySource < mySourceLimit) {
   2720                     int leadIsOk, trailIsOk;
   2721                     uint8_t trailByte;
   2722 getTrailByte:
   2723                     targetUniChar = missingCharMarker;
   2724                     trailByte = (uint8_t)*mySource;
   2725                     /*
   2726                      * Ticket 5691: consistent illegal sequences:
   2727                      * - We include at least the first byte in the illegal sequence.
   2728                      * - If any of the non-initial bytes could be the start of a character,
   2729                      *   we stop the illegal sequence before the first one of those.
   2730                      *
   2731                      * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
   2732                      * an ESC/SO/SI, we report only the first byte as the illegal sequence.
   2733                      * Otherwise we convert or report the pair of bytes.
   2734                      */
   2735                     leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
   2736                     trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
   2737                     if (leadIsOk && trailIsOk) {
   2738                         ++mySource;
   2739                         tempBuf[0] = (char)(mySourceChar + 0x80);
   2740                         tempBuf[1] = (char)(trailByte + 0x80);
   2741                         targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback);
   2742                         mySourceChar = (mySourceChar << 8) | trailByte;
   2743                     } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
   2744                         /* report a pair of illegal bytes if the second byte is not a DBCS starter */
   2745                         ++mySource;
   2746                         /* add another bit so that the code below writes 2 bytes in case of error */
   2747                         mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
   2748                     }
   2749                 } else {
   2750                     args->converter->toUBytes[0] = (uint8_t)mySourceChar;
   2751                     args->converter->toULength = 1;
   2752                     break;
   2753                 }
   2754             }
   2755             else if(mySourceChar <= 0x7f) {
   2756                 targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, mySource - 1, 1, useFallback);
   2757             } else {
   2758                 targetUniChar = 0xffff;
   2759             }
   2760             if(targetUniChar < 0xfffe){
   2761                 if(args->offsets) {
   2762                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2763                 }
   2764                 *(myTarget++)=(UChar)targetUniChar;
   2765             }
   2766             else {
   2767                 /* Call the callback function*/
   2768                 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
   2769                 break;
   2770             }
   2771         }
   2772         else{
   2773             *err =U_BUFFER_OVERFLOW_ERROR;
   2774             break;
   2775         }
   2776     }
   2777     args->target = myTarget;
   2778     args->source = mySource;
   2779 }
   2780 
   2781 /*************************** END ISO2022-KR *********************************/
   2782 
   2783 /*************************** ISO-2022-CN *********************************
   2784 *
   2785 * Rules for ISO-2022-CN Encoding:
   2786 * i)   The designator sequence must appear once on a line before any instance
   2787 *      of character set it designates.
   2788 * ii)  If two lines contain characters from the same character set, both lines
   2789 *      must include the designator sequence.
   2790 * iii) Once the designator sequence is known, a shifting sequence has to be found
   2791 *      to invoke the designated character set
   2792 * iv)  All lines start in ASCII and end in ASCII.
   2793 * v)   Four shifting sequences are employed for this purpose:
   2794 *
   2795 *      Sequence    ASCII Eq    Charsets
   2796 *      ----------  -------    ---------
   2797 *      SI           <SI>        US-ASCII
   2798 *      SO           <SO>        CNS-11643-1992 Plane 1, GB2312, ISO-IR-165
   2799 *      SS2          <ESC>N      CNS-11643-1992 Plane 2
   2800 *      SS3          <ESC>O      CNS-11643-1992 Planes 3-7
   2801 *
   2802 * vi)
   2803 *      SOdesignator  : ESC "$" ")" finalchar_for_SO
   2804 *      SS2designator : ESC "$" "*" finalchar_for_SS2
   2805 *      SS3designator : ESC "$" "+" finalchar_for_SS3
   2806 *
   2807 *      ESC $ ) A       Indicates the bytes following SO are Chinese
   2808 *       characters as defined in GB 2312-80, until
   2809 *       another SOdesignation appears
   2810 *
   2811 *
   2812 *      ESC $ ) E       Indicates the bytes following SO are as defined
   2813 *       in ISO-IR-165 (for details, see section 2.1),
   2814 *       until another SOdesignation appears
   2815 *
   2816 *      ESC $ ) G       Indicates the bytes following SO are as defined
   2817 *       in CNS 11643-plane-1, until another
   2818 *       SOdesignation appears
   2819 *
   2820 *      ESC $ * H       Indicates the two bytes immediately following
   2821 *       SS2 is a Chinese character as defined in CNS
   2822 *       11643-plane-2, until another SS2designation
   2823 *       appears
   2824 *       (Meaning <ESC>N must precede every 2 byte
   2825 *        sequence.)
   2826 *
   2827 *      ESC $ + I       Indicates the immediate two bytes following SS3
   2828 *       is a Chinese character as defined in CNS
   2829 *       11643-plane-3, until another SS3designation
   2830 *       appears
   2831 *       (Meaning <ESC>O must precede every 2 byte
   2832 *        sequence.)
   2833 *
   2834 *      ESC $ + J       Indicates the immediate two bytes following SS3
   2835 *       is a Chinese character as defined in CNS
   2836 *       11643-plane-4, until another SS3designation
   2837 *       appears
   2838 *       (In English: <ESC>O must precede every 2 byte
   2839 *        sequence.)
   2840 *
   2841 *      ESC $ + K       Indicates the immediate two bytes following SS3
   2842 *       is a Chinese character as defined in CNS
   2843 *       11643-plane-5, until another SS3designation
   2844 *       appears
   2845 *
   2846 *      ESC $ + L       Indicates the immediate two bytes following SS3
   2847 *       is a Chinese character as defined in CNS
   2848 *       11643-plane-6, until another SS3designation
   2849 *       appears
   2850 *
   2851 *      ESC $ + M       Indicates the immediate two bytes following SS3
   2852 *       is a Chinese character as defined in CNS
   2853 *       11643-plane-7, until another SS3designation
   2854 *       appears
   2855 *
   2856 *       As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and
   2857 *       has its own designation information before any Chinese characters
   2858 *       appear
   2859 *
   2860 */
   2861 
   2862 /* The following are defined this way to make the strings truly readonly; each is a 4-byte designator escape sequence */
   2863 static const char GB_2312_80_STR[] = "\x1B\x24\x29\x41"; /* ESC $ ) A */
   2864 static const char ISO_IR_165_STR[] = "\x1B\x24\x29\x45"; /* ESC $ ) E */
   2865 static const char CNS_11643_1992_Plane_1_STR[] = "\x1B\x24\x29\x47"; /* ESC $ ) G */
   2866 static const char CNS_11643_1992_Plane_2_STR[] = "\x1B\x24\x2A\x48"; /* ESC $ * H */
   2867 static const char CNS_11643_1992_Plane_3_STR[] = "\x1B\x24\x2B\x49"; /* ESC $ + I */
   2868 static const char CNS_11643_1992_Plane_4_STR[] = "\x1B\x24\x2B\x4A"; /* ESC $ + J */
   2869 static const char CNS_11643_1992_Plane_5_STR[] = "\x1B\x24\x2B\x4B"; /* ESC $ + K */
   2870 static const char CNS_11643_1992_Plane_6_STR[] = "\x1B\x24\x2B\x4C"; /* ESC $ + L */
   2871 static const char CNS_11643_1992_Plane_7_STR[] = "\x1B\x24\x2B\x4D"; /* ESC $ + M */
   2872 
   2873 /********************** ISO2022-CN Data **************************/
   2874 static const char* const escSeqCharsCN[10] ={ /* designator sequences, indexed by charset number */
   2875         SHIFT_IN_STR,                   /* 0 ASCII */
   2876         GB_2312_80_STR,                 /* 1 GB2312_1 */
   2877         ISO_IR_165_STR,                 /* 2 ISO_IR_165 */
   2878         CNS_11643_1992_Plane_1_STR,     /* 3 CNS 11643 plane 1 */
   2879         CNS_11643_1992_Plane_2_STR,     /* 4 CNS 11643 plane 2 */
   2880         CNS_11643_1992_Plane_3_STR,     /* 5 CNS 11643 plane 3 */
   2881         CNS_11643_1992_Plane_4_STR,     /* 6 CNS 11643 plane 4 */
   2882         CNS_11643_1992_Plane_5_STR,     /* 7 CNS 11643 plane 5 */
   2883         CNS_11643_1992_Plane_6_STR,     /* 8 CNS 11643 plane 6 */
   2884         CNS_11643_1992_Plane_7_STR      /* 9 CNS 11643 plane 7 */
   2885 };
   2886 
   2887 static void
   2888 UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
   2889     UConverter *cnv = args->converter;
   2890     UConverterDataISO2022 *converterData;
   2891     ISO2022State *pFromU2022State;
   2892     uint8_t *target = (uint8_t *) args->target;
   2893     const uint8_t *targetLimit = (const uint8_t *) args->targetLimit;
   2894     const UChar* source = args->source;
   2895     const UChar* sourceLimit = args->sourceLimit;
   2896     int32_t* offsets = args->offsets;
   2897     UChar32 sourceChar;
   2898     char buffer[8];
   2899     int32_t len;
   2900     int8_t choices[3];
   2901     int32_t choiceCount;
   2902     uint32_t targetValue = 0;
   2903     UBool useFallback;
   2904 
   2905     /* set up the state */
   2906     converterData     = (UConverterDataISO2022*)cnv->extraInfo;
   2907     pFromU2022State   = &converterData->fromU2022State;
   2908 
   2909     choiceCount = 0;
   2910 
   2911     /* check if the last codepoint of previous buffer was a lead surrogate*/
   2912     if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
   2913         goto getTrail;
   2914     }
   2915 
   2916     while( source < sourceLimit){
   2917         if(target < targetLimit){
   2918 
   2919             sourceChar  = *(source++);
   2920             /*check if the char is a First surrogate*/
   2921              if(U16_IS_SURROGATE(sourceChar)) {
   2922                 if(U16_IS_SURROGATE_LEAD(sourceChar)) {
   2923 getTrail:
   2924                     /*look ahead to find the trail surrogate*/
   2925                     if(source < sourceLimit) {
   2926                         /* test the following code unit */
   2927                         UChar trail=(UChar) *source;
   2928                         if(U16_IS_TRAIL(trail)) {
   2929                             source++;
   2930                             sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
   2931                             cnv->fromUChar32=0x00;
   2932                             /* convert this supplementary code point */
   2933                             /* exit this condition tree */
   2934                         } else {
   2935                             /* this is an unmatched lead code unit (1st surrogate) */
   2936                             /* callback(illegal) */
   2937                             *err=U_ILLEGAL_CHAR_FOUND;
   2938                             cnv->fromUChar32=sourceChar;
   2939                             break;
   2940                         }
   2941                     } else {
   2942                         /* no more input */
   2943                         cnv->fromUChar32=sourceChar;
   2944                         break;
   2945                     }
   2946                 } else {
   2947                     /* this is an unmatched trail code unit (2nd surrogate) */
   2948                     /* callback(illegal) */
   2949                     *err=U_ILLEGAL_CHAR_FOUND;
   2950                     cnv->fromUChar32=sourceChar;
   2951                     break;
   2952                 }
   2953             }
   2954 
   2955             /* do the conversion */
   2956             if(sourceChar <= 0x007f ){
   2957                 /* do not convert SO/SI/ESC */
   2958                 if(IS_2022_CONTROL(sourceChar)) {
   2959                     /* callback(illegal) */
   2960                     *err=U_ILLEGAL_CHAR_FOUND;
   2961                     cnv->fromUChar32=sourceChar;
   2962                     break;
   2963                 }
   2964 
   2965                 /* US-ASCII */
   2966                 if(pFromU2022State->g == 0) {
   2967                     buffer[0] = (char)sourceChar;
   2968                     len = 1;
   2969                 } else {
   2970                     buffer[0] = UCNV_SI;
   2971                     buffer[1] = (char)sourceChar;
   2972                     len = 2;
   2973                     pFromU2022State->g = 0;
   2974                     choiceCount = 0;
   2975                 }
   2976                 if(sourceChar == CR || sourceChar == LF) {
   2977                     /* reset the state at the end of a line */
   2978                     uprv_memset(pFromU2022State, 0, sizeof(ISO2022State));
   2979                     choiceCount = 0;
   2980                 }
   2981             }
   2982             else{
   2983                 /* convert U+0080..U+10ffff */
   2984                 int32_t i;
   2985                 int8_t cs, g;
   2986 
   2987                 if(choiceCount == 0) {
   2988                     /* try the current SO/G1 converter first */
   2989                     choices[0] = pFromU2022State->cs[1];
   2990 
   2991                     /* default to GB2312_1 if none is designated yet */
   2992                     if(choices[0] == 0) {
   2993                         choices[0] = GB2312_1;
   2994                     }
   2995 
   2996                     if(converterData->version == 0) {
   2997                         /* ISO-2022-CN */
   2998 
   2999                         /* try the other SO/G1 converter; a CNS_11643_1 lookup may result in any plane */
   3000                         if(choices[0] == GB2312_1) {
   3001                             choices[1] = (int8_t)CNS_11643_1;
   3002                         } else {
   3003                             choices[1] = (int8_t)GB2312_1;
   3004                         }
   3005 
   3006                         choiceCount = 2;
   3007                     } else if (converterData->version == 1) {
   3008                         /* ISO-2022-CN-EXT */
   3009 
   3010                         /* try one of the other converters */
   3011                         switch(choices[0]) {
   3012                         case GB2312_1:
   3013                             choices[1] = (int8_t)CNS_11643_1;
   3014                             choices[2] = (int8_t)ISO_IR_165;
   3015                             break;
   3016                         case ISO_IR_165:
   3017                             choices[1] = (int8_t)GB2312_1;
   3018                             choices[2] = (int8_t)CNS_11643_1;
   3019                             break;
   3020                         default: /* CNS_11643_x */
   3021                             choices[1] = (int8_t)GB2312_1;
   3022                             choices[2] = (int8_t)ISO_IR_165;
   3023                             break;
   3024                         }
   3025 
   3026                         choiceCount = 3;
   3027                     } else {
   3028                         choices[0] = (int8_t)CNS_11643_1;
   3029                         choices[1] = (int8_t)GB2312_1;
   3030                     }
   3031                 }
   3032 
   3033                 cs = g = 0;
   3034                 /*
   3035                  * len==0: no mapping found yet
   3036                  * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
   3037                  * len>0: found a roundtrip result, done
   3038                  */
   3039                 len = 0;
   3040                 /*
   3041                  * We will turn off useFallback after finding a fallback,
   3042                  * but we still get fallbacks from PUA code points as usual.
   3043                  * Therefore, we will also need to check that we don't overwrite
   3044                  * an early fallback with a later one.
   3045                  */
   3046                 useFallback = cnv->useFallback;
   3047 
   3048                 for(i = 0; i < choiceCount && len <= 0; ++i) {
   3049                     int8_t cs0 = choices[i];
   3050                     if(cs0 > 0) {
   3051                         uint32_t value;
   3052                         int32_t len2;
   3053                         if(cs0 >= CNS_11643_0) {
   3054                             len2 = MBCS_FROM_UCHAR32_ISO2022(
   3055                                         converterData->myConverterArray[CNS_11643],
   3056                                         sourceChar,
   3057                                         &value,
   3058                                         useFallback,
   3059                                         MBCS_OUTPUT_3);
   3060                             if(len2 == 3 || (len2 == -3 && len == 0)) {
   3061                                 targetValue = value;
   3062                                 cs = (int8_t)(CNS_11643_0 + (value >> 16) - 0x80);
   3063                                 if(len2 >= 0) {
   3064                                     len = 2;
   3065                                 } else {
   3066                                     len = -2;
   3067                                     useFallback = FALSE;
   3068                                 }
   3069                                 if(cs == CNS_11643_1) {
   3070                                     g = 1;
   3071                                 } else if(cs == CNS_11643_2) {
   3072                                     g = 2;
   3073                                 } else /* plane 3..7 */ if(converterData->version == 1) {
   3074                                     g = 3;
   3075                                 } else {
   3076                                     /* ISO-2022-CN (without -EXT) does not support plane 3..7 */
   3077                                     len = 0;
   3078                                 }
   3079                             }
   3080                         } else {
   3081                             /* GB2312_1 or ISO-IR-165 */
   3082                             U_ASSERT(cs0<UCNV_2022_MAX_CONVERTERS);
   3083                             len2 = MBCS_FROM_UCHAR32_ISO2022(
   3084                                         converterData->myConverterArray[cs0],
   3085                                         sourceChar,
   3086                                         &value,
   3087                                         useFallback,
   3088                                         MBCS_OUTPUT_2);
   3089                             if(len2 == 2 || (len2 == -2 && len == 0)) {
   3090                                 targetValue = value;
   3091                                 len = len2;
   3092                                 cs = cs0;
   3093                                 g = 1;
   3094                                 useFallback = FALSE;
   3095                             }
   3096                         }
   3097                     }
   3098                 }
   3099 
   3100                 if(len != 0) {
   3101                     len = 0; /* count output bytes; it must have been abs(len) == 2 */
   3102 
   3103                     /* write the designation sequence if necessary */
   3104                     if(cs != pFromU2022State->cs[g]) {
   3105                         if(cs < CNS_11643) {
   3106                             uprv_memcpy(buffer, escSeqCharsCN[cs], 4);
   3107                         } else {
   3108                             U_ASSERT(cs >= CNS_11643_1);
   3109                             uprv_memcpy(buffer, escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)], 4);
   3110                         }
   3111                         len = 4;
   3112                         pFromU2022State->cs[g] = cs;
   3113                         if(g == 1) {
   3114                             /* changing the SO/G1 charset invalidates the choices[] */
   3115                             choiceCount = 0;
   3116                         }
   3117                     }
   3118 
   3119                     /* write the shift sequence if necessary */
   3120                     if(g != pFromU2022State->g) {
   3121                         switch(g) {
   3122                         case 1:
   3123                             buffer[len++] = UCNV_SO;
   3124 
   3125                             /* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */
   3126                             pFromU2022State->g = 1;
   3127                             break;
   3128                         case 2:
   3129                             buffer[len++] = 0x1b;
   3130                             buffer[len++] = 0x4e;
   3131                             break;
   3132                         default: /* case 3 */
   3133                             buffer[len++] = 0x1b;
   3134                             buffer[len++] = 0x4f;
   3135                             break;
   3136                         }
   3137                     }
   3138 
   3139                     /* write the two output bytes */
   3140                     buffer[len++] = (char)(targetValue >> 8);
   3141                     buffer[len++] = (char)targetValue;
   3142                 } else {
   3143                     /* if we cannot find the character after checking all codepages
   3144                      * then this is an error
   3145                      */
   3146                     *err = U_INVALID_CHAR_FOUND;
   3147                     cnv->fromUChar32=sourceChar;
   3148                     break;
   3149                 }
   3150             }
   3151 
   3152             /* output len>0 bytes in buffer[] */
   3153             if(len == 1) {
   3154                 *target++ = buffer[0];
   3155                 if(offsets) {
   3156                     *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */
   3157                 }
   3158             } else if(len == 2 && (target + 2) <= targetLimit) {
   3159                 *target++ = buffer[0];
   3160                 *target++ = buffer[1];
   3161                 if(offsets) {
   3162                     int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));
   3163                     *offsets++ = sourceIndex;
   3164                     *offsets++ = sourceIndex;
   3165                 }
   3166             } else {
   3167                 fromUWriteUInt8(
   3168                     cnv,
   3169                     buffer, len,
   3170                     &target, (const char *)targetLimit,
   3171                     &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
   3172                     err);
   3173                 if(U_FAILURE(*err)) {
   3174                     break;
   3175                 }
   3176             }
   3177         } /* end if(myTargetIndex<myTargetLength) */
   3178         else{
   3179             *err =U_BUFFER_OVERFLOW_ERROR;
   3180             break;
   3181         }
   3182 
   3183     }/* end while(mySourceIndex<mySourceLength) */
   3184 
   3185     /*
   3186      * the end of the input stream and detection of truncated input
   3187      * are handled by the framework, but for ISO-2022-CN conversion
   3188      * we need to be in ASCII mode at the very end
   3189      *
   3190      * conditions:
   3191      *   successful
   3192      *   not in ASCII mode
   3193      *   end of input and no truncated input
   3194      */
   3195     if( U_SUCCESS(*err) &&
   3196         pFromU2022State->g!=0 &&
   3197         args->flush && source>=sourceLimit && cnv->fromUChar32==0
   3198     ) {
   3199         int32_t sourceIndex;
   3200 
   3201         /* we are switching to ASCII */
   3202         pFromU2022State->g=0;
   3203 
   3204         /* get the source index of the last input character */
   3205         /*
   3206          * TODO this would be simpler and more reliable if we used a pair
   3207          * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
   3208          * so that we could simply use the prevSourceIndex here;
   3209          * this code gives an incorrect result for the rare case of an unmatched
   3210          * trail surrogate that is alone in the last buffer of the text stream
   3211          */
   3212         sourceIndex=(int32_t)(source-args->source);
   3213         if(sourceIndex>0) {
   3214             --sourceIndex;
   3215             if( U16_IS_TRAIL(args->source[sourceIndex]) &&
   3216                 (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
   3217             ) {
   3218                 --sourceIndex;
   3219             }
   3220         } else {
   3221             sourceIndex=-1;
   3222         }
   3223 
   3224         fromUWriteUInt8(
   3225             cnv,
   3226             SHIFT_IN_STR, 1,
   3227             &target, (const char *)targetLimit,
   3228             &offsets, sourceIndex,
   3229             err);
   3230     }
   3231 
   3232     /*save the state and return */
   3233     args->source = source;
   3234     args->target = (char*)target;
   3235 }
   3236 
   3237 
   3238 static void
   3239 UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
   3240                                                UErrorCode* err){
   3241     char tempBuf[3];
   3242     const char *mySource = (char *) args->source;
   3243     UChar *myTarget = args->target;
   3244     const char *mySourceLimit = args->sourceLimit;
   3245     uint32_t targetUniChar = 0x0000;
   3246     uint32_t mySourceChar = 0x0000;
   3247     UConverterDataISO2022* myData;
   3248     ISO2022State *pToU2022State;
   3249 
   3250     myData=(UConverterDataISO2022*)(args->converter->extraInfo);
   3251     pToU2022State = &myData->toU2022State;
   3252 
   3253     if(myData->key != 0) {
   3254         /* continue with a partial escape sequence */
   3255         goto escape;
   3256     } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
   3257         /* continue with a partial double-byte character */
   3258         mySourceChar = args->converter->toUBytes[0];
   3259         args->converter->toULength = 0;
   3260         targetUniChar = missingCharMarker;
   3261         goto getTrailByte;
   3262     }
   3263 
   3264     while(mySource < mySourceLimit){
   3265 
   3266         targetUniChar =missingCharMarker;
   3267 
   3268         if(myTarget < args->targetLimit){
   3269 
   3270             mySourceChar= (unsigned char) *mySource++;
   3271 
   3272             switch(mySourceChar){
   3273             case UCNV_SI:
   3274                 pToU2022State->g=0;
   3275                 if (myData->isEmptySegment) {
   3276                     myData->isEmptySegment = FALSE;	/* we are handling it, reset to avoid future spurious errors */
   3277                     *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   3278                     args->converter->toUCallbackReason = UCNV_IRREGULAR;
   3279                     args->converter->toUBytes[0] = mySourceChar;
   3280                     args->converter->toULength = 1;
   3281                     args->target = myTarget;
   3282                     args->source = mySource;
   3283                     return;
   3284                 }
   3285                 continue;
   3286 
   3287             case UCNV_SO:
   3288                 if(pToU2022State->cs[1] != 0) {
   3289                     pToU2022State->g=1;
   3290                     myData->isEmptySegment = TRUE;	/* Begin a new segment, empty so far */
   3291                     continue;
   3292                 } else {
   3293                     /* illegal to have SO before a matching designator */
   3294                     myData->isEmptySegment = FALSE;	/* Handling a different error, reset this to avoid future spurious errs */
   3295                     break;
   3296                 }
   3297 
   3298             case ESC_2022:
   3299                 mySource--;
   3300 escape:
   3301                 {
   3302                     const char * mySourceBefore = mySource;
   3303                     int8_t toULengthBefore = args->converter->toULength;
   3304 
   3305                     changeState_2022(args->converter,&(mySource),
   3306                         mySourceLimit, ISO_2022_CN,err);
   3307 
   3308                     /* After SO there must be at least one character before a designator (designator error handled separately) */
   3309                     if(myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
   3310                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   3311                         args->converter->toUCallbackReason = UCNV_IRREGULAR;
   3312                         args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore));
   3313                     }
   3314                 }
   3315 
   3316                 /* invalid or illegal escape sequence */
   3317                 if(U_FAILURE(*err)){
   3318                     args->target = myTarget;
   3319                     args->source = mySource;
   3320                     myData->isEmptySegment = FALSE;	/* Reset to avoid future spurious errors */
   3321                     return;
   3322                 }
   3323                 continue;
   3324 
   3325             /* ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */
   3326 
   3327             case CR:
   3328                 /*falls through*/
   3329             case LF:
   3330                 uprv_memset(pToU2022State, 0, sizeof(ISO2022State));
   3331                 /* falls through */
   3332             default:
   3333                 /* convert one or two bytes */
   3334                 myData->isEmptySegment = FALSE;
   3335                 if(pToU2022State->g != 0) {
   3336                     if(mySource < mySourceLimit) {
   3337                         UConverterSharedData *cnv;
   3338                         StateEnum tempState;
   3339                         int32_t tempBufLen;
   3340                         int leadIsOk, trailIsOk;
   3341                         uint8_t trailByte;
   3342 getTrailByte:
   3343                         trailByte = (uint8_t)*mySource;
   3344                         /*
   3345                          * Ticket 5691: consistent illegal sequences:
   3346                          * - We include at least the first byte in the illegal sequence.
   3347                          * - If any of the non-initial bytes could be the start of a character,
   3348                          *   we stop the illegal sequence before the first one of those.
   3349                          *
   3350                          * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
   3351                          * an ESC/SO/SI, we report only the first byte as the illegal sequence.
   3352                          * Otherwise we convert or report the pair of bytes.
   3353                          */
   3354                         leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
   3355                         trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
   3356                         if (leadIsOk && trailIsOk) {
   3357                             ++mySource;
   3358                             tempState = (StateEnum)pToU2022State->cs[pToU2022State->g];
   3359                             if(tempState >= CNS_11643_0) {
   3360                                 cnv = myData->myConverterArray[CNS_11643];
   3361                                 tempBuf[0] = (char) (0x80+(tempState-CNS_11643_0));
   3362                                 tempBuf[1] = (char) (mySourceChar);
   3363                                 tempBuf[2] = (char) trailByte;
   3364                                 tempBufLen = 3;
   3365 
   3366                             }else{
   3367                                 U_ASSERT(tempState<UCNV_2022_MAX_CONVERTERS);
   3368                                 cnv = myData->myConverterArray[tempState];
   3369                                 tempBuf[0] = (char) (mySourceChar);
   3370                                 tempBuf[1] = (char) trailByte;
   3371                                 tempBufLen = 2;
   3372                             }
   3373                             targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE);
   3374                             mySourceChar = (mySourceChar << 8) | trailByte;
   3375                         } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
   3376                             /* report a pair of illegal bytes if the second byte is not a DBCS starter */
   3377                             ++mySource;
   3378                             /* add another bit so that the code below writes 2 bytes in case of error */
   3379                             mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
   3380                         }
   3381                         if(pToU2022State->g>=2) {
   3382                             /* return from a single-shift state to the previous one */
   3383                             pToU2022State->g=pToU2022State->prevG;
   3384                         }
   3385                     } else {
   3386                         args->converter->toUBytes[0] = (uint8_t)mySourceChar;
   3387                         args->converter->toULength = 1;
   3388                         goto endloop;
   3389                     }
   3390                 }
   3391                 else{
   3392                     if(mySourceChar <= 0x7f) {
   3393                         targetUniChar = (UChar) mySourceChar;
   3394                     }
   3395                 }
   3396                 break;
   3397             }
   3398             if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){
   3399                 if(args->offsets){
   3400                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   3401                 }
   3402                 *(myTarget++)=(UChar)targetUniChar;
   3403             }
   3404             else if(targetUniChar > missingCharMarker){
   3405                 /* disassemble the surrogate pair and write to output*/
   3406                 targetUniChar-=0x0010000;
   3407                 *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));
   3408                 if(args->offsets){
   3409                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   3410                 }
   3411                 ++myTarget;
   3412                 if(myTarget< args->targetLimit){
   3413                     *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
   3414                     if(args->offsets){
   3415                         args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   3416                     }
   3417                     ++myTarget;
   3418                 }else{
   3419                     args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
   3420                                     (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
   3421                 }
   3422 
   3423             }
   3424             else{
   3425                 /* Call the callback function*/
   3426                 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
   3427                 break;
   3428             }
   3429         }
   3430         else{
   3431             *err =U_BUFFER_OVERFLOW_ERROR;
   3432             break;
   3433         }
   3434     }
   3435 endloop:
   3436     args->target = myTarget;
   3437     args->source = mySource;
   3438 }
   3439 
   /*
    * Write this converter's substitution character to the fromUnicode output.
    *
    * The variant is selected by the first character of myConverterData->locale:
    *  - 'j', 'c', and 'k' with version 0 assemble the output in a small local
    *    buffer: first any SI/SO/escape bytes that change the fromUnicode state,
    *    then the substitution byte(s). The buffer is then handed to
    *    ucnv_cbFromUWriteBytes() together with offsetIndex.
    *  - 'k' with a nonzero version delegates to currentConverter through
    *    ucnv_cbFromUWriteSub() and returns without using the local buffer.
    *  - any other variant writes zero bytes.
    *
    * args         fromUnicode arguments; args->converter is the ISO-2022 converter
    * offsetIndex  forwarded to ucnv_cbFromUWriteBytes()
    * err          error code filled in by the ucnv_cb* function that is called
    */
   3440 static void
   3441 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
   3442     UConverter *cnv = args->converter;
   3443     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo;
   3444     ISO2022State *pFromU2022State=&myConverterData->fromU2022State;
   3445     char *p, *subchar;
   3446     char buffer[8]; /* the longest sequence built below is 5 bytes: SI, ESC ( B, one substitution byte */
   3447     int32_t length;
   3448 
   3449     subchar=(char *)cnv->subChars;
   3450     length=cnv->subCharLen; /* assume length==1 for most variants */
   3451 
   3452     p = buffer; /* p is the write cursor; (p - buffer) is the number of bytes to emit */
   3453     switch(myConverterData->locale[0]){
   3454     case 'j':
   3455         {
   3456             int8_t cs;
   3457 
   3458             if(pFromU2022State->g == 1) {
   3459                 /* JIS7: switch from G1 to G0 */
   3460                 pFromU2022State->g = 0;
   3461                 *p++ = UCNV_SI;
   3462             }
   3463 
   3464             cs = pFromU2022State->cs[0];
   3465             if(cs != ASCII && cs != JISX201) {
   3466                 /* not in ASCII or JIS X 0201: switch to ASCII */
   3467                 pFromU2022State->cs[0] = (int8_t)ASCII;
                    /* the three bytes below are ESC ( B */
   3468                 *p++ = '\x1b';
   3469                 *p++ = '\x28';
   3470                 *p++ = '\x42';
   3471             }
   3472 
                /* only the first substitution byte is written, whatever subCharLen is */
   3473             *p++ = subchar[0];
   3474             break;
   3475         }
   3476     case 'c':
   3477         if(pFromU2022State->g != 0) {
   3478             /* not in ASCII mode: switch to ASCII */
   3479             pFromU2022State->g = 0;
   3480             *p++ = UCNV_SI;
   3481         }
            /* only the first substitution byte is written, whatever subCharLen is */
   3482         *p++ = subchar[0];
   3483         break;
   3484     case 'k':
   3485         if(myConverterData->version == 0) {
                /* here fromUnicodeStatus acts as a flag: nonzero means DBCS mode, zero means SBCS mode */
   3486             if(length == 1) {
   3487                 if((UBool)args->converter->fromUnicodeStatus) {
   3488                     /* in DBCS mode: switch to SBCS */
   3489                     args->converter->fromUnicodeStatus = 0;
   3490                     *p++ = UCNV_SI;
   3491                 }
   3492                 *p++ = subchar[0];
   3493             } else /* length == 2*/ {
                    /* every length other than 1 takes this branch and writes exactly two bytes */
   3494                 if(!(UBool)args->converter->fromUnicodeStatus) {
   3495                     /* in SBCS mode: switch to DBCS */
   3496                     args->converter->fromUnicodeStatus = 1;
   3497                     *p++ = UCNV_SO;
   3498                 }
   3499                 *p++ = subchar[0];
   3500                 *p++ = subchar[1];
   3501             }
   3502             break;
   3503         } else {
                /*
                 * Delegate to the subconverter. The statements below are order-sensitive:
                 * save, install, redirect args->converter, call, undo, restore.
                 */
   3504             /* save the subconverter's substitution string */
   3505             uint8_t *currentSubChars = myConverterData->currentConverter->subChars;
   3506             int8_t currentSubCharLen = myConverterData->currentConverter->subCharLen;
   3507 
   3508             /* set our substitution string into the subconverter */
   3509             myConverterData->currentConverter->subChars = (uint8_t *)subchar;
   3510             myConverterData->currentConverter->subCharLen = (int8_t)length;
   3511 
   3512             /* let the subconverter write the subchar, set/retrieve fromUChar32 state */
   3513             args->converter = myConverterData->currentConverter;
   3514             myConverterData->currentConverter->fromUChar32 = cnv->fromUChar32;
                /* NOTE(review): a literal 0 is passed here instead of offsetIndex - confirm this is intended */
   3515             ucnv_cbFromUWriteSub(args, 0, err);
   3516             cnv->fromUChar32 = myConverterData->currentConverter->fromUChar32;
   3517             args->converter = cnv;
   3518 
   3519             /* restore the subconverter's substitution string */
   3520             myConverterData->currentConverter->subChars = currentSubChars;
   3521             myConverterData->currentConverter->subCharLen = currentSubCharLen;
   3522 
                /* on overflow, move the bytes the subconverter buffered into this converter's error buffer */
   3523             if(*err == U_BUFFER_OVERFLOW_ERROR) {
   3524                 if(myConverterData->currentConverter->charErrorBufferLength > 0) {
   3525                     uprv_memcpy(
   3526                         cnv->charErrorBuffer,
   3527                         myConverterData->currentConverter->charErrorBuffer,
   3528                         myConverterData->currentConverter->charErrorBufferLength);
   3529                 }
   3530                 cnv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;
   3531                 myConverterData->currentConverter->charErrorBufferLength = 0;
   3532             }
                /* the subconverter already produced the output; skip ucnv_cbFromUWriteBytes() below */
   3533             return;
   3534         }
   3535     default:
   3536         /* not expected */
            /* p still equals buffer here, so the call below writes zero bytes */
   3537         break;
   3538     }
   3539     ucnv_cbFromUWriteBytes(args,
   3540                            buffer, (int32_t)(p - buffer),
   3541                            offsetIndex, err);
   3542 }
   3543 
   3544 /*
   3545  * Structure for cloning an ISO 2022 converter into a single memory block.
   3546  * ucnv_safeClone() of the converter will align the entire cloneStruct,
   3547  * and then ucnv_safeClone() of the sub-converter may additionally align
   3548  * currentConverter inside the cloneStruct, for which we need the deadSpace
   3549  * after currentConverter.
   3550  * This is because UAlignedMemory may be larger than the actually
   3551  * necessary alignment size for the platform.
   3552  * The other cloneStruct fields will not be moved around,
   3553  * and are aligned properly with cloneStruct's alignment.
         *
         * How _ISO_2022_SafeClone() uses the fields:
         *  - cnv is the clone that is returned to the caller; its extraInfo is
         *    pointed at mydata.
         *  - currentConverter is the memory handed to ucnv_safeClone() for the
         *    sub-converter, with a size of sizeof(UConverter)+sizeof(UAlignedMemory),
         *    i.e. currentConverter plus deadSpace.
         *  - mydata receives a byte copy of the source converter's
         *    UConverterDataISO2022.
         * The field order must not be changed: deadSpace has to follow
         * currentConverter directly.
   3554  */
   3555 struct cloneStruct
   3556 {
   3557     UConverter cnv; /* the cloned ISO-2022 converter itself */
   3558     UConverter currentConverter; /* storage for the cloned sub-converter */
   3559     UAlignedMemory deadSpace; /* padding consumed if the sub-converter clone gets realigned */
   3560     UConverterDataISO2022 mydata; /* the clone's private ISO-2022 state */
   3561 };
   3562 
   3563 
   3564 static UConverter *
   3565 _ISO_2022_SafeClone(
   3566             const UConverter *cnv,
   3567             void *stackBuffer,
   3568             int32_t *pBufferSize,
   3569             UErrorCode *status)
   3570 {
   3571     struct cloneStruct * localClone;
   3572     UConverterDataISO2022 *cnvData;
   3573     int32_t i, size;
   3574 
   3575     if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */
   3576         *pBufferSize = (int32_t)sizeof(struct cloneStruct);
   3577         return NULL;
   3578     }
   3579 
   3580     cnvData = (UConverterDataISO2022 *)cnv->extraInfo;
   3581     localClone = (struct cloneStruct *)stackBuffer;
   3582 
   3583     /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
   3584 
   3585     uprv_memcpy(&localClone->mydata, cnvData, sizeof(UConverterDataISO2022));
   3586     localClone->cnv.extraInfo = &localClone->mydata; /* set pointer to extra data */
   3587     localClone->cnv.isExtraLocal = TRUE;
   3588 
   3589     /* share the subconverters */
   3590 
   3591     if(cnvData->currentConverter != NULL) {
   3592         size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */
   3593         localClone->mydata.currentConverter =
   3594             ucnv_safeClone(cnvData->currentConverter,
   3595                             &localClone->currentConverter,
   3596                             &size, status);
   3597         if(U_FAILURE(*status)) {
   3598             return NULL;
   3599         }
   3600     }
   3601 
   3602     for(i=0; i<UCNV_2022_MAX_CONVERTERS; ++i) {
   3603         if(cnvData->myConverterArray[i] != NULL) {
   3604             ucnv_incrementRefCount(cnvData->myConverterArray[i]);
   3605         }
   3606     }
   3607 
   3608     return &localClone->cnv;
   3609 }
   3610 
   3611 static void
   3612 _ISO_2022_GetUnicodeSet(const UConverter *cnv,
   3613                     const USetAdder *sa,
   3614                     UConverterUnicodeSet which,
   3615                     UErrorCode *pErrorCode)
   3616 {
   3617     int32_t i;
   3618     UConverterDataISO2022* cnvData;
   3619 
   3620     if (U_FAILURE(*pErrorCode)) {
   3621         return;
   3622     }
   3623 #ifdef U_ENABLE_GENERIC_ISO_2022
   3624     if (cnv->sharedData == &_ISO2022Data) {
   3625         /* We use UTF-8 in this case */
   3626         sa->addRange(sa->set, 0, 0xd7FF);
   3627         sa->addRange(sa->set, 0xE000, 0x10FFFF);
   3628         return;
   3629     }
   3630 #endif
   3631 
   3632     cnvData = (UConverterDataISO2022*)cnv->extraInfo;
   3633 
   3634     /* open a set and initialize it with code points that are algorithmically round-tripped */
   3635     switch(cnvData->locale[0]){
   3636     case 'j':
   3637         /* include JIS X 0201 which is hardcoded */
   3638         sa->add(sa->set, 0xa5);
   3639         sa->add(sa->set, 0x203e);
   3640         if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
   3641             /* include Latin-1 for some variants of JP */
   3642             sa->addRange(sa->set, 0, 0xff);
   3643         } else {
   3644             /* include ASCII for JP */
   3645             sa->addRange(sa->set, 0, 0x7f);
   3646         }
   3647         if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
   3648             /*
   3649              * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0
   3650              * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8)
   3651              * use half-width Katakana.
   3652              * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode)
   3653              * half-width Katakana via the ESC ( I sequence.
   3654              * However, we only emit (fromUnicode) half-width Katakana according to the
   3655              * definition of each variant.
   3656              *
   3657              * When including fallbacks,
   3658              * we need to include half-width Katakana Unicode code points for all JP variants because
   3659              * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana).
   3660              */
   3661             /* include half-width Katakana for JP */
   3662             sa->addRange(sa->set, HWKANA_START, HWKANA_END);
   3663         }
   3664         break;
   3665     case 'c':
   3666     case 'z':
   3667         /* include ASCII for CN */
   3668         sa->addRange(sa->set, 0, 0x7f);
   3669         break;
   3670     case 'k':
   3671         /* there is only one converter for KR, and it is not in the myConverterArray[] */
   3672         cnvData->currentConverter->sharedData->impl->getUnicodeSet(
   3673                 cnvData->currentConverter, sa, which, pErrorCode);
   3674         /* the loop over myConverterArray[] will simply not find another converter */
   3675         break;
   3676     default:
   3677         break;
   3678     }
   3679 
   3680 #if 0  /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */
   3681             if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
   3682                 cnvData->version==0 && i==CNS_11643
   3683             ) {
   3684                 /* special handling for non-EXT ISO-2022-CN: add only code points for CNS planes 1 and 2 */
   3685                 ucnv_MBCSGetUnicodeSetForBytes(
   3686                         cnvData->myConverterArray[i],
   3687                         sa, UCNV_ROUNDTRIP_SET,
   3688                         0, 0x81, 0x82,
   3689                         pErrorCode);
   3690             }
   3691 #endif
   3692 
   3693     for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
   3694         UConverterSetFilter filter;
   3695         if(cnvData->myConverterArray[i]!=NULL) {
   3696             if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
   3697                 cnvData->version==0 && i==CNS_11643
   3698             ) {
   3699                 /*
   3700                  * Version-specific for CN:
   3701                  * CN version 0 does not map CNS planes 3..7 although
   3702                  * they are all available in the CNS conversion table;
   3703                  * CN version 1 (-EXT) does map them all.
   3704                  * The two versions create different Unicode sets.
   3705                  */
   3706                 filter=UCNV_SET_FILTER_2022_CN;
   3707             } else if(cnvData->locale[0]=='j' && i==JISX208) {
   3708                 /*
   3709                  * Only add code points that map to Shift-JIS codes
   3710                  * corresponding to JIS X 0208.
   3711                  */
   3712                 filter=UCNV_SET_FILTER_SJIS;
   3713             } else if(i==KSC5601) {
   3714                 /*
   3715                  * Some of the KSC 5601 tables (convrtrs.txt has this aliases on multiple tables)
   3716                  * are broader than GR94.
   3717                  */
   3718                 filter=UCNV_SET_FILTER_GR94DBCS;
   3719             } else {
   3720                 filter=UCNV_SET_FILTER_NONE;
   3721             }
   3722             ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, filter, pErrorCode);
   3723         }
   3724     }
   3725 
   3726     /*
   3727      * ISO 2022 converters must not convert SO/SI/ESC despite what
   3728      * sub-converters do by themselves.
   3729      * Remove these characters from the set.
   3730      */
   3731     sa->remove(sa->set, 0x0e);
   3732     sa->remove(sa->set, 0x0f);
   3733     sa->remove(sa->set, 0x1b);
   3734 
   3735     /* ISO 2022 converters do not convert C1 controls either */
   3736     sa->removeRange(sa->set, 0x80, 0x9f);
   3737 }
   3738 
   3739 static const UConverterImpl _ISO2022Impl={ /* function table for the generic ISO-2022 converter; referenced by _ISO2022Data */
        /* NOTE(review): the entries are positional; the slot meanings come from the UConverterImpl declaration, which is not visible here */
   3740     UCNV_ISO_2022, /* converter type constant */
   3741 
   3742     NULL,
   3743     NULL,
   3744 
   3745     _ISO2022Open, /* open, close and reset functions; the JP, KR and CN tables below use the same three */
   3746     _ISO2022Close,
   3747     _ISO2022Reset,
   3748 
        /* the four conversion entries are only populated when the generic (UTF-8 based) converter is compiled in */
   3749 #ifdef U_ENABLE_GENERIC_ISO_2022
   3750     T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
   3751     T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
   3752     ucnv_fromUnicode_UTF8,
   3753     ucnv_fromUnicode_UTF8_OFFSETS_LOGIC,
   3754 #else
   3755     NULL,
   3756     NULL,
   3757     NULL,
   3758     NULL,
   3759 #endif
   3760     NULL,
   3761 
   3762     NULL,
   3763     _ISO2022getName,
   3764     _ISO_2022_WriteSub, /* writes the substitution character */
   3765     _ISO_2022_SafeClone, /* clones the ISO-2022 specific state */
   3766     _ISO_2022_GetUnicodeSet, /* enumerates the convertible code points */
   3767 
   3768     NULL,
   3769     NULL
   3770 };
   3771 static const UConverterStaticData _ISO2022StaticData={ /* static description of the generic "ISO_2022" converter; referenced by _ISO2022Data */
        /* NOTE(review): the entries are positional; the field meanings come from the UConverterStaticData declaration, which is not visible here */
   3772     sizeof(UConverterStaticData),
   3773     "ISO_2022", /* converter name */
   3774     2022,
   3775     UCNV_IBM,
   3776     UCNV_ISO_2022, /* same type constant as the first entry of _ISO2022Impl */
   3777     1, /* NOTE(review): presumably the minimum bytes per character, given the maximum on the next line - confirm */
   3778     3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */
   3779     { 0x1a, 0, 0, 0 }, /* NOTE(review): presumably the default substitution bytes (0x1a) with their length on the next line - confirm */
   3780     1,
   3781     FALSE,
   3782     FALSE,
   3783     0,
   3784     0,
   3785     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   3786 };
   3787 const UConverterSharedData _ISO2022Data={ /* shared data for the generic ISO-2022 converter; not static, and compared by address in _ISO_2022_GetUnicodeSet() */
        /* NOTE(review): the entries are positional; the field meanings come from the UConverterSharedData declaration, which is not visible here */
   3788     sizeof(UConverterSharedData),
   3789     ~((uint32_t) 0), /* all bits set; NOTE(review): presumably a reference count that is pinned so the static data is never released - confirm */
   3790     NULL,
   3791     NULL,
   3792     &_ISO2022StaticData, /* static description defined above */
   3793     FALSE,
   3794     &_ISO2022Impl, /* function table defined above */
   3795     0, UCNV_MBCS_TABLE_INITIALIZER
   3796 };
   3797 
   3798 /*************JP****************/
   3799 static const UConverterImpl _ISO2022JPImpl={ /* function table for ISO-2022-JP; referenced by _ISO2022JPData */
        /* NOTE(review): the entries are positional; the slot meanings come from the UConverterImpl declaration, which is not visible here */
   3800     UCNV_ISO_2022, /* converter type constant */
   3801 
   3802     NULL,
   3803     NULL,
   3804 
   3805     _ISO2022Open, /* open, close and reset functions, the same as in the other ISO-2022 tables */
   3806     _ISO2022Close,
   3807     _ISO2022Reset,
   3808 
        /* one offsets-aware function per direction, listed twice each */
   3809     UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,
   3810     UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,
   3811     UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,
   3812     UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,
   3813     NULL,
   3814 
   3815     NULL,
   3816     _ISO2022getName,
   3817     _ISO_2022_WriteSub, /* writes the substitution character */
   3818     _ISO_2022_SafeClone, /* clones the ISO-2022 specific state */
   3819     _ISO_2022_GetUnicodeSet, /* enumerates the convertible code points */
   3820 
   3821     NULL,
   3822     NULL
   3823 };
   3824 static const UConverterStaticData _ISO2022JPStaticData={ /* static description of the "ISO_2022_JP" converter; referenced by _ISO2022JPData */
        /* NOTE(review): the entries are positional; the field meanings come from the UConverterStaticData declaration, which is not visible here */
   3825     sizeof(UConverterStaticData),
   3826     "ISO_2022_JP", /* converter name */
   3827     0,
   3828     UCNV_IBM,
   3829     UCNV_ISO_2022, /* same type constant as the first entry of _ISO2022JPImpl */
   3830     1, /* NOTE(review): presumably the minimum bytes per character, given the maximum on the next line - confirm */
   3831     6, /* max 6 bytes per UChar: 4-byte escape sequence + DBCS */
   3832     { 0x1a, 0, 0, 0 }, /* NOTE(review): presumably the default substitution bytes (0x1a) with their length on the next line - confirm */
   3833     1,
   3834     FALSE,
   3835     FALSE,
   3836     0,
   3837     0,
   3838     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   3839 };
   3840 
   3841 namespace { /* unnamed namespace: _ISO2022JPData gets internal linkage */
   3842 
        /* shared data for ISO-2022-JP: ties _ISO2022JPStaticData to _ISO2022JPImpl */
        /* NOTE(review): the entries are positional; the field meanings come from the UConverterSharedData declaration, which is not visible here */
   3843 const UConverterSharedData _ISO2022JPData={
   3844     sizeof(UConverterSharedData),
   3845     ~((uint32_t) 0), /* all bits set; NOTE(review): presumably a reference count that is pinned so the static data is never released - confirm */
   3846     NULL,
   3847     NULL,
   3848     &_ISO2022JPStaticData, /* static description defined above */
   3849     FALSE,
   3850     &_ISO2022JPImpl, /* function table defined above */
   3851     0, UCNV_MBCS_TABLE_INITIALIZER
   3852 };
   3853 
   3854 }  // namespace
   3855 
   3856 /************* KR ***************/
   3857 static const UConverterImpl _ISO2022KRImpl={ /* function table for ISO-2022-KR; referenced by _ISO2022KRData */
        /* NOTE(review): the entries are positional; the slot meanings come from the UConverterImpl declaration, which is not visible here */
   3858     UCNV_ISO_2022, /* converter type constant */
   3859 
   3860     NULL,
   3861     NULL,
   3862 
   3863     _ISO2022Open, /* open, close and reset functions, the same as in the other ISO-2022 tables */
   3864     _ISO2022Close,
   3865     _ISO2022Reset,
   3866 
        /* one offsets-aware function per direction, listed twice each */
   3867     UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,
   3868     UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,
   3869     UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,
   3870     UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,
   3871     NULL,
   3872 
   3873     NULL,
   3874     _ISO2022getName,
   3875     _ISO_2022_WriteSub, /* writes the substitution character */
   3876     _ISO_2022_SafeClone, /* clones the ISO-2022 specific state */
   3877     _ISO_2022_GetUnicodeSet, /* enumerates the convertible code points */
   3878 
   3879     NULL,
   3880     NULL
   3881 };
   3882 static const UConverterStaticData _ISO2022KRStaticData={ /* static description of the "ISO_2022_KR" converter; referenced by _ISO2022KRData */
        /* NOTE(review): the entries are positional; the field meanings come from the UConverterStaticData declaration, which is not visible here */
   3883     sizeof(UConverterStaticData),
   3884     "ISO_2022_KR", /* converter name */
   3885     0,
   3886     UCNV_IBM,
   3887     UCNV_ISO_2022, /* same type constant as the first entry of _ISO2022KRImpl */
   3888     1, /* NOTE(review): presumably the minimum bytes per character, given the maximum on the next line - confirm */
   3889     3, /* max 3 bytes per UChar: SO+DBCS */
   3890     { 0x1a, 0, 0, 0 }, /* NOTE(review): presumably the default substitution bytes (0x1a) with their length on the next line - confirm */
   3891     1,
   3892     FALSE,
   3893     FALSE,
   3894     0,
   3895     0,
   3896     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   3897 };
   3898 
   3899 namespace { /* unnamed namespace: _ISO2022KRData gets internal linkage */
   3900 
        /* shared data for ISO-2022-KR: ties _ISO2022KRStaticData to _ISO2022KRImpl */
        /* NOTE(review): the entries are positional; the field meanings come from the UConverterSharedData declaration, which is not visible here */
   3901 const UConverterSharedData _ISO2022KRData={
   3902     sizeof(UConverterSharedData),
   3903     ~((uint32_t) 0), /* all bits set; NOTE(review): presumably a reference count that is pinned so the static data is never released - confirm */
   3904     NULL,
   3905     NULL,
   3906     &_ISO2022KRStaticData, /* static description defined above */
   3907     FALSE,
   3908     &_ISO2022KRImpl, /* function table defined above */
   3909     0, UCNV_MBCS_TABLE_INITIALIZER
   3910 };
   3911 
   3912 }  // namespace
   3913 
   3914 /*************** CN ***************/
   3915 static const UConverterImpl _ISO2022CNImpl={ /* function table for ISO-2022-CN; referenced by _ISO2022CNData */
        /* NOTE(review): the entries are positional; the slot meanings come from the UConverterImpl declaration, which is not visible here */
   3916 
   3917     UCNV_ISO_2022, /* converter type constant */
   3918 
   3919     NULL,
   3920     NULL,
   3921 
   3922     _ISO2022Open, /* open, close and reset functions, the same as in the other ISO-2022 tables */
   3923     _ISO2022Close,
   3924     _ISO2022Reset,
   3925 
        /* one offsets-aware function per direction, listed twice each */
   3926     UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,
   3927     UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,
   3928     UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,
   3929     UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,
   3930     NULL,
   3931 
   3932     NULL,
   3933     _ISO2022getName,
   3934     _ISO_2022_WriteSub, /* writes the substitution character */
   3935     _ISO_2022_SafeClone, /* clones the ISO-2022 specific state */
   3936     _ISO_2022_GetUnicodeSet, /* enumerates the convertible code points */
   3937 
   3938     NULL,
   3939     NULL
   3940 };
   3941 static const UConverterStaticData _ISO2022CNStaticData={ /* static description of the "ISO_2022_CN" converter; referenced by _ISO2022CNData */
        /* NOTE(review): the entries are positional; the field meanings come from the UConverterStaticData declaration, which is not visible here */
   3942     sizeof(UConverterStaticData),
   3943     "ISO_2022_CN", /* converter name */
   3944     0,
   3945     UCNV_IBM,
   3946     UCNV_ISO_2022, /* same type constant as the first entry of _ISO2022CNImpl */
   3947     1, /* NOTE(review): presumably the minimum bytes per character, given the maximum on the next line - confirm */
   3948     8, /* max 8 bytes per UChar: 4-byte CNS designator + 2 bytes for SS2/SS3 + DBCS */
   3949     { 0x1a, 0, 0, 0 }, /* NOTE(review): presumably the default substitution bytes (0x1a) with their length on the next line - confirm */
   3950     1,
   3951     FALSE,
   3952     FALSE,
   3953     0,
   3954     0,
   3955     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   3956 };
   3957 
   3958 namespace { /* unnamed namespace: _ISO2022CNData gets internal linkage */
   3959 
        /* shared data for ISO-2022-CN: ties _ISO2022CNStaticData to _ISO2022CNImpl */
        /* NOTE(review): the entries are positional; the field meanings come from the UConverterSharedData declaration, which is not visible here */
   3960 const UConverterSharedData _ISO2022CNData={
   3961     sizeof(UConverterSharedData),
   3962     ~((uint32_t) 0), /* all bits set; NOTE(review): presumably a reference count that is pinned so the static data is never released - confirm */
   3963     NULL,
   3964     NULL,
   3965     &_ISO2022CNStaticData, /* static description defined above */
   3966     FALSE,
   3967     &_ISO2022CNImpl, /* function table defined above */
   3968     0, UCNV_MBCS_TABLE_INITIALIZER
   3969 };
   3970 
   3971 }  // namespace
   3972 
   3973 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
   3974