Home | History | Annotate | Download | only in common
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 2000-2012, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 *   file name:  ucnv2022.cpp
      7 *   encoding:   US-ASCII
      8 *   tab size:   8 (not used)
      9 *   indentation:4
     10 *
     11 *   created on: 2000feb03
     12 *   created by: Markus W. Scherer
     13 *
     14 *   Change history:
     15 *
     16 *   06/29/2000  helena  Major rewrite of the callback APIs.
     17 *   08/08/2000  Ram     Included support for ISO-2022-JP-2
     18 *                       Changed implementation of toUnicode
     19 *                       function
     20 *   08/21/2000  Ram     Added support for ISO-2022-KR
     21 *   08/29/2000  Ram     Separated implementation of EBCDIC to
     22 *                       ucnvebdc.c
     23 *   09/20/2000  Ram     Added support for ISO-2022-CN
     24 *                       Added implementations for getNextUChar()
     25 *                       for specific 2022 country variants.
     26 *   10/31/2000  Ram     Implemented offsets logic functions
     27 */
     28 
     29 #include "unicode/utypes.h"
     30 
     31 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
     32 
     33 #include "unicode/ucnv.h"
     34 #include "unicode/uset.h"
     35 #include "unicode/ucnv_err.h"
     36 #include "unicode/ucnv_cb.h"
     37 #include "unicode/utf16.h"
     38 #include "ucnv_imp.h"
     39 #include "ucnv_bld.h"
     40 #include "ucnv_cnv.h"
     41 #include "ucnvmbcs.h"
     42 #include "cstring.h"
     43 #include "cmemory.h"
     44 #include "uassert.h"
     45 
     46 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) /* element count of a real array (not a pointer), cast to int32_t */
     47 
     48 #ifdef U_ENABLE_GENERIC_ISO_2022
     49 /*
     50  * I am disabling the generic ISO-2022 converter after proposing to do so on
     51  * the icu mailing list two days ago.
     52  *
     53  * Reasons:
     54  * 1. It does not fully support the ISO-2022/ECMA-35 specification with all of
     55  *    its designation sequences, single shifts with return to the previous state,
     56  *    switch-with-no-return to UTF-16BE or similar, etc.
     57  *    This is unlike the language-specific variants like ISO-2022-JP which
     58  *    require a much smaller repertoire of ISO-2022 features.
     59  *    These variants continue to be supported.
     60  * 2. I believe that no one is really using the generic ISO-2022 converter
     61  *    but rather always one of the language-specific variants.
     62  *    Note that ICU's generic ISO-2022 converter has always output one escape
     63  *    sequence followed by UTF-8 for the whole stream.
     64  * 3. Switching between subcharsets is extremely slow, because each time
     65  *    the previous converter is closed and a new one opened,
     66  *    without any kind of caching, least-recently-used list, etc.
     67  * 4. The code is currently buggy, and given the above it does not seem
     68  *    reasonable to spend the time on maintenance.
     69  * 5. ISO-2022 subcharsets should normally be used with 7-bit byte encodings.
     70  *    This means, for example, that when ISO-8859-7 is designated, the following
     71  *    ISO-2022 bytes 00..7f should be interpreted as ISO-8859-7 bytes 80..ff.
     72  *    The ICU ISO-2022 converter does not handle this - and has no information
     73  *    about which subconverter would have to be shifted vs. which is designed
     74  *    for 7-bit ISO-2022.
     75  *
     76  * Markus Scherer 2003-dec-03
     77  */
     78 #endif
     79 
     80 static const char SHIFT_IN_STR[]  = "\x0F"; /* the single control byte SI (0x0F) as a NUL-terminated string */
     81 // static const char SHIFT_OUT_STR[] = "\x0E";
     82 
     83 #define CR      0x0D /* carriage return */
     84 #define LF      0x0A /* line feed */
     85 #define H_TAB   0x09 /* horizontal tab */
     86 #define V_TAB   0x0B /* vertical tab */
     87 #define SPACE   0x20 /* space */
     88 
     89 enum {
            /* first and last code point (U+FF61..U+FF9F) of the range named "halfwidth Katakana" here */
     90     HWKANA_START=0xff61,
     91     HWKANA_END=0xff9f
     92 };
     93 
     94 /*
     95  * 94-character sets with native byte values A1..FE are encoded in ISO 2022
     96  * as bytes 21..7E. (Subtract 0x80.)
     97  * 96-character sets with native byte values A0..FF are encoded in ISO 2022
     98  * as bytes 20..7F. (Subtract 0x80.)
     99  * Do not encode C1 control codes with native bytes 80..9F
    100  * as bytes 00..1F (C0 control codes).
    101  */
    102 enum {
    103     GR94_START=0xa1, /* lowest native byte of a 94-character set  (ISO 2022 byte 0x21) */
    104     GR94_END=0xfe,   /* highest native byte of a 94-character set (ISO 2022 byte 0x7E) */
    105     GR96_START=0xa0, /* lowest native byte of a 96-character set  (ISO 2022 byte 0x20) */
    106     GR96_END=0xff    /* highest native byte of a 96-character set (ISO 2022 byte 0x7F) */
    107 };
    108 
    109 /*
    110  * ISO 2022 control codes must not be converted from Unicode
    111  * because they would mess up the byte stream.
    112  * The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b
    113  * corresponding to SO, SI, and ESC.
    114  */
    115 #define IS_2022_CONTROL(c) (((c)<0x20) && (((uint32_t)1<<(c))&0x0800c000)!=0) /* (c)<0x20 is tested first, so the shift count is below 32 whenever the shift is evaluated; c is expanded twice */
    116 
    117 /* for ISO-2022-JP and -CN implementations */
    118 typedef enum  {
                /*
                 * Charset identifiers for the JP and CN variants.
                 * The JP block and the CN block reuse the same numeric values
                 * (for example ISO8859_1 and GB2312_1 are both 1), so a value is only
                 * meaningful together with the variant that stored it.
                 */
    119         /* shared values */
    120         INVALID_STATE=-1,
    121         ASCII = 0,
    122 
    123         SS2_STATE=0x10,
    124         SS3_STATE,        /* = 0x11, the value following SS2_STATE */
    125 
    126         /* JP */
    127         ISO8859_1 = 1 ,
    128         ISO8859_7 = 2 ,
    129         JISX201  = 3,
    130         JISX208 = 4,      /* JISX208..KSC5601 is the range tested by IS_JP_DBCS() */
    131         JISX212 = 5,
    132         GB2312  =6,
    133         KSC5601 =7,
    134         HWKANA_7BIT=8,    /* Halfwidth Katakana 7 bit */
    135 
    136         /* CN */
    137         /* the first few enum constants must keep their values because they correspond to myConverterArray[] */
    138         GB2312_1=1,
    139         ISO_IR_165=2,
    140         CNS_11643=3,
    141 
    142         /*
    143          * these are used in StateEnum and ISO2022State variables,
    144          * but CNS_11643 must be used to index into myConverterArray[]
    145          */
    146         CNS_11643_0=0x20,
    147         CNS_11643_1,
    148         CNS_11643_2,
    149         CNS_11643_3,
    150         CNS_11643_4,
    151         CNS_11643_5,
    152         CNS_11643_6,
    153         CNS_11643_7       /* = 0x27; still fits the int8_t slots of ISO2022State.cs[] */
    154 } StateEnum;
    155 
    156 /* is the StateEnum charset value for a DBCS charset? */
    157 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601) /* true for JISX208, JISX212, GB2312 and KSC5601 (values 4..7); cs is expanded twice */
    158 
    159 #define CSM(cs) ((uint16_t)1<<(cs)) /* "charset mask": a single bit at position cs, as stored in jpCharsetMasks[] */
    160 
    161 /*
    162  * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence
    163  * to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x
    164  *
    165  * Note: The converter uses some leniency:
    166  * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in
    167  *   all versions, not just JIS7 and JIS8.
    168  * - ICU does not distinguish between different versions of JIS X 0208.
    169  */
    170 #if UCONFIG_NO_NON_HTML5_CONVERSION
    171 enum { MAX_JA_VERSION=0 };
    172 #else
    173 enum { MAX_JA_VERSION=4 };
    174 #endif
        /*
         * One mask per version, indexed by the version number 0..MAX_JA_VERSION.
         * Only the version-0 entry is compiled when UCONFIG_NO_NON_HTML5_CONVERSION is set.
         * The entries for versions 2, 3 and 4 are identical.
         */
    175 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
    176     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
    177 #if !UCONFIG_NO_NON_HTML5_CONVERSION
    178     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
    179     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
    180     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
    181     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)
    182 #endif
    183 };
    184 
    185 typedef enum {
                /*
                 * Type of UConverterDataISO2022.currentType.
                 * _ISO2022Open() initializes that field to ASCII1.
                 * NOTE(review): the names suggest single-/double-/multi-byte categories
                 * of the active sub-charset -- confirm against the conversion loops.
                 */
    186         ASCII1=0,
    187         LATIN1,
    188         SBCS,
    189         DBCS,
    190         MBCS,
    191         HWKANA
    192 }Cnv2022Type;
    193 
    194 typedef struct ISO2022State {
            /*
             * Designation and shift state for one conversion direction.
             * UConverterDataISO2022 holds two of these: toU2022State and fromU2022State.
             */
    195     int8_t cs[4];       /* charset number for SI (G0)/SO (G1)/SS2 (G2)/SS3 (G3) */
    196     int8_t g;           /* 0..3 for G0..G3 (SI/SO/SS2/SS3) */
    197     int8_t prevG;       /* g before single shift (SS2 or SS3) */
    198 } ISO2022State;
    199 
    200 #define UCNV_OPTIONS_VERSION_MASK 0xf /* low four bits of the open options carry the variant version */
    201 #define UCNV_2022_MAX_CONVERTERS 10 /* capacity of UConverterDataISO2022.myConverterArray[] */
    202 
    203 typedef struct{
            /* Per-converter state; _ISO2022Open() allocates it, zero-fills it and stores it in cnv->extraInfo. */
    204     UConverterSharedData *myConverterArray[UCNV_2022_MAX_CONVERTERS]; /* sub-converter shared data for JP/CN, indexed by StateEnum values */
    205     UConverter *currentConverter; /* for ko: the converter opened by _ISO2022Open() ("ibm-949" or "icu-internal-25546") */
    206     Cnv2022Type currentType;      /* starts as ASCII1 */
    207     ISO2022State toU2022State, fromU2022State; /* separate shift/designation state per direction */
    208     uint32_t key;                 /* NOTE(review): presumably the running escape-sequence key described above the state tables -- confirm */
    209     uint32_t version;             /* options & UCNV_OPTIONS_VERSION_MASK, normalized per variant in _ISO2022Open() */
    210 #ifdef U_ENABLE_GENERIC_ISO_2022
    211     UBool isFirstBuffer;
    212 #endif
    213     UBool isEmptySegment;
    214     char name[30];                /* "ISO_2022,locale=xx,version=n"; the longest string written is 28 characters plus NUL */
    215     char locale[3];               /* "ja", "ko" or "cn" */
    216 }UConverterDataISO2022;
    217 
    218 /* Protos */
    219 /* ISO-2022 ----------------------------------------------------------------- */
    220 
    221 /*Forward declaration */
    222 U_CFUNC void
    223 ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs * args,
    224                       UErrorCode * err);
        /* Both functions are only declared here; their definitions are not in this part of the file. */
    225 U_CFUNC void
    226 ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs * args,
    227                                     UErrorCode * err);
    228 
    229 #define ESC_2022 0x1B /*ESC: the byte that starts an escape sequence*/
    230 
    231 typedef enum
    232 {
                /* Classification of an escape-sequence prefix; these are the values stored in escSeqStateTable_Value_2022[]. */
    233         INVALID_2022 = -1, /*Doesn't correspond to a valid iso 2022 escape sequence*/
    234         VALID_NON_TERMINAL_2022 = 0, /*so far corresponds to a valid iso 2022 escape sequence*/
    235         VALID_TERMINAL_2022 = 1, /*corresponds to a valid iso 2022 escape sequence*/
    236         VALID_MAYBE_TERMINAL_2022 = 2 /*so far matches one iso 2022 escape sequence, but by adding more characters might match another escape sequence*/
    237 } UCNV_TableStates_2022;
    238 
    239 /*
    240 * The way these state transition arrays work is:
    241 * ex : ESC$B is the sequence for JISX208
    242 *      a) First Iteration: char is ESC
    243 *          i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index
    244 *             int x = normalize_esq_chars_2022[27] which is equal to 1
    245 *         ii) Search for this value in escSeqStateTable_Key_2022[]
    246 *             value of x is stored at escSeqStateTable_Key_2022[0]
    247 *        iii) Save this index as offset
    248 *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
    249 *             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
    250 *     b) Switch on this state and continue to next char
    251 *          i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index
    252 *             which is normalize_esq_chars_2022[36] == 4
    253 *         ii) x is currently 1(from above)
    254 *               x<<=5 -- x is now 32
    255 *               x+=normalize_esq_chars_2022[36]
    256 *               now x is 36
    257 *        iii) Search for this value in escSeqStateTable_Key_2022[]
    258 *             value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2
    259 *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
    260 *             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
    261 *     c) Switch on this state and continue to next char
    262 *        i)  Get the value of B from normalize_esq_chars_2022[] with int value of B as index
    263 *        ii) x is currently 36 (from above)
    264 *            x<<=5 -- x is now 1152
    265 *            x+=normalize_esq_chars_2022[66]
    266 *            now x is 1161
    267 *       iii) Search for this value in escSeqStateTable_Key_2022[]
    268 *            value of x is stored at escSeqStateTable_Key_2022[24], so offset is 24
    269 *        iv) Get state of this sequence from escSeqStateTable_Value_2022[24]
    270 *            escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022
    271 *         v) Get the converter name from escSeqStateTable_Result_2022[24] which is JISX-208
    272 */
    273 
    274 
    275 /*Below are the 3 arrays depicting a state transition table*/
    276 static const int8_t normalize_esq_chars_2022[256] = {
        /*
         * Indexed by byte value, ten entries per row (the column header gives the last digit of the index).
         * Every non-zero entry is below 32, so it fits the 5-bit field that the
         * key computation described above (x<<=5; x+=entry) appends per character.
         * Examples: [27] (ESC) is 1, [36] ('$') is 4, [66] ('B') is 9.
         */
    277 /*       0      1       2       3       4      5       6        7       8       9           */
    278 
    279          0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    280         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    281         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,1      ,0      ,0
    282         ,0     ,0      ,0      ,0      ,0      ,0      ,4      ,7      ,29      ,0
    283         ,2     ,24     ,26     ,27     ,0      ,3      ,23     ,6      ,0      ,0
    284         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    285         ,0     ,0      ,0      ,0      ,5      ,8      ,9      ,10     ,11     ,12
    286         ,13    ,14     ,15     ,16     ,17     ,18     ,19     ,20     ,25     ,28
    287         ,0     ,0      ,21     ,0      ,0      ,0      ,0      ,0      ,0      ,0
    288         ,22    ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    289         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    290         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    291         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    292         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    293         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    294         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    295         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    296         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    297         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    298         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    299         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    300         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    301         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    302         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    303         ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    304         ,0     ,0      ,0      ,0      ,0      ,0
    305 };
    306 
    307 #ifdef U_ENABLE_GENERIC_ISO_2022
    308 /*
    309  * When the generic ISO-2022 converter is completely removed, not just disabled
    310  * per #ifdef, then the following state table and the associated tables that are
    311  * dimensioned with MAX_STATES_2022 should be trimmed.
    312  *
    313  * Especially, VALID_MAYBE_TERMINAL_2022 will not be used any more, and all of
    314  * the associated escape sequences starting with ESC ( B should be removed.
    315  * This includes the ones with key values 1097 and all of the ones above 1000000.
    316  *
    317  * For the latter, the tables can simply be truncated.
    318  * For the former, since the tables must be kept parallel, it is probably best
    319  * to simply duplicate an adjacent table cell, parallel in all tables.
    320  *
    321  * It may make sense to restructure the tables, especially by using small search
    322  * tables for the variants instead of indexing them parallel to the table here.
    323  */
    324 #endif
    325 
    326 #define MAX_STATES_2022 74
        /*
         * Keys of all recognized escape-sequence prefixes, in ascending order.
         * Entry i belongs together with entry i of escSeqStateTable_Value_2022[]
         * (and of escSeqStateTable_Result_2022[] when that table is compiled in).
         * Examples: key 1 (ESC alone) is entry 0, key 36 (ESC $) is entry 2, key 1161 (ESC $ B) is entry 24.
         */
    327 static const int32_t escSeqStateTable_Key_2022[MAX_STATES_2022] = {
    328 /*   0           1           2           3           4           5           6           7           8           9           */
    329 
    330      1          ,34         ,36         ,39         ,55         ,57         ,60         ,61         ,1093       ,1096
    331     ,1097       ,1098       ,1099       ,1100       ,1101       ,1102       ,1103       ,1104       ,1105       ,1106
    332     ,1109       ,1154       ,1157       ,1160       ,1161       ,1176       ,1178       ,1179       ,1254       ,1257
    333     ,1768       ,1773       ,1957       ,35105      ,36933      ,36936      ,36937      ,36938      ,36939      ,36940
    334     ,36942      ,36943      ,36944      ,36945      ,36946      ,36947      ,36948      ,37640      ,37642      ,37644
    335     ,37646      ,37711      ,37744      ,37745      ,37746      ,37747      ,37748      ,40133      ,40136      ,40138
    336     ,40139      ,40140      ,40141      ,1123363    ,35947624   ,35947625   ,35947626   ,35947627   ,35947629   ,35947630
    337     ,35947631   ,35947635   ,35947636   ,35947638
    338 };
    339 
    340 #ifdef U_ENABLE_GENERIC_ISO_2022
    341 
    342 static const char* const escSeqStateTable_Result_2022[MAX_STATES_2022] = {
        /*
         * Converter name for each entry of escSeqStateTable_Key_2022[] (same index);
         * compiled only with U_ENABLE_GENERIC_ISO_2022.
         * In this table, NULL appears exactly at the positions whose Value entry is VALID_NON_TERMINAL_2022.
         */
    343  /*  0                      1                        2                      3                   4                   5                        6                      7                       8                       9    */
    344 
    345      NULL                   ,NULL                   ,NULL                   ,NULL               ,NULL               ,NULL                   ,NULL                   ,NULL                   ,"latin1"               ,"latin1"
    346     ,"latin1"               ,"ibm-865"              ,"ibm-865"              ,"ibm-865"          ,"ibm-865"          ,"ibm-865"              ,"ibm-865"              ,"JISX0201"             ,"JISX0201"             ,"latin1"
    347     ,"latin1"               ,NULL                   ,"JISX-208"             ,"ibm-5478"         ,"JISX-208"         ,NULL                   ,NULL                   ,NULL                   ,NULL                   ,"UTF8"
    348     ,"ISO-8859-1"           ,"ISO-8859-7"           ,"JIS-X-208"            ,NULL               ,"ibm-955"          ,"ibm-367"              ,"ibm-952"              ,"ibm-949"              ,"JISX-212"             ,"ibm-1383"
    349     ,"ibm-952"              ,"ibm-964"              ,"ibm-964"              ,"ibm-964"          ,"ibm-964"          ,"ibm-964"              ,"ibm-964"              ,"ibm-5478"         ,"ibm-949"              ,"ISO-IR-165"
    350     ,"CNS-11643-1992,1"     ,"CNS-11643-1992,2"     ,"CNS-11643-1992,3"     ,"CNS-11643-1992,4" ,"CNS-11643-1992,5" ,"CNS-11643-1992,6"     ,"CNS-11643-1992,7"     ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian"
    351     ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,NULL               ,"latin1"           ,"ibm-912"              ,"ibm-913"              ,"ibm-914"              ,"ibm-813"              ,"ibm-1089"
    352     ,"ibm-920"              ,"ibm-915"              ,"ibm-915"              ,"latin1"
    353 };
    354 
    355 #endif
    356 
    357 static const int8_t escSeqStateTable_Value_2022[MAX_STATES_2022] = {
        /*
         * UCNV_TableStates_2022 classification for each entry of escSeqStateTable_Key_2022[] (same index).
         * Entry 10 (key 1097) is the only VALID_MAYBE_TERMINAL_2022 entry.
         */
    358 /*          0                           1                         2                             3                           4                           5                               6                        7                          8                           9       */
    359      VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022     ,VALID_NON_TERMINAL_2022   ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    360     ,VALID_MAYBE_TERMINAL_2022  ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    361     ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022
    362     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    363     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    364     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    365     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    366     ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
    367 };
    368 
    369 
    370 /* Enable ISO-2022-{KR,CN,CN-Ext} for now.
    371  * TODO(jshin): Disable it when we know what to do about 'replacement'
    372  * encodings. See http://crbug.com/277037 and
    373  * https://codereview.chromium.org/145973021/
    374  */
    375 #ifndef U_ENABLE_ISO_2022_KR_CN
        /*
         * Default: enabled. NOTE: the users of this macro in this file test it with
         * #ifdef, not #if, so pre-defining it as 0 does NOT disable the KR/CN variants;
         * with the current tests they can only be compiled out by removing this default.
         */
    376 #define U_ENABLE_ISO_2022_KR_CN 1
    377 #endif
    378 
    379 /* Type def for refactoring changeState_2022 code*/
    380 typedef enum{
        /* Identifies the ISO-2022 flavor; each value keeps its number whether or not the conditional members are compiled. */
    381 #ifdef U_ENABLE_GENERIC_ISO_2022
    382     ISO_2022=0,
    383 #endif
    384     ISO_2022_JP=1,
    385 #ifdef U_ENABLE_ISO_2022_KR_CN
    386     ISO_2022_KR=2,
    387     ISO_2022_CN=3
    388 #endif
    389 } Variant2022;
    390 
    391 /*********** ISO 2022 Converter Protos ***********/
    392 static void
    393 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode); /* allocates cnv->extraInfo and selects the ja/ko/zh variant */
    394 
    395 static void
    396  _ISO2022Close(UConverter *converter); /* called by _ISO2022Open() on its failure and test-only paths */
    397 
    398 static void
    399 _ISO2022Reset(UConverter *converter, UConverterResetChoice choice);
    400 
    401 static const char*
    402 _ISO2022getName(const UConverter* cnv);
    403 
    404 static void
    405 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err);
    406 
    407 static UConverter *
    408 _ISO_2022_SafeClone(const UConverter *cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status);
    409 
    410 #ifdef U_ENABLE_GENERIC_ISO_2022
        /* only needed by the generic converter, hence conditional */
    411 static void
    412 T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, UErrorCode* err);
    413 #endif
    414 
    415 namespace {
        /*
         * Per-variant shared data objects. _ISO2022Open() stores the address of one of
         * them in cnv->sharedData. They are only declared here; the definitions are
         * not in this part of the file.
         */
    416 
    417 /*const UConverterSharedData _ISO2022Data;*/
    418 extern const UConverterSharedData _ISO2022JPData;
    419 extern const UConverterSharedData _ISO2022KRData;
    420 extern const UConverterSharedData _ISO2022CNData;
    421 
    422 }  // namespace
    423 
    424 /*************** Converter implementations ******************/
    425 
    426 /* The purpose of this function is to get around gcc compiler warnings. */
    427 static inline void
    428 fromUWriteUInt8(UConverter *cnv,
    429                  const char *bytes, int32_t length,
    430                  uint8_t **target, const char *targetLimit,
    431                  int32_t **offsets,
    432                  int32_t sourceIndex,
    433                  UErrorCode *pErrorCode)
    434 {
    435     char *targetChars = (char *)*target;
    436     ucnv_fromUWriteBytes(cnv, bytes, length, &targetChars, targetLimit,
    437                          offsets, sourceIndex, pErrorCode);
    438     *target = (uint8_t*)targetChars;
    439 
    440 }
    441 
    442 static inline void
    443 setInitialStateToUnicodeKR(UConverter* /*converter*/, UConverterDataISO2022 *myConverterData){
    444     if(myConverterData->version == 1) {
    445         UConverter *cnv = myConverterData->currentConverter;
    446 
    447         cnv->toUnicodeStatus=0;     /* offset */
    448         cnv->mode=0;                /* state */
    449         cnv->toULength=0;           /* byteIndex */
    450     }
    451 }
    452 
    453 static inline void
    454 setInitialStateFromUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConverterData){
    455    /* in ISO-2022-KR the designator sequence appears only once
    456     * in a file so we append it only once
    457     */
    458     if( converter->charErrorBufferLength==0){
    459 
    460         converter->charErrorBufferLength = 4;
    461         converter->charErrorBuffer[0] = 0x1b;
    462         converter->charErrorBuffer[1] = 0x24;
    463         converter->charErrorBuffer[2] = 0x29;
    464         converter->charErrorBuffer[3] = 0x43;
    465     }
    466     if(myConverterData->version == 1) {
    467         UConverter *cnv = myConverterData->currentConverter;
    468 
    469         cnv->fromUChar32=0;
    470         cnv->fromUnicodeStatus=1;   /* prevLength */
    471     }
    472 }
    473 
    474 static void
    475 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
    476 
    477     char myLocale[6]={' ',' ',' ',' ',' ',' '};
    478 
    479     cnv->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022));
    480     if(cnv->extraInfo != NULL) {
    481         UConverterNamePieces stackPieces;
    482         UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER;
    483         UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo;
    484         uint32_t version;
    485 
    486         stackArgs.onlyTestIsLoadable = pArgs->onlyTestIsLoadable;
    487 
    488         uprv_memset(myConverterData, 0, sizeof(UConverterDataISO2022));
    489         myConverterData->currentType = ASCII1;
    490         cnv->fromUnicodeStatus =FALSE;
    491         if(pArgs->locale){
    492             uprv_strncpy(myLocale, pArgs->locale, sizeof(myLocale));
    493         }
    494         version = pArgs->options & UCNV_OPTIONS_VERSION_MASK;
    495         myConverterData->version = version;
    496         if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') &&
    497             (myLocale[2]=='_' || myLocale[2]=='\0'))
    498         {
    499             size_t len=0;
    500             /* open the required converters and cache them */
    501             if(version>MAX_JA_VERSION) {
    502                 /* prevent indexing beyond jpCharsetMasks[] */
    503                 myConverterData->version = version = 0;
    504             }
    505 #if !UCONFIG_NO_NON_HTML5_CONVERSION
    506             if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
    507                 myConverterData->myConverterArray[ISO8859_7] =
    508                     ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode);
    509             }
    510 #endif
    511             myConverterData->myConverterArray[JISX208] =
    512                 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, errorCode);
    513 #if !UCONFIG_NO_NON_HTML5_CONVERSION
    514             if(jpCharsetMasks[version]&CSM(JISX212)) {
    515                 myConverterData->myConverterArray[JISX212] =
    516                     ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode);
    517             }
    518             if(jpCharsetMasks[version]&CSM(GB2312)) {
    519                 myConverterData->myConverterArray[GB2312] =
    520                     ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode);   /* gb_2312_80-1 */
    521             }
    522             if(jpCharsetMasks[version]&CSM(KSC5601)) {
    523                 myConverterData->myConverterArray[KSC5601] =
    524                     ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode);
    525             }
    526 #endif
    527 
    528             /* set the function pointers to appropriate funtions */
    529             cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);
    530             uprv_strcpy(myConverterData->locale,"ja");
    531 
    532             (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=");
    533             len = uprv_strlen(myConverterData->name);
    534             myConverterData->name[len]=(char)(myConverterData->version+(int)'0');
    535             myConverterData->name[len+1]='\0';
    536         }
    537 #ifdef U_ENABLE_ISO_2022_KR_CN
    538         else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
    539             (myLocale[2]=='_' || myLocale[2]=='\0'))
    540         {
    541             const char *cnvName;
    542             if(version==1) {
    543                 cnvName="icu-internal-25546";
    544             } else {
    545                 cnvName="ibm-949";
    546                 myConverterData->version=version=0;
    547             }
    548             if(pArgs->onlyTestIsLoadable) {
    549                 ucnv_canCreateConverter(cnvName, errorCode);  /* errorCode carries result */
    550                 uprv_free(cnv->extraInfo);
    551                 cnv->extraInfo=NULL;
    552                 return;
    553             } else {
    554                 myConverterData->currentConverter=ucnv_open(cnvName, errorCode);
    555                 if (U_FAILURE(*errorCode)) {
    556                     _ISO2022Close(cnv);
    557                     return;
    558                 }
    559 
    560                 if(version==1) {
    561                     (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=1");
    562                     uprv_memcpy(cnv->subChars, myConverterData->currentConverter->subChars, 4);
    563                     cnv->subCharLen = myConverterData->currentConverter->subCharLen;
    564                 }else{
    565                     (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=0");
    566                 }
    567 
    568                 /* initialize the state variables */
    569                 setInitialStateToUnicodeKR(cnv, myConverterData);
    570                 setInitialStateFromUnicodeKR(cnv, myConverterData);
    571 
    572                 /* set the function pointers to appropriate funtions */
    573                 cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData;
    574                 uprv_strcpy(myConverterData->locale,"ko");
    575             }
    576         }
    577         else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& myLocale[1]=='n'))&&
    578             (myLocale[2]=='_' || myLocale[2]=='\0'))
    579         {
    580 
    581             /* open the required converters and cache them */
    582             myConverterData->myConverterArray[GB2312_1] =
    583                 ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode);
    584             if(version==1) {
    585                 myConverterData->myConverterArray[ISO_IR_165] =
    586                     ucnv_loadSharedData("noop-iso-ir-165", &stackPieces, &stackArgs, errorCode);
    587             }
    588             myConverterData->myConverterArray[CNS_11643] =
    589                 ucnv_loadSharedData("noop-cns-11643", &stackPieces, &stackArgs, errorCode);
    590 
    591 
    592             /* set the function pointers to appropriate funtions */
    593             cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData;
    594             uprv_strcpy(myConverterData->locale,"cn");
    595 
    596             if (version==0){
    597                 myConverterData->version = 0;
    598                 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=0");
    599             }else if (version==1){
    600                 myConverterData->version = 1;
    601                 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=1");
    602             }else {
    603                 myConverterData->version = 2;
    604                 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2");
    605             }
    606         }
    607 #endif // U_ENABLE_ISO_2022_KR_CN
    608         else{
    609 #ifdef U_ENABLE_GENERIC_ISO_2022
    610             myConverterData->isFirstBuffer = TRUE;
    611 
    612             /* append the UTF-8 escape sequence */
    613             cnv->charErrorBufferLength = 3;
    614             cnv->charErrorBuffer[0] = 0x1b;
    615             cnv->charErrorBuffer[1] = 0x25;
    616             cnv->charErrorBuffer[2] = 0x42;
    617 
    618             cnv->sharedData=(UConverterSharedData*)&_ISO2022Data;
    619             /* initialize the state variables */
    620             uprv_strcpy(myConverterData->name,"ISO_2022");
    621 #else
    622             *errorCode = U_UNSUPPORTED_ERROR;
    623             return;
    624 #endif
    625         }
    626 
    627         cnv->maxBytesPerUChar=cnv->sharedData->staticData->maxBytesPerChar;
    628 
    629         if(U_FAILURE(*errorCode) || pArgs->onlyTestIsLoadable) {
    630             _ISO2022Close(cnv);
    631         }
    632     } else {
    633         *errorCode = U_MEMORY_ALLOCATION_ERROR;
    634     }
    635 }
    636 
    637 
    638 static void
    639 _ISO2022Close(UConverter *converter) {
    640     UConverterDataISO2022* myData =(UConverterDataISO2022 *) (converter->extraInfo);
    641     UConverterSharedData **array = myData->myConverterArray;
    642     int32_t i;
    643 
    644     if (converter->extraInfo != NULL) {
    645         /*close the array of converter pointers and free the memory*/
    646         for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
    647             if(array[i]!=NULL) {
    648                 ucnv_unloadSharedDataIfReady(array[i]);
    649             }
    650         }
    651 
    652         ucnv_close(myData->currentConverter);
    653 
    654         if(!converter->isExtraLocal){
    655             uprv_free (converter->extraInfo);
    656             converter->extraInfo = NULL;
    657         }
    658     }
    659 }
    660 
    661 static void
    662 _ISO2022Reset(UConverter *converter, UConverterResetChoice choice) {
    663     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) (converter->extraInfo);
    664     if(choice<=UCNV_RESET_TO_UNICODE) {
    665         uprv_memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State));
    666         myConverterData->key = 0;
    667         myConverterData->isEmptySegment = FALSE;
    668     }
    669     if(choice!=UCNV_RESET_TO_UNICODE) {
    670         uprv_memset(&myConverterData->fromU2022State, 0, sizeof(ISO2022State));
    671     }
    672 #ifdef U_ENABLE_GENERIC_ISO_2022
    673     if(myConverterData->locale[0] == 0){
    674         if(choice<=UCNV_RESET_TO_UNICODE) {
    675             myConverterData->isFirstBuffer = TRUE;
    676             myConverterData->key = 0;
    677             if (converter->mode == UCNV_SO){
    678                 ucnv_close (myConverterData->currentConverter);
    679                 myConverterData->currentConverter=NULL;
    680             }
    681             converter->mode = UCNV_SI;
    682         }
    683         if(choice!=UCNV_RESET_TO_UNICODE) {
    684             /* re-append UTF-8 escape sequence */
    685             converter->charErrorBufferLength = 3;
    686             converter->charErrorBuffer[0] = 0x1b;
    687             converter->charErrorBuffer[1] = 0x28;
    688             converter->charErrorBuffer[2] = 0x42;
    689         }
    690     }
    691     else
    692 #endif
    693     {
    694         /* reset the state variables */
    695         if(myConverterData->locale[0] == 'k'){
    696             if(choice<=UCNV_RESET_TO_UNICODE) {
    697                 setInitialStateToUnicodeKR(converter, myConverterData);
    698             }
    699             if(choice!=UCNV_RESET_TO_UNICODE) {
    700                 setInitialStateFromUnicodeKR(converter, myConverterData);
    701             }
    702         }
    703     }
    704 }
    705 
    706 static const char*
    707 _ISO2022getName(const UConverter* cnv){
    708     if(cnv->extraInfo){
    709         UConverterDataISO2022* myData= (UConverterDataISO2022*)cnv->extraInfo;
    710         return myData->name;
    711     }
    712     return NULL;
    713 }
    714 
    715 
    716 /*************** to unicode *******************/
    717 /****************************************************************************
    718  * Recognized escape sequences are
    719  * <ESC>(B  ASCII
    720  * <ESC>.A  ISO-8859-1
    721  * <ESC>.F  ISO-8859-7
    722  * <ESC>(J  JISX-201
    723  * <ESC>(I  JISX-201
    724  * <ESC>$B  JISX-208
    725  * <ESC>$@  JISX-208
    726  * <ESC>$(D JISX-212
    727  * <ESC>$A  GB2312
    728  * <ESC>$(C KSC5601
    729  */
        /*
         * ISO-2022-JP lookup table: changeState_2022() indexes it with the
         * `offset` reported by getKey_2022() for a complete escape sequence and
         * casts the entry to StateEnum. INVALID_STATE entries make the caller
         * report U_UNSUPPORTED_ESCAPE_SEQUENCE; SS2_STATE selects the single
         * shift; every other entry names the charset to designate.
         * The table has MAX_STATES_2022 entries, ten per row below.
         */
    730 static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= {
    731 /*      0                1               2               3               4               5               6               7               8               9    */
    732     INVALID_STATE   ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,SS2_STATE      ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    733     ,ASCII          ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,JISX201        ,HWKANA_7BIT    ,JISX201        ,INVALID_STATE
    734     ,INVALID_STATE  ,INVALID_STATE  ,JISX208        ,GB2312         ,JISX208        ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    735     ,ISO8859_1      ,ISO8859_7      ,JISX208        ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,KSC5601        ,JISX212        ,INVALID_STATE
    736     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    737     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    738     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    739     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    740 };
    741 
    742 /*************** to unicode *******************/
        /*
         * ISO-2022-CN lookup table: changeState_2022() indexes it with the
         * `offset` reported by getKey_2022() for a complete escape sequence and
         * casts the entry to StateEnum. INVALID_STATE entries make the caller
         * report U_UNSUPPORTED_ESCAPE_SEQUENCE; SS2_STATE/SS3_STATE select a
         * single shift; the remaining entries name the charset to designate.
         * The table has MAX_STATES_2022 entries, ten per row below.
         */
    743 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
    744 /*      0                1               2               3               4               5               6               7               8               9    */
    745      INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,SS2_STATE      ,SS3_STATE      ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    746     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    747     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    748     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    749     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,GB2312_1       ,INVALID_STATE  ,ISO_IR_165
    750     ,CNS_11643_1    ,CNS_11643_2    ,CNS_11643_3    ,CNS_11643_4    ,CNS_11643_5    ,CNS_11643_6    ,CNS_11643_7    ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    751     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    752     ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    753 };
    754 
    755 
    756 static UCNV_TableStates_2022
    757 getKey_2022(char c,int32_t* key,int32_t* offset){
    758     int32_t togo;
    759     int32_t low = 0;
    760     int32_t hi = MAX_STATES_2022;
    761     int32_t oldmid=0;
    762 
    763     togo = normalize_esq_chars_2022[(uint8_t)c];
    764     if(togo == 0) {
    765         /* not a valid character anywhere in an escape sequence */
    766         *key = 0;
    767         *offset = 0;
    768         return INVALID_2022;
    769     }
    770     togo = (*key << 5) + togo;
    771 
    772     while (hi != low)  /*binary search*/{
    773 
    774         register int32_t mid = (hi+low) >> 1; /*Finds median*/
    775 
    776         if (mid == oldmid)
    777             break;
    778 
    779         if (escSeqStateTable_Key_2022[mid] > togo){
    780             hi = mid;
    781         }
    782         else if (escSeqStateTable_Key_2022[mid] < togo){
    783             low = mid;
    784         }
    785         else /*we found it*/{
    786             *key = togo;
    787             *offset = mid;
    788             return (UCNV_TableStates_2022)escSeqStateTable_Value_2022[mid];
    789         }
    790         oldmid = mid;
    791 
    792     }
    793 
    794     *key = 0;
    795     *offset = 0;
    796     return INVALID_2022;
    797 }
    798 
    799 /*runs through a state machine to determine the escape sequence - codepage correspondance
    800  */
    801 static void
    802 changeState_2022(UConverter* _this,
    803                 const char** source,
    804                 const char* sourceLimit,
    805                 Variant2022 var,
    806                 UErrorCode* err){
    807     UCNV_TableStates_2022 value;
    808     UConverterDataISO2022* myData2022 = ((UConverterDataISO2022*)_this->extraInfo);
    809     uint32_t key = myData2022->key;
    810     int32_t offset = 0;
    811     int8_t initialToULength = _this->toULength;
    812     char c;
    813 
    814     value = VALID_NON_TERMINAL_2022;
    815     while (*source < sourceLimit) {
    816         c = *(*source)++;
    817         _this->toUBytes[_this->toULength++]=(uint8_t)c;
    818         value = getKey_2022(c,(int32_t *) &key, &offset);
    819 
    820         switch (value){
    821 
    822         case VALID_NON_TERMINAL_2022 :
    823             /* continue with the loop */
    824             break;
    825 
    826         case VALID_TERMINAL_2022:
    827             key = 0;
    828             goto DONE;
    829 
    830         case INVALID_2022:
    831             goto DONE;
    832 
    833         case VALID_MAYBE_TERMINAL_2022:
    834 #ifdef U_ENABLE_GENERIC_ISO_2022
    835             /* ESC ( B is ambiguous only for ISO_2022 itself */
    836             if(var == ISO_2022) {
    837                 /* discard toUBytes[] for ESC ( B because this sequence is correct and complete */
    838                 _this->toULength = 0;
    839 
    840                 /* TODO need to indicate that ESC ( B was seen; if failure, then need to replay from source or from MBCS-style replay */
    841 
    842                 /* continue with the loop */
    843                 value = VALID_NON_TERMINAL_2022;
    844                 break;
    845             } else
    846 #endif
    847             {
    848                 /* not ISO_2022 itself, finish here */
    849                 value = VALID_TERMINAL_2022;
    850                 key = 0;
    851                 goto DONE;
    852             }
    853         }
    854     }
    855 
    856 DONE:
    857     myData2022->key = key;
    858 
    859     if (value == VALID_NON_TERMINAL_2022) {
    860         /* indicate that the escape sequence is incomplete: key!=0 */
    861         return;
    862     } else if (value == INVALID_2022 ) {
    863         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    864     } else /* value == VALID_TERMINAL_2022 */ {
    865         switch(var){
    866 #ifdef U_ENABLE_GENERIC_ISO_2022
    867         case ISO_2022:
    868         {
    869             const char *chosenConverterName = escSeqStateTable_Result_2022[offset];
    870             if(chosenConverterName == NULL) {
    871                 /* SS2 or SS3 */
    872                 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    873                 _this->toUCallbackReason = UCNV_UNASSIGNED;
    874                 return;
    875             }
    876 
    877             _this->mode = UCNV_SI;
    878             ucnv_close(myData2022->currentConverter);
    879             myData2022->currentConverter = myUConverter = ucnv_open(chosenConverterName, err);
    880             if(U_SUCCESS(*err)) {
    881                 myUConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
    882                 _this->mode = UCNV_SO;
    883             }
    884             break;
    885         }
    886 #endif
    887         case ISO_2022_JP:
    888             {
    889                 StateEnum tempState=(StateEnum)nextStateToUnicodeJP[offset];
    890                 switch(tempState) {
    891                 case INVALID_STATE:
    892                     *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    893                     break;
    894                 case SS2_STATE:
    895                     if(myData2022->toU2022State.cs[2]!=0) {
    896                         if(myData2022->toU2022State.g<2) {
    897                             myData2022->toU2022State.prevG=myData2022->toU2022State.g;
    898                         }
    899                         myData2022->toU2022State.g=2;
    900                     } else {
    901                         /* illegal to have SS2 before a matching designator */
    902                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    903                     }
    904                     break;
    905                 /* case SS3_STATE: not used in ISO-2022-JP-x */
    906                 case ISO8859_1:
    907                 case ISO8859_7:
    908                     if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
    909                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    910                     } else {
    911                         /* G2 charset for SS2 */
    912                         myData2022->toU2022State.cs[2]=(int8_t)tempState;
    913                     }
    914                     break;
    915                 default:
    916                     if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
    917                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    918                     } else {
    919                         /* G0 charset */
    920                         myData2022->toU2022State.cs[0]=(int8_t)tempState;
    921                     }
    922                     break;
    923                 }
    924             }
    925             break;
    926         case ISO_2022_CN:
    927             {
    928                 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
    929                 switch(tempState) {
    930                 case INVALID_STATE:
    931                     *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    932                     break;
    933                 case SS2_STATE:
    934                     if(myData2022->toU2022State.cs[2]!=0) {
    935                         if(myData2022->toU2022State.g<2) {
    936                             myData2022->toU2022State.prevG=myData2022->toU2022State.g;
    937                         }
    938                         myData2022->toU2022State.g=2;
    939                     } else {
    940                         /* illegal to have SS2 before a matching designator */
    941                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    942                     }
    943                     break;
    944                 case SS3_STATE:
    945                     if(myData2022->toU2022State.cs[3]!=0) {
    946                         if(myData2022->toU2022State.g<2) {
    947                             myData2022->toU2022State.prevG=myData2022->toU2022State.g;
    948                         }
    949                         myData2022->toU2022State.g=3;
    950                     } else {
    951                         /* illegal to have SS3 before a matching designator */
    952                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    953                     }
    954                     break;
    955                 case ISO_IR_165:
    956                     if(myData2022->version==0) {
    957                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    958                         break;
    959                     }
    960                     /*fall through*/
    961                 case GB2312_1:
    962                     /*fall through*/
    963                 case CNS_11643_1:
    964                     myData2022->toU2022State.cs[1]=(int8_t)tempState;
    965                     break;
    966                 case CNS_11643_2:
    967                     myData2022->toU2022State.cs[2]=(int8_t)tempState;
    968                     break;
    969                 default:
    970                     /* other CNS 11643 planes */
    971                     if(myData2022->version==0) {
    972                         *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    973                     } else {
    974                        myData2022->toU2022State.cs[3]=(int8_t)tempState;
    975                     }
    976                     break;
    977                 }
    978             }
    979             break;
    980         case ISO_2022_KR:
    981             if(offset==0x30){
    982                 /* nothing to be done, just accept this one escape sequence */
    983             } else {
    984                 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    985             }
    986             break;
    987 
    988         default:
    989             *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    990             break;
    991         }
    992     }
    993     if(U_SUCCESS(*err)) {
    994         _this->toULength = 0;
    995     } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) {
    996         if(_this->toULength>1) {
    997             /*
    998              * Ticket 5691: consistent illegal sequences:
    999              * - We include at least the first byte (ESC) in the illegal sequence.
   1000              * - If any of the non-initial bytes could be the start of a character,
   1001              *   we stop the illegal sequence before the first one of those.
   1002              *   In escape sequences, all following bytes are "printable", that is,
   1003              *   unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS),
   1004              *   they are valid single/lead bytes.
   1005              *   For simplicity, we always only report the initial ESC byte as the
   1006              *   illegal sequence and back out all other bytes we looked at.
   1007              */
   1008             /* Back out some bytes. */
   1009             int8_t backOutDistance=_this->toULength-1;
   1010             int8_t bytesFromThisBuffer=_this->toULength-initialToULength;
   1011             if(backOutDistance<=bytesFromThisBuffer) {
   1012                 /* same as initialToULength<=1 */
   1013                 *source-=backOutDistance;
   1014             } else {
   1015                 /* Back out bytes from the previous buffer: Need to replay them. */
   1016                 _this->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance);
   1017                 /* same as -(initialToULength-1) */
   1018                 /* preToULength is negative! */
   1019                 uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULength);
   1020                 *source-=bytesFromThisBuffer;
   1021             }
   1022             _this->toULength=1;
   1023         }
   1024     } else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) {
   1025         _this->toUCallbackReason = UCNV_UNASSIGNED;
   1026     }
   1027 }
   1028 
   1029 /*Checks the characters of the buffer against valid 2022 escape sequences
   1030 *if the match we return a pointer to the initial start of the sequence otherwise
   1031 *we return sourceLimit
   1032 */
   1033 /*for 2022 looks ahead in the stream
   1034  *to determine the longest possible convertible
   1035  *data stream
   1036  */
   1037 static inline const char*
   1038 getEndOfBuffer_2022(const char** source,
   1039                    const char* sourceLimit,
   1040                    UBool /*flush*/){
   1041 
   1042     const char* mySource = *source;
   1043 
   1044 #ifdef U_ENABLE_GENERIC_ISO_2022
   1045     if (*source >= sourceLimit)
   1046         return sourceLimit;
   1047 
   1048     do{
   1049 
   1050         if (*mySource == ESC_2022){
   1051             int8_t i;
   1052             int32_t key = 0;
   1053             int32_t offset;
   1054             UCNV_TableStates_2022 value = VALID_NON_TERMINAL_2022;
   1055 
   1056             /* Kludge: I could not
   1057             * figure out the reason for validating an escape sequence
   1058             * twice - once here and once in changeState_2022().
   1059             * is it possible to have an ESC character in a ISO2022
   1060             * byte stream which is valid in a code page? Is it legal?
   1061             */
   1062             for (i=0;
   1063             (mySource+i < sourceLimit)&&(value == VALID_NON_TERMINAL_2022);
   1064             i++) {
   1065                 value =  getKey_2022(*(mySource+i), &key, &offset);
   1066             }
   1067             if (value > 0 || *mySource==ESC_2022)
   1068                 return mySource;
   1069 
   1070             if ((value == VALID_NON_TERMINAL_2022)&&(!flush) )
   1071                 return sourceLimit;
   1072         }
   1073     }while (++mySource < sourceLimit);
   1074 
   1075     return sourceLimit;
   1076 #else
   1077     while(mySource < sourceLimit && *mySource != ESC_2022) {
   1078         ++mySource;
   1079     }
   1080     return mySource;
   1081 #endif
   1082 }
   1083 
   1084 
   1085 /* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c
   1086  * any future change in _MBCSFromUChar32() function should be reflected here.
   1087  * @return number of bytes in *value; negative number if fallback; 0 if no mapping
   1088  */
   1089 static inline int32_t
   1090 MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData,
   1091                                          UChar32 c,
   1092                                          uint32_t* value,
   1093                                          UBool useFallback,
   1094                                          int outputType)
   1095 {
   1096     const int32_t *cx;
   1097     const uint16_t *table;
   1098     uint32_t stage2Entry;
   1099     uint32_t myValue;
   1100     int32_t length;
   1101     const uint8_t *p;
   1102     /*
   1103      * TODO(markus): Use and require new, faster MBCS conversion table structures.
   1104      * Use internal version of ucnv_open() that verifies that the new structures are available,
   1105      * else U_INTERNAL_PROGRAM_ERROR.
   1106      */
   1107     /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
   1108     if(c<0x10000 || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
   1109         table=sharedData->mbcs.fromUnicodeTable;
   1110         stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
   1111         /* get the bytes and the length for the output */
   1112         if(outputType==MBCS_OUTPUT_2){
   1113             myValue=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
   1114             if(myValue<=0xff) {
   1115                 length=1;
   1116             } else {
   1117                 length=2;
   1118             }
   1119         } else /* outputType==MBCS_OUTPUT_3 */ {
   1120             p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
   1121             myValue=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
   1122             if(myValue<=0xff) {
   1123                 length=1;
   1124             } else if(myValue<=0xffff) {
   1125                 length=2;
   1126             } else {
   1127                 length=3;
   1128             }
   1129         }
   1130         /* is this code point assigned, or do we use fallbacks? */
   1131         if((stage2Entry&(1<<(16+(c&0xf))))!=0) {
   1132             /* assigned */
   1133             *value=myValue;
   1134             return length;
   1135         } else if(FROM_U_USE_FALLBACK(useFallback, c) && myValue!=0) {
   1136             /*
   1137              * We allow a 0 byte output if the "assigned" bit is set for this entry.
   1138              * There is no way with this data structure for fallback output
   1139              * to be a zero byte.
   1140              */
   1141             *value=myValue;
   1142             return -length;
   1143         }
   1144     }
   1145 
   1146     cx=sharedData->mbcs.extIndexes;
   1147     if(cx!=NULL) {
   1148         return ucnv_extSimpleMatchFromU(cx, c, value, useFallback);
   1149     }
   1150 
   1151     /* unassigned */
   1152     return 0;
   1153 }
   1154 
   1155 /* This inline function replicates code in _MBCSSingleFromUChar32() function in ucnvmbcs.c
   1156  * any future change in _MBCSSingleFromUChar32() function should be reflected here.
   1157  * @param retval pointer to output byte
   1158  * @return 1 roundtrip byte  0 no mapping  -1 fallback byte
   1159  */
   1160 static inline int32_t
   1161 MBCS_SINGLE_FROM_UCHAR32(UConverterSharedData* sharedData,
   1162                                        UChar32 c,
   1163                                        uint32_t* retval,
   1164                                        UBool useFallback)
   1165 {
   1166     const uint16_t *table;
   1167     int32_t value;
   1168     /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
   1169     if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
   1170         return 0;
   1171     }
   1172     /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
   1173     table=sharedData->mbcs.fromUnicodeTable;
   1174     /* get the byte for the output */
   1175     value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
   1176     /* is this code point assigned, or do we use fallbacks? */
   1177     *retval=(uint32_t)(value&0xff);
   1178     if(value>=0xf00) {
   1179         return 1;  /* roundtrip */
   1180     } else if(useFallback ? value>=0x800 : value>=0xc00) {
   1181         return -1;  /* fallback taken */
   1182     } else {
   1183         return 0;  /* no mapping */
   1184     }
   1185 }
   1186 
   1187 /*
   1188  * Check that the result is a 2-byte value with each byte in the range A1..FE
   1189  * (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte
   1190  * to move it to the ISO 2022 range 21..7E.
   1191  * Return 0 if out of range.
   1192  */
   1193 static inline uint32_t
   1194 _2022FromGR94DBCS(uint32_t value) {
   1195     if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) &&
   1196         (uint8_t)(value - 0xa1) <= (0xfe - 0xa1)
   1197     ) {
   1198         return value - 0x8080;  /* shift down to 21..7e byte range */
   1199     } else {
   1200         return 0;  /* not valid for ISO 2022 */
   1201     }
   1202 }
   1203 
   1204 #if 0 /* 5691: Call sites now check for validity. They can just += 0x8080 after that. */
   1205 /*
   1206  * This method does the reverse of _2022FromGR94DBCS(). Given the 2022 code point, it returns the
   1207  * 2 byte value that is in the range A1..FE for each byte. Otherwise it returns the 2022 code point
   1208  * unchanged.
   1209  */
   1210 static inline uint32_t
   1211 _2022ToGR94DBCS(uint32_t value) {
   1212     uint32_t returnValue = value + 0x8080;
   1213     if( (uint16_t)(returnValue - 0xa1a1) <= (0xfefe - 0xa1a1) &&
   1214         (uint8_t)(returnValue - 0xa1) <= (0xfe - 0xa1)) {
   1215         return returnValue;
   1216     } else {
   1217         return value;
   1218     }
   1219 }
   1220 #endif
   1221 
   1222 #ifdef U_ENABLE_GENERIC_ISO_2022
   1223 
   1224 /**********************************************************************************
   1225 *  ISO-2022 Converter
   1226 *
   1227 *
   1228 */
   1229 
   1230 static void
   1231 T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args,
   1232                                                            UErrorCode* err){
   1233     const char* mySourceLimit, *realSourceLimit;
   1234     const char* sourceStart;
   1235     const UChar* myTargetStart;
   1236     UConverter* saveThis;
   1237     UConverterDataISO2022* myData;
   1238     int8_t length;
   1239 
   1240     saveThis = args->converter;
   1241     myData=((UConverterDataISO2022*)(saveThis->extraInfo));
   1242 
   1243     realSourceLimit = args->sourceLimit;
   1244     while (args->source < realSourceLimit) {
   1245         if(myData->key == 0) { /* are we in the middle of an escape sequence? */
   1246             /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
   1247             mySourceLimit = getEndOfBuffer_2022(&(args->source), realSourceLimit, args->flush);
   1248 
   1249             if(args->source < mySourceLimit) {
   1250                 if(myData->currentConverter==NULL) {
   1251                     myData->currentConverter = ucnv_open("ASCII",err);
   1252                     if(U_FAILURE(*err)){
   1253                         return;
   1254                     }
   1255 
   1256                     myData->currentConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
   1257                     saveThis->mode = UCNV_SO;
   1258                 }
   1259 
   1260                 /* convert to before the ESC or until the end of the buffer */
   1261                 myData->isFirstBuffer=FALSE;
   1262                 sourceStart = args->source;
   1263                 myTargetStart = args->target;
   1264                 args->converter = myData->currentConverter;
   1265                 ucnv_toUnicode(args->converter,
   1266                     &args->target,
   1267                     args->targetLimit,
   1268                     &args->source,
   1269                     mySourceLimit,
   1270                     args->offsets,
   1271                     (UBool)(args->flush && mySourceLimit == realSourceLimit),
   1272                     err);
   1273                 args->converter = saveThis;
   1274 
   1275                 if (*err == U_BUFFER_OVERFLOW_ERROR) {
   1276                     /* move the overflow buffer */
   1277                     length = saveThis->UCharErrorBufferLength = myData->currentConverter->UCharErrorBufferLength;
   1278                     myData->currentConverter->UCharErrorBufferLength = 0;
   1279                     if(length > 0) {
   1280                         uprv_memcpy(saveThis->UCharErrorBuffer,
   1281                                     myData->currentConverter->UCharErrorBuffer,
   1282                                     length*U_SIZEOF_UCHAR);
   1283                     }
   1284                     return;
   1285                 }
   1286 
   1287                 /*
   1288                  * At least one of:
   1289                  * -Error while converting
   1290                  * -Done with entire buffer
   1291                  * -Need to write offsets or update the current offset
   1292                  *  (leave that up to the code in ucnv.c)
   1293                  *
   1294                  * or else we just stopped at an ESC byte and continue with changeState_2022()
   1295                  */
   1296                 if (U_FAILURE(*err) ||
   1297                     (args->source == realSourceLimit) ||
   1298                     (args->offsets != NULL && (args->target != myTargetStart || args->source != sourceStart) ||
   1299                     (mySourceLimit < realSourceLimit && myData->currentConverter->toULength > 0))
   1300                 ) {
   1301                     /* copy partial or error input for truncated detection and error handling */
   1302                     if(U_FAILURE(*err)) {
   1303                         length = saveThis->invalidCharLength = myData->currentConverter->invalidCharLength;
   1304                         if(length > 0) {
   1305                             uprv_memcpy(saveThis->invalidCharBuffer, myData->currentConverter->invalidCharBuffer, length);
   1306                         }
   1307                     } else {
   1308                         length = saveThis->toULength = myData->currentConverter->toULength;
   1309                         if(length > 0) {
   1310                             uprv_memcpy(saveThis->toUBytes, myData->currentConverter->toUBytes, length);
   1311                             if(args->source < mySourceLimit) {
   1312                                 *err = U_TRUNCATED_CHAR_FOUND; /* truncated input before ESC */
   1313                             }
   1314                         }
   1315                     }
   1316                     return;
   1317                 }
   1318             }
   1319         }
   1320 
   1321         sourceStart = args->source;
   1322         changeState_2022(args->converter,
   1323                &(args->source),
   1324                realSourceLimit,
   1325                ISO_2022,
   1326                err);
   1327         if (U_FAILURE(*err) || (args->source != sourceStart && args->offsets != NULL)) {
   1328             /* let the ucnv.c code update its current offset */
   1329             return;
   1330         }
   1331     }
   1332 }
   1333 
   1334 #endif
   1335 
   1336 /*
   1337  * To Unicode Callback helper function
   1338  */
   1339 static void
   1340 toUnicodeCallback(UConverter *cnv,
   1341                   const uint32_t sourceChar, const uint32_t targetUniChar,
   1342                   UErrorCode* err){
   1343     if(sourceChar>0xff){
   1344         cnv->toUBytes[0] = (uint8_t)(sourceChar>>8);
   1345         cnv->toUBytes[1] = (uint8_t)sourceChar;
   1346         cnv->toULength = 2;
   1347     }
   1348     else{
   1349         cnv->toUBytes[0] =(char) sourceChar;
   1350         cnv->toULength = 1;
   1351     }
   1352 
   1353     if(targetUniChar == (missingCharMarker-1/*0xfffe*/)){
   1354         *err = U_INVALID_CHAR_FOUND;
   1355     }
   1356     else{
   1357         *err = U_ILLEGAL_CHAR_FOUND;
   1358     }
   1359 }
   1360 
   1361 /**************************************ISO-2022-JP*************************************************/
   1362 
   1363 /************************************** IMPORTANT **************************************************
   1364 * The UConverter_fromUnicode_ISO2022_JP converter does not use ucnv_fromUnicode() functions for SBCS,DBCS and
   1365 * MBCS; instead, the values are obtained directly by calling _MBCSFromUChar32().
   1366 * The converter iterates over each Unicode codepoint
   1367 * to obtain the equivalent codepoints from the codepages supported. Since the source buffer is
   1368 * processed one char at a time it would make sense to reduce the extra processing a canned converter
   1369 * would do as far as possible.
   1370 *
   1371 * If the implementation of these macros or structure of sharedData struct change in the future, make
   1372 * sure that ISO-2022 is also changed.
   1373 ***************************************************************************************************
   1374 */
   1375 
   1376 /***************************************************************************************************
   1377 * Rules for ISO-2022-jp encoding
   1378 * (i)   Escape sequences must be fully contained within a line; they should not
   1379 *       span newlines or CRs
   1380 * (ii)  If the last character on a line is represented by two bytes then an ASCII or
   1381 *       JIS-Roman character escape sequence should follow before the line terminates
   1382 * (iii) If the first character on the line is represented by two bytes then a two
   1383 *       byte character escape sequence should precede it
   1384 * (iv)  If no escape sequence is encountered then the characters are ASCII
   1385 * (v)   Latin(ISO-8859-1) and Greek(ISO-8859-7) characters must be designated to G2,
   1386 *       and invoked with SS2 (ESC N).
   1387 * (vi)  If there is any G0 designation in text, there must be a switch to
   1388 *       ASCII or to JIS X 0201-Roman before a space character (but not
   1389 *       necessarily before "ESC 4/14 2/0" or "ESC N ' '") or control
   1390 *       characters such as tab or CRLF.
   1391 * (vii) Supported encodings:
   1392 *          ASCII, JISX201, JISX208, JISX212, GB2312, KSC5601, ISO-8859-1,ISO-8859-7
   1393 *
   1394 *  source : RFC-1554
   1395 *
   1396 *          JISX201, JISX208,JISX212 : new .cnv data files created
   1397 *          KSC5601 : alias to ibm-949 mapping table
   1398 *          GB2312 : alias to ibm-1386 mapping table
   1399 *          ISO-8859-1 : Algorithmic implemented as LATIN1 case
   1400 *          ISO-8859-7 : alias to ibm-9409 mapping table
   1401 */
   1402 
   1403 /* preference order of JP charsets */
   1404 static const StateEnum jpCharsetPref[]={
   1405     ASCII,
   1406     JISX201,
   1407     ISO8859_1,
   1408     ISO8859_7,
   1409     JISX208,
   1410     JISX212,
   1411     GB2312,
   1412     KSC5601,
   1413     HWKANA_7BIT
   1414 };
   1415 
   /*
    * Designation escape sequences, one per charset.
    * The entries are indexed by the charset's enum constant (for example
    * JISX201 = 3), NOT by the position of the charset in jpCharsetPref[].
    * Each entry is NUL-padded to 6 bytes; the real lengths are kept in
    * escSeqCharsLen[].
    */
   static const char escSeqChars[][6] ={
       "\x1B" "(B",    /* ASCII       */
       "\x1B" ".A",    /* ISO-8859-1  */
       "\x1B" ".F",    /* ISO-8859-7  */
       "\x1B" "(J",    /* JISX-201    */
       "\x1B" "$B",    /* JISX-208    */
       "\x1B" "$(D",   /* JISX-212    */
       "\x1B" "$A",    /* GB2312      */
       "\x1B" "$(C",   /* KSC5601     */
       "\x1B" "(I"     /* HWKANA_7BIT */
   };
   /*
    * Byte length of each designation escape sequence,
    * in the same (enum constant) order as the sequences themselves.
    */
   static  const int8_t escSeqCharsLen[] ={
       3,  /* <ESC>(B   ASCII       */
       3,  /* <ESC>.A   ISO-8859-1  */
       3,  /* <ESC>.F   ISO-8859-7  */
       3,  /* <ESC>(J   JISX-201    */
       3,  /* <ESC>$B   JISX-208    */
       4,  /* <ESC>$(D  JISX-212    */
       3,  /* <ESC>$A   GB2312      */
       4,  /* <ESC>$(C  KSC5601     */
       3   /* <ESC>(I   HWKANA_7BIT */
   };
   1443 
   1444 /*
   1445 * The iteration over various code pages works this way:
   1446 * i)   Get the currentState from myConverterData->currentState
   1447 * ii)  Check if the character is mapped to a valid character in the currentState
   1448 *      Yes ->  a) set the initIterState to currentState
   1449 *       b) remain in this state until an invalid character is found
   1450 *      No  ->  a) go to the next code page and find the character
   1451 * iii) Before changing the state, increment the current state and check whether it
   1452 *      is equal to the initIterState
   1453 *      Yes ->  A character that cannot be represented in any of the supported encodings
   1454 *       break and return a U_INVALID_CHAR_FOUND error
   1455 *      No  ->  Continue and find the character in next code page
   1456 *
   1457 *
   1458 * TODO: Implement a priority technique where the users are allowed to set the priority of code pages
   1459 */
   1460 
   /*
    * Map a JIS X 0201 byte value (00..7F) to Unicode.
    * Only two values are remapped: 5C becomes U+00A5 and 7E becomes U+203E.
    * Every other value is returned unchanged.
    */
   static inline uint32_t
   jisx201ToU(uint32_t value) {
       switch(value) {
       case 0x5c:
           return 0xa5;
       case 0x7e:
           return 0x203e;
       default:
           return value;
       }
   }
   1474 
   /*
    * Map a Unicode code point to a JIS X 0201 byte value (00..7F).
    * U+00A5 maps to 5C and U+203E maps to 7E; U+005C and U+007E themselves
    * are unmappable. All other code points up to U+007F map to themselves.
    * Returns 0xfffe for anything that cannot be mapped.
    */
   static inline uint32_t
   jisx201FromU(uint32_t value) {
       switch(value) {
       case 0xa5:
           return 0x5c;
       case 0x203e:
           return 0x7e;
       case 0x5c:
       case 0x7e:
           /* these two slots are taken by U+00A5 and U+203E */
           return 0xfffe;
       default:
           return (value <= 0x7f) ? value : 0xfffe;
       }
   }
   1489 
   /*
    * Convert a valid Shift-JIS byte pair (lead byte in bits 15..8, trail byte
    * in bits 7..0) to the corresponding pair of JIS X 0208 bytes 21..7E.
    * Returns 0 when the pair lies beyond the JIS X 0208 range (above 0xEFFC).
    */
   static inline uint32_t
   _2022FromSJIS(uint32_t value) {
       uint8_t trail;
       uint32_t lead, result;

       if(value > 0xEFFC) {
           return 0;  /* beyond JIS X 0208 */
       }

       trail = (uint8_t)value;
       lead = value & 0xff00;

       /* lead bytes up to 9F and lead bytes E0..EF use different bases */
       result = (lead - ((lead <= 0x9f00) ? 0x7000 : 0xb000)) << 1;

       if(trail > 0x9e) {
           /* trail <= 0xfc: second row of the pair covered by this lead byte */
           return result | (trail - 0x7e);
       }

       /* first row of the pair; the trail byte range skips 7F */
       result -= 0x100;
       return result | (trail - ((trail <= 0x7e) ? 0x1f : 0x20));
   }
   1525 
   /*
    * Convert a pair of JIS X 0208 bytes (each expected in 21..7E) to the
    * corresponding Shift-JIS byte pair, written to bytes[0] and bytes[1].
    * When an input byte is outside 21..7E the result is made invalid for
    * Shift-JIS so that the converter catches it; where needed the affected
    * output byte is forced to 0. Some invalid inputs already produce equally
    * invalid Shift-JIS bytes and are not tested explicitly.
    */
   static inline void
   _2022ToSJIS(uint8_t c1, uint8_t c2, char bytes[2]) {
       uint8_t lead, trail;

       if((c1&1) == 0) {
           /* even row: trail bytes 21..7E move up by 7E */
           if((uint8_t)(c2-0x21) <= ((0x7e)-0x21)) {
               trail = (uint8_t)(c2 + 0x7e);
           } else {
               trail = 0;  /* invalid */
           }
       } else {
           /* odd row: round the row up, the trail byte range skips 7F */
           ++c1;
           if(c2 <= 0x5f) {
               trail = (uint8_t)(c2 + 0x1f);
           } else if(c2 <= 0x7e) {
               trail = (uint8_t)(c2 + 0x20);
           } else {
               trail = 0;  /* invalid */
           }
       }

       /* two rows share one Shift-JIS lead byte */
       lead = (uint8_t)(c1 >> 1);
       if(lead <= 0x2f) {
           lead = (uint8_t)(lead + 0x70);
       } else if(lead <= 0x3f) {
           lead = (uint8_t)(lead + 0xb0);
       } else {
           lead = 0;  /* invalid */
       }

       bytes[0] = (char)lead;
       bytes[1] = (char)trail;
   }
   1562 
   1563 /*
   1564  * JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS)
   1565  * Katakana.
   1566  * Now that we use a Shift-JIS table for JIS X 0208 we need to hardcode these fallbacks
   1567  * because Shift-JIS roundtrips half-width Katakana to single bytes.
   1568  * These were the only fallbacks in ICU's jisx-208.ucm file.
   1569  */
   1570 static const uint16_t hwkana_fb[HWKANA_END - HWKANA_START + 1] = {
   1571     0x2123,  /* U+FF61 */
   1572     0x2156,
   1573     0x2157,
   1574     0x2122,
   1575     0x2126,
   1576     0x2572,
   1577     0x2521,
   1578     0x2523,
   1579     0x2525,
   1580     0x2527,
   1581     0x2529,
   1582     0x2563,
   1583     0x2565,
   1584     0x2567,
   1585     0x2543,
   1586     0x213C,  /* U+FF70 */
   1587     0x2522,
   1588     0x2524,
   1589     0x2526,
   1590     0x2528,
   1591     0x252A,
   1592     0x252B,
   1593     0x252D,
   1594     0x252F,
   1595     0x2531,
   1596     0x2533,
   1597     0x2535,
   1598     0x2537,
   1599     0x2539,
   1600     0x253B,
   1601     0x253D,
   1602     0x253F,  /* U+FF80 */
   1603     0x2541,
   1604     0x2544,
   1605     0x2546,
   1606     0x2548,
   1607     0x254A,
   1608     0x254B,
   1609     0x254C,
   1610     0x254D,
   1611     0x254E,
   1612     0x254F,
   1613     0x2552,
   1614     0x2555,
   1615     0x2558,
   1616     0x255B,
   1617     0x255E,
   1618     0x255F,  /* U+FF90 */
   1619     0x2560,
   1620     0x2561,
   1621     0x2562,
   1622     0x2564,
   1623     0x2566,
   1624     0x2568,
   1625     0x2569,
   1626     0x256A,
   1627     0x256B,
   1628     0x256C,
   1629     0x256D,
   1630     0x256F,
   1631     0x2573,
   1632     0x212B,
   1633     0x212C   /* U+FF9F */
   1634 };
   1635 
   1636 static void
   1637 UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err) {
            /*
             * From-Unicode conversion for the ISO-2022-JP variants.
             *
             * Reads UTF-16 code units from args->source up to args->sourceLimit and
             * writes bytes to args->target up to args->targetLimit; when
             * args->offsets is not NULL a source index is recorded per output byte.
             *
             * Per code point:
             *  1. surrogate pairs are combined; unpaired surrogates and the
             *     SO/SI/ESC controls are rejected with U_ILLEGAL_CHAR_FOUND;
             *  2. the charsets listed in choices[] are tried in order until a
             *     roundtrip mapping is found (a fallback found on the way is kept
             *     if no roundtrip turns up);
             *  3. SI, a designation escape sequence and SO or SS2 are emitted as
             *     needed, followed by the one or two data bytes.
             *
             * When the loop stops on an error the offending code point is stored in
             * cnv->fromUChar32; a lead surrogate at the very end of the input is
             * stored there as well so that the next call can resume with it.
             * With args->flush set and all input consumed, the output is returned
             * to ASCII mode. args->source and args->target are updated on return.
             */
   1638     UConverter *cnv = args->converter;
   1639     UConverterDataISO2022 *converterData;
   1640     ISO2022State *pFromU2022State;
   1641     uint8_t *target = (uint8_t *) args->target;
   1642     const uint8_t *targetLimit = (const uint8_t *) args->targetLimit;
   1643     const UChar* source = args->source;
   1644     const UChar* sourceLimit = args->sourceLimit;
   1645     int32_t* offsets = args->offsets;
   1646     UChar32 sourceChar;
   1647     char buffer[8];         /* output bytes generated for the current code point */
   1648     int32_t len, outLen;
   1649     int8_t choices[10];     /* charsets to try, in order; valid while choiceCount != 0 */
   1650     int32_t choiceCount;
   1651     uint32_t targetValue = 0;
   1652     UBool useFallback;
   1653 
   1654     int32_t i;
   1655     int8_t cs, g;           /* charset and G-set (0, 1 or 2) selected for the current code point */
   1656 
   1657     /* set up the state */
   1658     converterData     = (UConverterDataISO2022*)cnv->extraInfo;
   1659     pFromU2022State   = &converterData->fromU2022State;
   1660 
   1661     choiceCount = 0;
   1662 
   1663     /* check if the last codepoint of previous buffer was a lead surrogate*/
   1664     if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
   1665         goto getTrail;
   1666     }
   1667 
   1668     while(source < sourceLimit) {
   1669         if(target < targetLimit) {
   1670 
   1671             sourceChar  = *(source++);
   1672             /*check if the char is a First surrogate*/
   1673             if(U16_IS_SURROGATE(sourceChar)) {
   1674                 if(U16_IS_SURROGATE_LEAD(sourceChar)) {
   1675 getTrail:
   1676                     /*look ahead to find the trail surrogate*/
   1677                     if(source < sourceLimit) {
   1678                         /* test the following code unit */
   1679                         UChar trail=(UChar) *source;
   1680                         if(U16_IS_TRAIL(trail)) {
   1681                             source++;
   1682                             sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
   1683                             cnv->fromUChar32=0x00;
   1684                             /* convert this supplementary code point */
   1685                             /* exit this condition tree */
   1686                         } else {
   1687                             /* this is an unmatched lead code unit (1st surrogate) */
   1688                             /* callback(illegal) */
   1689                             *err=U_ILLEGAL_CHAR_FOUND;
   1690                             cnv->fromUChar32=sourceChar;
   1691                             break;
   1692                         }
   1693                     } else {
   1694                         /* no more input */
   1695                         cnv->fromUChar32=sourceChar;
   1696                         break;
   1697                     }
   1698                 } else {
   1699                     /* this is an unmatched trail code unit (2nd surrogate) */
   1700                     /* callback(illegal) */
   1701                     *err=U_ILLEGAL_CHAR_FOUND;
   1702                     cnv->fromUChar32=sourceChar;
   1703                     break;
   1704                 }
   1705             }
   1706 
   1707             /* do not convert SO/SI/ESC */
   1708             if(IS_2022_CONTROL(sourceChar)) {
   1709                 /* callback(illegal) */
   1710                 *err=U_ILLEGAL_CHAR_FOUND;
   1711                 cnv->fromUChar32=sourceChar;
   1712                 break;
   1713             }
   1714 
   1715             /* do the conversion */
   1716 
                    /*
                     * (Re)build choices[] only when it has been invalidated
                     * (choiceCount == 0); otherwise the list from the previous
                     * code point is reused.
                     */
   1717             if(choiceCount == 0) {
   1718                 uint16_t csm;
   1719 
   1720                 /*
   1721                  * The csm variable keeps track of which charsets are allowed
   1722                  * and not used yet while building the choices[].
   1723                  */
   1724                 csm = jpCharsetMasks[converterData->version];
   1725                 choiceCount = 0;
   1726 
   1727                 /* JIS7/8: try single-byte half-width Katakana before JISX208 */
   1728                 if(converterData->version == 3 || converterData->version == 4) {
   1729                     choices[choiceCount++] = (int8_t)HWKANA_7BIT;
   1730                 }
   1731                 /* Do not try single-byte half-width Katakana for other versions. */
   1732                 csm &= ~CSM(HWKANA_7BIT);
   1733 
   1734                 /* try the current G0 charset */
   1735                 choices[choiceCount++] = cs = pFromU2022State->cs[0];
   1736                 csm &= ~CSM(cs);
   1737 
   1738                 /* try the current G2 charset */
   1739                 if((cs = pFromU2022State->cs[2]) != 0) {
   1740                     choices[choiceCount++] = cs;
   1741                     csm &= ~CSM(cs);
   1742                 }
   1743 
   1744                 /* try all the other possible charsets */
   1745                 for(i = 0; i < LENGTHOF(jpCharsetPref); ++i) {
   1746                     cs = (int8_t)jpCharsetPref[i];
   1747                     if(CSM(cs) & csm) {
   1748                         choices[choiceCount++] = cs;
   1749                         csm &= ~CSM(cs);
   1750                     }
   1751                 }
   1752             }
   1753 
   1754             cs = g = 0;
   1755             /*
   1756              * len==0: no mapping found yet
   1757              * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
   1758              * len>0: found a roundtrip result, done
   1759              */
   1760             len = 0;
   1761             /*
   1762              * We will turn off useFallback after finding a fallback,
   1763              * but we still get fallbacks from PUA code points as usual.
   1764              * Therefore, we will also need to check that we don't overwrite
   1765              * an early fallback with a later one.
   1766              */
   1767             useFallback = cnv->useFallback;
   1768 
                    /* the loop ends at the first roundtrip (len > 0) or when all choices are exhausted */
   1769             for(i = 0; i < choiceCount && len <= 0; ++i) {
   1770                 uint32_t value;
   1771                 int32_t len2;
   1772                 int8_t cs0 = choices[i];
   1773                 switch(cs0) {
   1774                 case ASCII:
   1775                     if(sourceChar <= 0x7f) {
   1776                         targetValue = (uint32_t)sourceChar;
   1777                         len = 1;
   1778                         cs = cs0;
   1779                         g = 0;
   1780                     }
   1781                     break;
   1782                 case ISO8859_1:
                            /* G2 SBCS: code points in GR96_START..GR96_END are output as 7-bit values */
   1783                     if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
   1784                         targetValue = (uint32_t)sourceChar - 0x80;
   1785                         len = 1;
   1786                         cs = cs0;
   1787                         g = 2;
   1788                     }
   1789                     break;
   1790                 case HWKANA_7BIT:
   1791                     if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
   1792                         if(converterData->version==3) {
   1793                             /* JIS7: use G1 (SO) */
   1794                             /* Shift U+FF61..U+FF9F to bytes 21..5F. */
   1795                             targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21));
   1796                             len = 1;
   1797                             pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */
   1798                             g = 1;
   1799                         } else if(converterData->version==4) {
   1800                             /* JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below */
   1801                             /* Shift U+FF61..U+FF9F to bytes A1..DF. */
   1802                             targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0xa1));
   1803                             len = 1;
   1804 
   1805                             cs = pFromU2022State->cs[0];
   1806                             if(IS_JP_DBCS(cs)) {
   1807                                 /* switch from a DBCS charset to JISX201 */
   1808                                 cs = (int8_t)JISX201;
   1809                             }
   1810                             /* else stay in the current G0 charset */
   1811                             g = 0;
   1812                         }
   1813                         /* else do not use HWKANA_7BIT with other versions */
   1814                     }
   1815                     break;
   1816                 case JISX201:
   1817                     /* G0 SBCS */
   1818                     value = jisx201FromU(sourceChar);
   1819                     if(value <= 0x7f) {
   1820                         targetValue = value;
   1821                         len = 1;
   1822                         cs = cs0;
   1823                         g = 0;
   1824                         useFallback = FALSE;
   1825                     }
   1826                     break;
   1827                 case JISX208:
   1828                     /* G0 DBCS from Shift-JIS table */
   1829                     len2 = MBCS_FROM_UCHAR32_ISO2022(
   1830                                 converterData->myConverterArray[cs0],
   1831                                 sourceChar, &value,
   1832                                 useFallback, MBCS_OUTPUT_2);
   1833                     if(len2 == 2 || (len2 == -2 && len == 0)) {  /* only accept DBCS: abs(len)==2 */
                                /* _2022FromSJIS() returns 0 for pairs beyond JIS X 0208 */
   1834                         value = _2022FromSJIS(value);
   1835                         if(value != 0) {
   1836                             targetValue = value;
   1837                             len = len2;
   1838                             cs = cs0;
   1839                             g = 0;
   1840                             useFallback = FALSE;
   1841                         }
   1842                     } else if(len == 0 && useFallback &&
   1843                               (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
                                /* hardcoded fallback from half-width to full-width Katakana */
   1844                         targetValue = hwkana_fb[sourceChar - HWKANA_START];
   1845                         len = -2;
   1846                         cs = cs0;
   1847                         g = 0;
   1848                         useFallback = FALSE;
   1849                     }
   1850                     break;
   1851                 case ISO8859_7:
   1852                     /* G0 SBCS forced to 7-bit output */
                            /* NOTE(review): the code below selects g = 2 for this charset, not G0 */
   1853                     len2 = MBCS_SINGLE_FROM_UCHAR32(
   1854                                 converterData->myConverterArray[cs0],
   1855                                 sourceChar, &value,
   1856                                 useFallback);
   1857                     if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= value && value <= GR96_END) {
   1858                         targetValue = value - 0x80;
   1859                         len = len2;
   1860                         cs = cs0;
   1861                         g = 2;
   1862                         useFallback = FALSE;
   1863                     }
   1864                     break;
   1865                 default:
   1866                     /* G0 DBCS */
   1867                     len2 = MBCS_FROM_UCHAR32_ISO2022(
   1868                                 converterData->myConverterArray[cs0],
   1869                                 sourceChar, &value,
   1870                                 useFallback, MBCS_OUTPUT_2);
   1871                     if(len2 == 2 || (len2 == -2 && len == 0)) {  /* only accept DBCS: abs(len)==2 */
   1872                         if(cs0 == KSC5601) {
   1873                             /*
   1874                              * Check for valid bytes for the encoding scheme.
   1875                              * This is necessary because the sub-converter (windows-949)
   1876                              * has a broader encoding scheme than is valid for 2022.
   1877                              */
   1878                             value = _2022FromGR94DBCS(value);
   1879                             if(value == 0) {
   1880                                 break;
   1881                             }
   1882                         }
   1883                         targetValue = value;
   1884                         len = len2;
   1885                         cs = cs0;
   1886                         g = 0;
   1887                         useFallback = FALSE;
   1888                     }
   1889                     break;
   1890                 }
   1891             }
   1892 
   1893             if(len != 0) {
   1894                 if(len < 0) {
   1895                     len = -len;  /* fallback */
   1896                 }
   1897                 outLen = 0; /* count output bytes */
   1898 
   1899                 /* write SI if necessary (only for JIS7) */
   1900                 if(pFromU2022State->g == 1 && g == 0) {
   1901                     buffer[outLen++] = UCNV_SI;
   1902                     pFromU2022State->g = 0;
   1903                 }
   1904 
   1905                 /* write the designation sequence if necessary */
   1906                 if(cs != pFromU2022State->cs[g]) {
   1907                     int32_t escLen = escSeqCharsLen[cs];
   1908                     uprv_memcpy(buffer + outLen, escSeqChars[cs], escLen);
   1909                     outLen += escLen;
   1910                     pFromU2022State->cs[g] = cs;
   1911 
   1912                     /* invalidate the choices[] */
   1913                     choiceCount = 0;
   1914                 }
   1915 
   1916                 /* write the shift sequence if necessary */
   1917                 if(g != pFromU2022State->g) {
   1918                     switch(g) {
   1919                     /* case 0 handled before writing escapes */
   1920                     case 1:
   1921                         buffer[outLen++] = UCNV_SO;
   1922                         pFromU2022State->g = 1;
   1923                         break;
   1924                     default: /* case 2 */
                                /* SS2 (ESC N); pFromU2022State->g is left unchanged here */
   1925                         buffer[outLen++] = 0x1b;
   1926                         buffer[outLen++] = 0x4e;
   1927                         break;
   1928                     /* no case 3: no SS3 in ISO-2022-JP-x */
   1929                     }
   1930                 }
   1931 
   1932                 /* write the output bytes */
   1933                 if(len == 1) {
   1934                     buffer[outLen++] = (char)targetValue;
   1935                 } else /* len == 2 */ {
   1936                     buffer[outLen++] = (char)(targetValue >> 8);
   1937                     buffer[outLen++] = (char)targetValue;
   1938                 }
   1939             } else {
   1940                 /*
   1941                  * if we cannot find the character after checking all codepages
   1942                  * then this is an error
   1943                  */
   1944                 *err = U_INVALID_CHAR_FOUND;
   1945                 cnv->fromUChar32=sourceChar;
   1946                 break;
   1947             }
   1948 
   1949             if(sourceChar == CR || sourceChar == LF) {
   1950                 /* reset the G2 state at the end of a line (conversion got us into ASCII or JISX201 already) */
   1951                 pFromU2022State->cs[2] = 0;
   1952                 choiceCount = 0;
   1953             }
   1954 
   1955             /* output outLen>0 bytes in buffer[] */
                    /* one byte always fits: target < targetLimit was checked at the top of the loop body */
   1956             if(outLen == 1) {
   1957                 *target++ = buffer[0];
   1958                 if(offsets) {
   1959                     *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */
   1960                 }
   1961             } else if(outLen == 2 && (target + 2) <= targetLimit) {
   1962                 *target++ = buffer[0];
   1963                 *target++ = buffer[1];
   1964                 if(offsets) {
   1965                     int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));
   1966                     *offsets++ = sourceIndex;
   1967                     *offsets++ = sourceIndex;
   1968                 }
   1969             } else {
                        /*
                         * General case: longer output, or two bytes that do not fit.
                         * NOTE(review): fromUWriteUInt8() presumably stores the bytes
                         * that do not fit in the converter and sets
                         * U_BUFFER_OVERFLOW_ERROR -- confirm in its definition.
                         */
   1970                 fromUWriteUInt8(
   1971                     cnv,
   1972                     buffer, outLen,
   1973                     &target, (const char *)targetLimit,
   1974                     &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
   1975                     err);
   1976                 if(U_FAILURE(*err)) {
   1977                     break;
   1978                 }
   1979             }
   1980         } /* end if(target < targetLimit) */
   1981         else{
   1982             *err =U_BUFFER_OVERFLOW_ERROR;
   1983             break;
   1984         }
   1985 
   1986     }/* end while(source < sourceLimit) */
   1987 
   1988     /*
   1989      * the end of the input stream and detection of truncated input
   1990      * are handled by the framework, but for ISO-2022-JP conversion
   1991      * we need to be in ASCII mode at the very end
   1992      *
   1993      * conditions:
   1994      *   successful
   1995      *   in SO mode or not in ASCII mode
   1996      *   end of input and no truncated input
   1997      */
   1998     if( U_SUCCESS(*err) &&
   1999         (pFromU2022State->g!=0 || pFromU2022State->cs[0]!=ASCII) &&
   2000         args->flush && source>=sourceLimit && cnv->fromUChar32==0
   2001     ) {
   2002         int32_t sourceIndex;
   2003 
   2004         outLen = 0;
   2005 
                /* leave SO mode first, then designate ASCII to G0 */
   2006         if(pFromU2022State->g != 0) {
   2007             buffer[outLen++] = UCNV_SI;
   2008             pFromU2022State->g = 0;
   2009         }
   2010 
   2011         if(pFromU2022State->cs[0] != ASCII) {
   2012             int32_t escLen = escSeqCharsLen[ASCII];
   2013             uprv_memcpy(buffer + outLen, escSeqChars[ASCII], escLen);
   2014             outLen += escLen;
   2015             pFromU2022State->cs[0] = (int8_t)ASCII;
   2016         }
   2017 
   2018         /* get the source index of the last input character */
   2019         /*
   2020          * TODO this would be simpler and more reliable if we used a pair
   2021          * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
   2022          * so that we could simply use the prevSourceIndex here;
   2023          * this code gives an incorrect result for the rare case of an unmatched
   2024          * trail surrogate that is alone in the last buffer of the text stream
   2025          */
   2026         sourceIndex=(int32_t)(source-args->source);
   2027         if(sourceIndex>0) {
   2028             --sourceIndex;
   2029             if( U16_IS_TRAIL(args->source[sourceIndex]) &&
   2030                 (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
   2031             ) {
   2032                 --sourceIndex;
   2033             }
   2034         } else {
                    /* no input was consumed in this call */
   2035             sourceIndex=-1;
   2036         }
   2037 
   2038         fromUWriteUInt8(
   2039             cnv,
   2040             buffer, outLen,
   2041             &target, (const char *)targetLimit,
   2042             &offsets, sourceIndex,
   2043             err);
   2044     }
   2045 
   2046     /*save the state and return */
   2047     args->source = source;
   2048     args->target = (char*)target;
   2049 }
   2050 
   2051 /*************** to unicode *******************/
   2052 
   2053 static void
   2054 UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
   2055                                                UErrorCode* err){
            /*
             * Decodes ISO-2022-JP family bytes from [args->source, args->sourceLimit)
             * into UChars at args->target, never writing at or past args->targetLimit.
             * When args->offsets is non-NULL, each UChar written also records the
             * index (relative to args->source) of the first byte of its character.
             *
             * State carried across calls:
             *   - myData->key != 0: a partial escape sequence is pending
             *   - converter->toUBytes[0] / toULength == 1: a buffered DBCS lead byte
             *   - pToU2022State->cs[] and ->g: designated charsets and active G set
             *   - myData->isEmptySegment: no character seen since the last escape
             *
             * myData->version selects variant behavior visible below: 3 honors SI/SO,
             * 4 accepts 8-bit halfwidth katakana, 0 rejects an escape sequence that
             * follows an empty segment.
             *
             * On exit args->source and args->target are advanced. *err becomes
             * U_BUFFER_OVERFLOW_ERROR when the target is full while input remains,
             * U_ILLEGAL_ESCAPE_SEQUENCE for the empty-segment case, or whatever
             * changeState_2022() / toUnicodeCallback() store through err.
             */
            /* scratch for the two bytes handed to ucnv_MBCSSimpleGetNextUChar() */
   2056     char tempBuf[2];
   2057     const char *mySource = (char *) args->source;
   2058     UChar *myTarget = args->target;
   2059     const char *mySourceLimit = args->sourceLimit;
   2060     uint32_t targetUniChar = 0x0000;
   2061     uint32_t mySourceChar = 0x0000;
   2062     uint32_t tmpSourceChar = 0x0000;
   2063     UConverterDataISO2022* myData;
   2064     ISO2022State *pToU2022State;
   2065     StateEnum cs;
   2066 
   2067     myData=(UConverterDataISO2022*)(args->converter->extraInfo);
   2068     pToU2022State = &myData->toU2022State;
   2069 
   2070     if(myData->key != 0) {
   2071         /* continue with a partial escape sequence */
   2072         goto escape;
   2073     } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
   2074         /* continue with a partial double-byte character */
                /* (only taken when there is both a trail byte to read and room to write) */
   2075         mySourceChar = args->converter->toUBytes[0];
   2076         args->converter->toULength = 0;
   2077         cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
                /* the jump skips the loop-top initialization, so repeat it here */
   2078         targetUniChar = missingCharMarker;
   2079         goto getTrailByte;
   2080     }
   2081 
   2082     while(mySource < mySourceLimit){
   2083 
   2084         targetUniChar =missingCharMarker;
   2085 
   2086         if(myTarget < args->targetLimit){
   2087 
   2088             mySourceChar= (unsigned char) *mySource++;
   2089 
   2090             switch(mySourceChar) {
   2091             case UCNV_SI:
   2092                 if(myData->version==3) {
   2093                     pToU2022State->g=0;
   2094                     continue;
   2095                 } else {
   2096                     /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
                            /* targetUniChar is still missingCharMarker, so the byte reaches toUnicodeCallback() below */
   2097                     myData->isEmptySegment = FALSE;	/* reset this, we have a different error */
   2098                     break;
   2099                 }
   2100 
   2101             case UCNV_SO:
   2102                 if(myData->version==3) {
   2103                     /* JIS7: switch to G1 half-width Katakana */
   2104                     pToU2022State->cs[1] = (int8_t)HWKANA_7BIT;
   2105                     pToU2022State->g=1;
   2106                     continue;
   2107                 } else {
   2108                     /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
   2109                     myData->isEmptySegment = FALSE;	/* reset this, we have a different error */
   2110                     break;
   2111                 }
   2112 
   2113             case ESC_2022:
                        /* step back onto the ESC byte; changeState_2022() is handed mySource from there */
   2114                 mySource--;
   2115 escape:
   2116                 {
   2117                     const char * mySourceBefore = mySource;
   2118                     int8_t toULengthBefore = args->converter->toULength;
   2119 
   2120                     changeState_2022(args->converter,&(mySource),
   2121                         mySourceLimit, ISO_2022_JP,err);
   2122 
   2123                     /* If in ISO-2022-JP only and we successfully completed an escape sequence, but previous segment was empty, create an error */
   2124                     if(myData->version==0 && myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
   2125                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   2126                         args->converter->toUCallbackReason = UCNV_IRREGULAR;
                                /* length = what was pending before this call plus the bytes consumed by it */
   2127                         args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore));
   2128                     }
   2129                 }
   2130 
   2131                 /* invalid or illegal escape sequence */
   2132                 if(U_FAILURE(*err)){
   2133                     args->target = myTarget;
   2134                     args->source = mySource;
   2135                     myData->isEmptySegment = FALSE;	/* Reset to avoid future spurious errors */
   2136                     return;
   2137                 }
   2138                 /* If we successfully completed an escape sequence, we begin a new segment, empty so far */
   2139                 if(myData->key==0) {
   2140                     myData->isEmptySegment = TRUE;
   2141                 }
   2142                 continue;
   2143 
   2144             /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */
   2145 
   2146             case CR:
   2147                 /*falls through*/
   2148             case LF:
   2149                 /* automatically reset to single-byte mode */
                        /* G0 goes back to ASCII unless it is already ASCII or JISX201; G2 and the active set are cleared */
   2150                 if((StateEnum)pToU2022State->cs[0] != ASCII && (StateEnum)pToU2022State->cs[0] != JISX201) {
   2151                     pToU2022State->cs[0] = (int8_t)ASCII;
   2152                 }
   2153                 pToU2022State->cs[2] = 0;
   2154                 pToU2022State->g = 0;
   2155                 /* falls through */
   2156             default:
   2157                 /* convert one or two bytes */
   2158                 myData->isEmptySegment = FALSE;
   2159                 cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
                        /* the uint8_t subtraction folds the 0xa1..0xdf range test into one unsigned compare */
   2160                 if( (uint8_t)(mySourceChar - 0xa1) <= (0xdf - 0xa1) && myData->version==4 &&
   2161                     !IS_JP_DBCS(cs)
   2162                 ) {
   2163                     /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */
   2164                     targetUniChar = mySourceChar + (HWKANA_START - 0xa1);
   2165 
   2166                     /* return from a single-shift state to the previous one */
   2167                     if(pToU2022State->g >= 2) {
   2168                         pToU2022State->g=pToU2022State->prevG;
   2169                     }
   2170                 } else switch(cs) {
                        /* in every single-byte case a byte outside the accepted range leaves targetUniChar at missingCharMarker */
   2171                 case ASCII:
   2172                     if(mySourceChar <= 0x7f) {
   2173                         targetUniChar = mySourceChar;
   2174                     }
   2175                     break;
   2176                 case ISO8859_1:
   2177                     if(mySourceChar <= 0x7f) {
                                /* the 7-bit byte selects the code point 0x80 higher */
   2178                         targetUniChar = mySourceChar + 0x80;
   2179                     }
   2180                     /* return from a single-shift state to the previous one */
   2181                     pToU2022State->g=pToU2022State->prevG;
   2182                     break;
   2183                 case ISO8859_7:
   2184                     if(mySourceChar <= 0x7f) {
   2185                         /* convert mySourceChar+0x80 to use a normal 8-bit table */
   2186                         targetUniChar =
   2187                             _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(
   2188                                 myData->myConverterArray[cs],
   2189                                 mySourceChar + 0x80);
   2190                     }
   2191                     /* return from a single-shift state to the previous one */
   2192                     pToU2022State->g=pToU2022State->prevG;
   2193                     break;
   2194                 case JISX201:
   2195                     if(mySourceChar <= 0x7f) {
   2196                         targetUniChar = jisx201ToU(mySourceChar);
   2197                     }
   2198                     break;
   2199                 case HWKANA_7BIT:
   2200                     if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) {
   2201                         /* 7-bit halfwidth Katakana */
   2202                         targetUniChar = mySourceChar + (HWKANA_START - 0x21);
   2203                     }
   2204                     break;
   2205                 default:
   2206                     /* G0 DBCS */
   2207                     if(mySource < mySourceLimit) {
   2208                         int leadIsOk, trailIsOk;
   2209                         uint8_t trailByte;
                                /* also entered from the top of the function when a lead byte was buffered by the previous call */
   2210 getTrailByte:
   2211                         trailByte = (uint8_t)*mySource;
   2212                         /*
   2213                          * Ticket 5691: consistent illegal sequences:
   2214                          * - We include at least the first byte in the illegal sequence.
   2215                          * - If any of the non-initial bytes could be the start of a character,
   2216                          *   we stop the illegal sequence before the first one of those.
   2217                          *
   2218                          * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
   2219                          * an ESC/SO/SI, we report only the first byte as the illegal sequence.
   2220                          * Otherwise we convert or report the pair of bytes.
   2221                          */
   2222                         leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
   2223                         trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
   2224                         if (leadIsOk && trailIsOk) {
   2225                             ++mySource;
   2226                             tmpSourceChar = (mySourceChar << 8) | trailByte;
   2227                             if(cs == JISX208) {
                                        /* JISX208 is looked up through a converted byte pair that _2022ToSJIS() writes into tempBuf */
   2228                                 _2022ToSJIS((uint8_t)mySourceChar, trailByte, tempBuf);
   2229                                 mySourceChar = tmpSourceChar;
   2230                             } else {
   2231                                 /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
   2232                                 mySourceChar = tmpSourceChar;
   2233                                 if (cs == KSC5601) {
   2234                                     tmpSourceChar += 0x8080;  /* = _2022ToGR94DBCS(tmpSourceChar) */
   2235                                 }
   2236                                 tempBuf[0] = (char)(tmpSourceChar >> 8);
   2237                                 tempBuf[1] = (char)(tmpSourceChar);
   2238                             }
   2239                             targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE);
   2240                         } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
   2241                             /* report a pair of illegal bytes if the second byte is not a DBCS starter */
   2242                             ++mySource;
   2243                             /* add another bit so that the code below writes 2 bytes in case of error */
   2244                             mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
   2245                         }
                                /* otherwise only the lead byte is consumed; targetUniChar stays missingCharMarker */
   2246                     } else {
                                /* input ended after a DBCS lead byte: buffer it and resume on the next call */
   2247                         args->converter->toUBytes[0] = (uint8_t)mySourceChar;
   2248                         args->converter->toULength = 1;
   2249                         goto endloop;
   2250                     }
   2251                 }  /* End of inner switch */
   2252                 break;
   2253             }  /* End of outer switch */
                    /*
                     * Three-way dispatch on the lookup result: a BMP code unit, a
                     * supplementary code point (written as a surrogate pair), or one of
                     * the marker values missingCharMarker-1 / missingCharMarker, which
                     * are handed to toUnicodeCallback().
                     */
   2254             if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){
   2255                 if(args->offsets){
                            /* offset of the character's first byte: it occupied 1 byte if mySourceChar fits in 8 bits, else 2 */
   2256                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2257                 }
   2258                 *(myTarget++)=(UChar)targetUniChar;
   2259             }
   2260             else if(targetUniChar > missingCharMarker){
   2261                 /* disassemble the surrogate pair and write to output*/
   2262                 targetUniChar-=0x0010000;
   2263                 *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));
   2264                 if(args->offsets){
   2265                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2266                 }
   2267                 ++myTarget;
   2268                 if(myTarget< args->targetLimit){
   2269                     *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
   2270                     if(args->offsets){
   2271                         args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2272                     }
   2273                     ++myTarget;
   2274                 }else{
                            /*
                             * No room for the trail surrogate: park it in the converter's
                             * UCharErrorBuffer. NOTE(review): *err is not set here; if input
                             * remains, the next iteration's target check reports the overflow.
                             */
   2275                     args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
   2276                                     (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
   2277                 }
   2278 
   2279             }
   2280             else{
   2281                 /* Call the callback function*/
   2282                 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
   2283                 break;
   2284             }
   2285         }
   2286         else{    /* goes with "if(myTarget < args->targetLimit)"  way up near top of function */
   2287             *err =U_BUFFER_OVERFLOW_ERROR;
   2288             break;
   2289         }
   2290     }
   2291 endloop:
            /* publish how far we got; reached from the loop exit, both breaks, and the buffered-lead-byte goto */
   2292     args->target = myTarget;
   2293     args->source = mySource;
   2294 }
   2295 
   2296 
   2297 /***************************************************************
   2298 *   Rules for ISO-2022-KR encoding
   2299 *   i) The KSC5601 designator sequence should appear only once in a file,
   2300 *      at the beginning of a line before any KSC5601 characters. This usually
   2301 *      means that it appears by itself on the first line of the file.
   2302 *  ii) There are only 2 shifting sequences: SO to shift into double-byte mode
   2303 *      and SI to shift into single-byte mode.
   2304 */
   2305 static void
   2306 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){
            /*
             * Runs the fromUnicode conversion on the wrapped MBCS converter
             * (extraInfo->currentConverter) instead of the public one.
             * The pending code point (fromUChar32) is carried into the inner
             * converter before the call and back out afterwards; on
             * U_BUFFER_OVERFLOW_ERROR the inner converter's overflow bytes are
             * moved to the public converter. args->converter is restored on return.
             */
   2307     UConverter *publicCnv = args->converter;
   2308     UConverter *innerCnv = ((UConverterDataISO2022 *)publicCnv->extraInfo)->currentConverter;
   2309 
            /* convert with the inner converter, shuttling the pending code point both ways */
   2310     innerCnv->fromUChar32 = publicCnv->fromUChar32;
   2311     args->converter = innerCnv;
   2312     ucnv_MBCSFromUnicodeWithOffsets(args, err);
   2313     args->converter = publicCnv;
   2314     publicCnv->fromUChar32 = innerCnv->fromUChar32;
   2315 
            /* only an overflow leaves bytes behind that need to be handed over */
   2316     if(*err != U_BUFFER_OVERFLOW_ERROR) {
   2317         return;
   2318     }
   2319 
            /* move the overflow bytes to the public converter and empty the inner buffer */
   2320     if(innerCnv->charErrorBufferLength > 0) {
   2321         uprv_memcpy(publicCnv->charErrorBuffer,
   2322                     innerCnv->charErrorBuffer,
   2323                     innerCnv->charErrorBufferLength);
   2324     }
   2325     publicCnv->charErrorBufferLength = innerCnv->charErrorBufferLength;
   2326     innerCnv->charErrorBufferLength = 0;
   2327 }
   2328 
   2329 static void
   2330 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
            /*
             * Encodes UChars from [args->source, args->sourceLimit) as ISO-2022-KR
             * bytes at args->target. Version 1 converters are handed to the _IBM
             * variant; everything below is the non-IBM path.
             *
             * Each code point is looked up with MBCS_FROM_UCHAR32_ISO2022(); only
             * single bytes <= 0x7f and byte pairs with both bytes in 0xa1..0xfe
             * are accepted. An SO or SI byte is emitted whenever the output
             * switches between single-byte and double-byte mode, and pair bytes
             * are written with 0x80 subtracted.
             *
             * The mode flag lives in converter->fromUnicodeStatus across calls, and
             * a code point that needs a callback or a trail surrogate is left in
             * converter->fromUChar32. Bytes that do not fit in the target go to
             * converter->charErrorBuffer with *err = U_BUFFER_OVERFLOW_ERROR.
             * When flushing at the end of input in double-byte mode, a final SI
             * is written.
             */
   2331 
   2332     const UChar *source = args->source;
   2333     const UChar *sourceLimit = args->sourceLimit;
   2334     unsigned char *target = (unsigned char *) args->target;
   2335     unsigned char *targetLimit = (unsigned char *) args->targetLimit;
   2336     int32_t* offsets = args->offsets;
   2337     uint32_t targetByteUnit = 0x0000;
   2338     UChar32 sourceChar = 0x0000;
   2339     UBool isTargetByteDBCS;
   2340     UBool oldIsTargetByteDBCS;
   2341     UConverterDataISO2022 *converterData;
   2342     UConverterSharedData* sharedData;
   2343     UBool useFallback;
   2344     int32_t length =0;
   2345 
   2346     converterData=(UConverterDataISO2022*)args->converter->extraInfo;
   2347     /* if the version is 1 then the user is requesting
   2348      * conversion with ibm-25546 pass the arguments to
   2349      * MBCS converter and return
   2350      */
   2351     if(converterData->version==1){
   2352         UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
   2353         return;
   2354     }
   2355 
   2356     /* initialize data */
   2357     sharedData = converterData->currentConverter->sharedData;
   2358     useFallback = args->converter->useFallback;
   2359     isTargetByteDBCS=(UBool)args->converter->fromUnicodeStatus;
   2360     oldIsTargetByteDBCS = isTargetByteDBCS;
   2361 
            /* NOTE(review): repeats the read of fromUnicodeStatus made a few lines above */
   2362     isTargetByteDBCS   = (UBool) args->converter->fromUnicodeStatus;
            /*
             * A code point left pending by a previous call resumes at the trail
             * surrogate lookup (presumably a lead surrogate saved when input ran
             * out -- see the "no more input" branch below).
             */
   2363     if((sourceChar = args->converter->fromUChar32)!=0 && target <targetLimit) {
   2364         goto getTrail;
   2365     }
   2366     while(source < sourceLimit){
   2367 
   2368         targetByteUnit = missingCharMarker;
   2369 
   2370         if(target < (unsigned char*) args->targetLimit){
   2371             sourceChar = *source++;
   2372 
   2373             /* do not convert SO/SI/ESC */
   2374             if(IS_2022_CONTROL(sourceChar)) {
   2375                 /* callback(illegal) */
   2376                 *err=U_ILLEGAL_CHAR_FOUND;
   2377                 args->converter->fromUChar32=sourceChar;
   2378                 break;
   2379             }
   2380 
   2381             length = MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,&targetByteUnit,useFallback,MBCS_OUTPUT_2);
   2382             if(length < 0) {
   2383                 length = -length;  /* fallback */
   2384             }
   2385             /* only DBCS or SBCS characters are expected*/
   2386             /* DB characters with high bit set to 1 are expected */
                    /*
                     * Reject anything but a single byte <= 0x7f or a byte pair whose
                     * value lies in 0xa1a1..0xfefe with its low byte in 0xa1..0xfe;
                     * the unsigned casts turn each range test into one compare.
                     */
   2387             if( length > 2 || length==0 ||
   2388                 (length == 1 && targetByteUnit > 0x7f) ||
   2389                 (length == 2 &&
   2390                     ((uint16_t)(targetByteUnit - 0xa1a1) > (0xfefe - 0xa1a1) ||
   2391                     (uint8_t)(targetByteUnit - 0xa1) > (0xfe - 0xa1)))
   2392             ) {
   2393                 targetByteUnit=missingCharMarker;
   2394             }
   2395             if (targetByteUnit != missingCharMarker){
   2396 
   2397                 oldIsTargetByteDBCS = isTargetByteDBCS;
   2398                 isTargetByteDBCS = (UBool)(targetByteUnit>0x00FF);
   2399                   /* append the shift sequence */
                          /* the loop-top check guarantees room for this one byte */
   2400                 if (oldIsTargetByteDBCS != isTargetByteDBCS ){
   2401 
   2402                     if (isTargetByteDBCS)
   2403                         *target++ = UCNV_SO;
   2404                     else
   2405                         *target++ = UCNV_SI;
                            /* every byte produced for this character maps to the code unit just read */
   2406                     if(offsets)
   2407                         *(offsets++) = (int32_t)(source - args->source-1);
   2408                 }
   2409                 /* write the targetUniChar  to target */
   2410                 if(targetByteUnit <= 0x00FF){
   2411                     if( target < targetLimit){
   2412                         *(target++) = (unsigned char) targetByteUnit;
   2413                         if(offsets){
   2414                             *(offsets++) = (int32_t)(source - args->source-1);
   2415                         }
   2416 
   2417                     }else{
                                /* target filled up by the shift byte: spill into the overflow buffer */
   2418                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit);
   2419                         *err = U_BUFFER_OVERFLOW_ERROR;
   2420                     }
   2421                 }else{
                            /* double-byte output: each byte is written with 0x80 subtracted */
   2422                     if(target < targetLimit){
   2423                         *(target++) =(unsigned char) ((targetByteUnit>>8) -0x80);
   2424                         if(offsets){
   2425                             *(offsets++) = (int32_t)(source - args->source-1);
   2426                         }
   2427                         if(target < targetLimit){
   2428                             *(target++) =(unsigned char) (targetByteUnit -0x80);
   2429                             if(offsets){
   2430                                 *(offsets++) = (int32_t)(source - args->source-1);
   2431                             }
   2432                         }else{
   2433                             args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit -0x80);
   2434                             *err = U_BUFFER_OVERFLOW_ERROR;
   2435                         }
   2436                     }else{
   2437                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) ((targetByteUnit>>8) -0x80);
   2438                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit-0x80);
   2439                         *err = U_BUFFER_OVERFLOW_ERROR;
   2440                     }
   2441                 }
   2442 
   2443             }
   2444             else{
   2445                 /* oops.. the code point is unassigned
   2446                  * set the error and reason
   2447                  */
   2448 
   2449                 /*check if the char is a First surrogate*/
   2450                 if(U16_IS_SURROGATE(sourceChar)) {
   2451                     if(U16_IS_SURROGATE_LEAD(sourceChar)) {
                                /* also entered from the top of the function with the pending code point */
   2452 getTrail:
   2453                         /*look ahead to find the trail surrogate*/
   2454                         if(source <  sourceLimit) {
   2455                             /* test the following code unit */
   2456                             UChar trail=(UChar) *source;
   2457                             if(U16_IS_TRAIL(trail)) {
   2458                                 source++;
   2459                                 sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
                                        /* a complete supplementary code point is always reported as invalid on this path */
   2460                                 *err = U_INVALID_CHAR_FOUND;
   2461                                 /* convert this surrogate code point */
   2462                                 /* exit this condition tree */
   2463                             } else {
   2464                                 /* this is an unmatched lead code unit (1st surrogate) */
   2465                                 /* callback(illegal) */
   2466                                 *err=U_ILLEGAL_CHAR_FOUND;
   2467                             }
   2468                         } else {
   2469                             /* no more input */
                                    /* not an error: the lead surrogate is saved in fromUChar32 below */
   2470                             *err = U_ZERO_ERROR;
   2471                         }
   2472                     } else {
   2473                         /* this is an unmatched trail code unit (2nd surrogate) */
   2474                         /* callback(illegal) */
   2475                         *err=U_ILLEGAL_CHAR_FOUND;
   2476                     }
   2477                 } else {
   2478                     /* callback(unassigned) for a BMP code point */
   2479                     *err = U_INVALID_CHAR_FOUND;
   2480                 }
   2481 
                        /* leave the offending or incomplete code point in the converter and stop */
   2482                 args->converter->fromUChar32=sourceChar;
   2483                 break;
   2484             }
   2485         } /* end if(myTargetIndex<myTargetLength) */
   2486         else{
   2487             *err =U_BUFFER_OVERFLOW_ERROR;
   2488             break;
   2489         }
   2490 
   2491     }/* end while(mySourceIndex<mySourceLength) */
   2492 
   2493     /*
   2494      * the end of the input stream and detection of truncated input
   2495      * are handled by the framework, but for ISO-2022-KR conversion
   2496      * we need to be in ASCII mode at the very end
   2497      *
   2498      * conditions:
   2499      *   successful
   2500      *   not in ASCII mode
   2501      *   end of input and no truncated input
   2502      */
   2503     if( U_SUCCESS(*err) &&
   2504         isTargetByteDBCS &&
   2505         args->flush && source>=sourceLimit && args->converter->fromUChar32==0
   2506     ) {
   2507         int32_t sourceIndex;
   2508 
   2509         /* we are switching to ASCII */
   2510         isTargetByteDBCS=FALSE;
   2511 
   2512         /* get the source index of the last input character */
   2513         /*
   2514          * TODO this would be simpler and more reliable if we used a pair
   2515          * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
   2516          * so that we could simply use the prevSourceIndex here;
   2517          * this code gives an incorrect result for the rare case of an unmatched
   2518          * trail surrogate that is alone in the last buffer of the text stream
   2519          */
   2520         sourceIndex=(int32_t)(source-args->source);
   2521         if(sourceIndex>0) {
   2522             --sourceIndex;
                    /* step back one more unit when the last unit is a trail surrogate (see the TODO above for the sourceIndex==0 case) */
   2523             if( U16_IS_TRAIL(args->source[sourceIndex]) &&
   2524                 (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
   2525             ) {
   2526                 --sourceIndex;
   2527             }
   2528         } else {
                    /* nothing was consumed from this buffer, so there is no index to report */
   2529             sourceIndex=-1;
   2530         }
   2531 
                /* write the closing SI through the shared helper, which is also given the target limit and offsets */
   2532         fromUWriteUInt8(
   2533             args->converter,
   2534             SHIFT_IN_STR, 1,
   2535             &target, (const char *)targetLimit,
   2536             &offsets, sourceIndex,
   2537             err);
   2538     }
   2539 
   2540     /*save the state and return */
   2541     args->source = source;
   2542     args->target = (char*)target;
   2543     args->converter->fromUnicodeStatus = (uint32_t)isTargetByteDBCS;
   2544 }
   2545 
   2546 /************************ To Unicode ***************************************/
   2547 
   2548 static void
   2549 UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterToUnicodeArgs *args,
   2550                                                             UErrorCode* err){
            /*
             * toUnicode path that alternates between two steps until the input is
             * used up or an error occurs:
             *   1. convert the run of bytes up to the limit returned by
             *      getEndOfBuffer_2022() with the wrapped MBCS converter
             *      (myData->currentConverter), using a private copy of the args;
             *   2. let changeState_2022() process the escape sequence that follows.
             *
             * Around step 1 the partial-input bytes (toUBytes/toULength) are copied
             * into the subconverter and back, the overflow UChars are copied out on
             * U_BUFFER_OVERFLOW_ERROR, and the offsets the subconverter produced are
             * rebased onto the start of the caller's buffer.
             */
   2551     char const* sourceStart;
   2552     UConverterDataISO2022* myData=(UConverterDataISO2022*)(args->converter->extraInfo);
   2553 
   2554     UConverterToUnicodeArgs subArgs;
   2555     int32_t minArgsSize;
   2556 
   2557     /* set up the subconverter arguments */
            /* copy no more than the smaller of the caller's declared struct size and ours */
   2558     if(args->size<sizeof(UConverterToUnicodeArgs)) {
   2559         minArgsSize = args->size;
   2560     } else {
   2561         minArgsSize = (int32_t)sizeof(UConverterToUnicodeArgs);
   2562     }
   2563 
   2564     uprv_memcpy(&subArgs, args, minArgsSize);
   2565     subArgs.size = (uint16_t)minArgsSize;
   2566     subArgs.converter = myData->currentConverter;
   2567 
   2568     /* remember the original start of the input for offsets */
   2569     sourceStart = args->source;
   2570 
   2571     if(myData->key != 0) {
   2572         /* continue with a partial escape sequence */
   2573         goto escape;
   2574     }
   2575 
   2576     while(U_SUCCESS(*err) && args->source < args->sourceLimit) {
   2577         /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
   2578         subArgs.source = args->source;
   2579         subArgs.sourceLimit = getEndOfBuffer_2022(&(args->source), args->sourceLimit, args->flush);
                /* an empty run (e.g. input that starts with an escape) skips straight to the escape handling */
   2580         if(subArgs.source != subArgs.sourceLimit) {
   2581             /*
   2582              * get the current partial byte sequence
   2583              *
   2584              * it needs to be moved between the public and the subconverter
   2585              * so that the conversion framework, which only sees the public
   2586              * converter, can handle truncated and illegal input etc.
   2587              */
   2588             if(args->converter->toULength > 0) {
   2589                 uprv_memcpy(subArgs.converter->toUBytes, args->converter->toUBytes, args->converter->toULength);
   2590             }
   2591             subArgs.converter->toULength = args->converter->toULength;
   2592 
   2593             /*
   2594              * Convert up to the end of the input, or to before the next escape character.
   2595              * Does not handle conversion extensions because the preToU[] state etc.
   2596              * is not copied.
   2597              */
   2598             ucnv_MBCSToUnicodeWithOffsets(&subArgs, err);
   2599 
                    /*
                     * args->source/target/offsets are not advanced until after this
                     * block, so here they still describe the start of the run
                     * (assuming getEndOfBuffer_2022() did not move args->source --
                     * TODO confirm). Negative offsets are left untouched.
                     */
   2600             if(args->offsets != NULL && sourceStart != args->source) {
   2601                 /* update offsets to base them on the actual start of the input */
   2602                 int32_t *offsets = args->offsets;
   2603                 UChar *target = args->target;
   2604                 int32_t delta = (int32_t)(args->source - sourceStart);
   2605                 while(target < subArgs.target) {
   2606                     if(*offsets >= 0) {
   2607                         *offsets += delta;
   2608                     }
   2609                     ++offsets;
   2610                     ++target;
   2611                 }
   2612             }
                    /* adopt the subconverter's progress */
   2613             args->source = subArgs.source;
   2614             args->target = subArgs.target;
   2615             args->offsets = subArgs.offsets;
   2616 
   2617             /* copy input/error/overflow buffers */
   2618             if(subArgs.converter->toULength > 0) {
   2619                 uprv_memcpy(args->converter->toUBytes, subArgs.converter->toUBytes, subArgs.converter->toULength);
   2620             }
   2621             args->converter->toULength = subArgs.converter->toULength;
   2622 
   2623             if(*err == U_BUFFER_OVERFLOW_ERROR) {
   2624                 if(subArgs.converter->UCharErrorBufferLength > 0) {
   2625                     uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer,
   2626                                 subArgs.converter->UCharErrorBufferLength);
   2627                 }
   2628                 args->converter->UCharErrorBufferLength=subArgs.converter->UCharErrorBufferLength;
   2629                 subArgs.converter->UCharErrorBufferLength = 0;
   2630             }
   2631         }
   2632 
                /* stop on any error, or when the run reached the end of the input */
   2633         if (U_FAILURE(*err) || (args->source == args->sourceLimit)) {
   2634             return;
   2635         }
   2636 
                /* also the entry point when a previous call ended inside an escape sequence */
   2637 escape:
   2638         changeState_2022(args->converter,
   2639                &(args->source),
   2640                args->sourceLimit,
   2641                ISO_2022_KR,
   2642                err);
   2643     }
   2644 }
   2645 
   2646 static void
   2647 UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
   2648                                                             UErrorCode* err){
   2649     char tempBuf[2];
   2650     const char *mySource = ( char *) args->source;
   2651     UChar *myTarget = args->target;
   2652     const char *mySourceLimit = args->sourceLimit;
   2653     UChar32 targetUniChar = 0x0000;
   2654     UChar mySourceChar = 0x0000;
   2655     UConverterDataISO2022* myData;
   2656     UConverterSharedData* sharedData ;
   2657     UBool useFallback;
   2658 
   2659     myData=(UConverterDataISO2022*)(args->converter->extraInfo);
   2660     if(myData->version==1){
   2661         UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
   2662         return;
   2663     }
   2664 
   2665     /* initialize state */
   2666     sharedData = myData->currentConverter->sharedData;
   2667     useFallback = args->converter->useFallback;
   2668 
   2669     if(myData->key != 0) {
   2670         /* continue with a partial escape sequence */
   2671         goto escape;
   2672     } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
   2673         /* continue with a partial double-byte character */
   2674         mySourceChar = args->converter->toUBytes[0];
   2675         args->converter->toULength = 0;
   2676         goto getTrailByte;
   2677     }
   2678 
   2679     while(mySource< mySourceLimit){
   2680 
   2681         if(myTarget < args->targetLimit){
   2682 
   2683             mySourceChar= (unsigned char) *mySource++;
   2684 
   2685             if(mySourceChar==UCNV_SI){
   2686                 myData->toU2022State.g = 0;
   2687                 if (myData->isEmptySegment) {
   2688                     myData->isEmptySegment = FALSE;	/* we are handling it, reset to avoid future spurious errors */
   2689                     *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   2690                     args->converter->toUCallbackReason = UCNV_IRREGULAR;
   2691                     args->converter->toUBytes[0] = (uint8_t)mySourceChar;
   2692                     args->converter->toULength = 1;
   2693                     args->target = myTarget;
   2694                     args->source = mySource;
   2695                     return;
   2696                 }
   2697                 /*consume the source */
   2698                 continue;
   2699             }else if(mySourceChar==UCNV_SO){
   2700                 myData->toU2022State.g = 1;
   2701                 myData->isEmptySegment = TRUE;	/* Begin a new segment, empty so far */
   2702                 /*consume the source */
   2703                 continue;
   2704             }else if(mySourceChar==ESC_2022){
   2705                 mySource--;
   2706 escape:
   2707                 myData->isEmptySegment = FALSE;	/* Any invalid ESC sequences will be detected separately, so just reset this */
   2708                 changeState_2022(args->converter,&(mySource),
   2709                                 mySourceLimit, ISO_2022_KR, err);
   2710                 if(U_FAILURE(*err)){
   2711                     args->target = myTarget;
   2712                     args->source = mySource;
   2713                     return;
   2714                 }
   2715                 continue;
   2716             }
   2717 
   2718             myData->isEmptySegment = FALSE;	/* Any invalid char errors will be detected separately, so just reset this */
   2719             if(myData->toU2022State.g == 1) {
   2720                 if(mySource < mySourceLimit) {
   2721                     int leadIsOk, trailIsOk;
   2722                     uint8_t trailByte;
   2723 getTrailByte:
   2724                     targetUniChar = missingCharMarker;
   2725                     trailByte = (uint8_t)*mySource;
   2726                     /*
   2727                      * Ticket 5691: consistent illegal sequences:
   2728                      * - We include at least the first byte in the illegal sequence.
   2729                      * - If any of the non-initial bytes could be the start of a character,
   2730                      *   we stop the illegal sequence before the first one of those.
   2731                      *
   2732                      * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
   2733                      * an ESC/SO/SI, we report only the first byte as the illegal sequence.
   2734                      * Otherwise we convert or report the pair of bytes.
   2735                      */
   2736                     leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
   2737                     trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
   2738                     if (leadIsOk && trailIsOk) {
   2739                         ++mySource;
   2740                         tempBuf[0] = (char)(mySourceChar + 0x80);
   2741                         tempBuf[1] = (char)(trailByte + 0x80);
   2742                         targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback);
   2743                         mySourceChar = (mySourceChar << 8) | trailByte;
   2744                     } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
   2745                         /* report a pair of illegal bytes if the second byte is not a DBCS starter */
   2746                         ++mySource;
   2747                         /* add another bit so that the code below writes 2 bytes in case of error */
   2748                         mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
   2749                     }
   2750                 } else {
   2751                     args->converter->toUBytes[0] = (uint8_t)mySourceChar;
   2752                     args->converter->toULength = 1;
   2753                     break;
   2754                 }
   2755             }
   2756             else if(mySourceChar <= 0x7f) {
   2757                 targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, mySource - 1, 1, useFallback);
   2758             } else {
   2759                 targetUniChar = 0xffff;
   2760             }
   2761             if(targetUniChar < 0xfffe){
   2762                 if(args->offsets) {
   2763                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2764                 }
   2765                 *(myTarget++)=(UChar)targetUniChar;
   2766             }
   2767             else {
   2768                 /* Call the callback function*/
   2769                 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
   2770                 break;
   2771             }
   2772         }
   2773         else{
   2774             *err =U_BUFFER_OVERFLOW_ERROR;
   2775             break;
   2776         }
   2777     }
   2778     args->target = myTarget;
   2779     args->source = mySource;
   2780 }
   2781 
   2782 /*************************** END ISO2022-KR *********************************/
   2783 
   2784 /*************************** ISO-2022-CN *********************************
   2785 *
   2786 * Rules for ISO-2022-CN Encoding:
   2787 * i)   The designator sequence must appear once on a line before any instance
   2788 *      of character set it designates.
   2789 * ii)  If two lines contain characters from the same character set, both lines
   2790 *      must include the designator sequence.
   2791 * iii) Once the designator sequence is known, a shifting sequence has to be found
   2792 *      to invoke the designated character set
   2793 * iv)  All lines start in ASCII and end in ASCII.
   2794 * v)   Four shifting sequences are employed for this purpose:
   2795 *
   2796 *      Sequence    ASCII Eq    Charsets
   2797 *      ----------  -------    ---------
   2798 *      SI           <SI>        US-ASCII
   2799 *      SO           <SO>        CNS-11643-1992 Plane 1, GB2312, ISO-IR-165
   2800 *      SS2          <ESC>N      CNS-11643-1992 Plane 2
   2801 *      SS3          <ESC>O      CNS-11643-1992 Planes 3-7
   2802 *
   2803 * vi)
   2804 *      SOdesignator  : ESC "$" ")" finalchar_for_SO
   2805 *      SS2designator : ESC "$" "*" finalchar_for_SS2
   2806 *      SS3designator : ESC "$" "+" finalchar_for_SS3
   2807 *
   2808 *      ESC $ ) A       Indicates the bytes following SO are Chinese
   2809 *       characters as defined in GB 2312-80, until
   2810 *       another SOdesignation appears
   2811 *
   2812 *
   2813 *      ESC $ ) E       Indicates the bytes following SO are as defined
   2814 *       in ISO-IR-165 (for details, see section 2.1),
   2815 *       until another SOdesignation appears
   2816 *
   2817 *      ESC $ ) G       Indicates the bytes following SO are as defined
   2818 *       in CNS 11643-plane-1, until another
   2819 *       SOdesignation appears
   2820 *
   2821 *      ESC $ * H       Indicates the two bytes immediately following
   2822 *       SS2 is a Chinese character as defined in CNS
   2823 *       11643-plane-2, until another SS2designation
   2824 *       appears
   2825 *       (Meaning <ESC>N must precede every 2 byte
   2826 *        sequence.)
   2827 *
   2828 *      ESC $ + I       Indicates the immediate two bytes following SS3
   2829 *       is a Chinese character as defined in CNS
   2830 *       11643-plane-3, until another SS3designation
   2831 *       appears
   2832 *       (Meaning <ESC>O must precede every 2 byte
   2833 *        sequence.)
   2834 *
   2835 *      ESC $ + J       Indicates the immediate two bytes following SS3
   2836 *       is a Chinese character as defined in CNS
   2837 *       11643-plane-4, until another SS3designation
   2838 *       appears
   2839 *       (In English: <ESC>O must precede every 2 byte
   2840 *        sequence.)
   2841 *
   2842 *      ESC $ + K       Indicates the immediate two bytes following SS3
   2843 *       is a Chinese character as defined in CNS
   2844 *       11643-plane-5, until another SS3designation
   2845 *       appears
   2846 *
   2847 *      ESC $ + L       Indicates the immediate two bytes following SS3
   2848 *       is a Chinese character as defined in CNS
   2849 *       11643-plane-6, until another SS3designation
   2850 *       appears
   2851 *
   2852 *      ESC $ + M       Indicates the immediate two bytes following SS3
   2853 *       is a Chinese character as defined in CNS
   2854 *       11643-plane-7, until another SS3designation
   2855 *       appears
   2856 *
   2857 *       As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and
   2858 *       has its own designation information before any Chinese characters
   2859 *       appear
   2860 *
   2861 */
   2862 
/*
 * Designator escape sequences for ISO-2022-CN, one per character set.
 * Each is the four bytes ESC '$' <intermediate> <final>; the readable form
 * is given next to each definition.
 * The following are defined this way to make the strings truly readonly.
 */
static const char GB_2312_80_STR[] = "\x1B\x24\x29\x41";             /* ESC $ ) A */
static const char ISO_IR_165_STR[] = "\x1B\x24\x29\x45";             /* ESC $ ) E */
static const char CNS_11643_1992_Plane_1_STR[] = "\x1B\x24\x29\x47"; /* ESC $ ) G */
static const char CNS_11643_1992_Plane_2_STR[] = "\x1B\x24\x2A\x48"; /* ESC $ * H */
static const char CNS_11643_1992_Plane_3_STR[] = "\x1B\x24\x2B\x49"; /* ESC $ + I */
static const char CNS_11643_1992_Plane_4_STR[] = "\x1B\x24\x2B\x4A"; /* ESC $ + J */
static const char CNS_11643_1992_Plane_5_STR[] = "\x1B\x24\x2B\x4B"; /* ESC $ + K */
static const char CNS_11643_1992_Plane_6_STR[] = "\x1B\x24\x2B\x4C"; /* ESC $ + L */
static const char CNS_11643_1992_Plane_7_STR[] = "\x1B\x24\x2B\x4D"; /* ESC $ + M */
   2873 
   2874 /********************** ISO2022-CN Data **************************/
/*
 * Escape sequence to emit for each ISO-2022-CN character set.
 * The fromUnicode code indexes this table with the charset value directly
 * for values below CNS_11643, and with CNS_11643 + (cs - CNS_11643_1) for
 * the CNS 11643 planes, so the order of the entries must not change.
 * NOTE(review): this layout presumes CNS_11643 == 3 -- confirm against the enum.
 */
static const char* const escSeqCharsCN[10] ={
        SHIFT_IN_STR,                   /* 0 ASCII */
        GB_2312_80_STR,                 /* 1 GB2312_1 */
        ISO_IR_165_STR,                 /* 2 ISO_IR_165 */
        CNS_11643_1992_Plane_1_STR,     /* 3 CNS 11643 plane 1 */
        CNS_11643_1992_Plane_2_STR,     /* 4 CNS 11643 plane 2 */
        CNS_11643_1992_Plane_3_STR,     /* 5 CNS 11643 plane 3 */
        CNS_11643_1992_Plane_4_STR,     /* 6 CNS 11643 plane 4 */
        CNS_11643_1992_Plane_5_STR,     /* 7 CNS 11643 plane 5 */
        CNS_11643_1992_Plane_6_STR,     /* 8 CNS 11643 plane 6 */
        CNS_11643_1992_Plane_7_STR      /* 9 CNS 11643 plane 7 */
};
   2887 
   2888 static void
   2889 UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
   2890     UConverter *cnv = args->converter;
   2891     UConverterDataISO2022 *converterData;
   2892     ISO2022State *pFromU2022State;
   2893     uint8_t *target = (uint8_t *) args->target;
   2894     const uint8_t *targetLimit = (const uint8_t *) args->targetLimit;
   2895     const UChar* source = args->source;
   2896     const UChar* sourceLimit = args->sourceLimit;
   2897     int32_t* offsets = args->offsets;
   2898     UChar32 sourceChar;
   2899     char buffer[8];
   2900     int32_t len;
   2901     int8_t choices[3];
   2902     int32_t choiceCount;
   2903     uint32_t targetValue = 0;
   2904     UBool useFallback;
   2905 
   2906     /* set up the state */
   2907     converterData     = (UConverterDataISO2022*)cnv->extraInfo;
   2908     pFromU2022State   = &converterData->fromU2022State;
   2909 
   2910     choiceCount = 0;
   2911 
   2912     /* check if the last codepoint of previous buffer was a lead surrogate*/
   2913     if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
   2914         goto getTrail;
   2915     }
   2916 
   2917     while( source < sourceLimit){
   2918         if(target < targetLimit){
   2919 
   2920             sourceChar  = *(source++);
   2921             /*check if the char is a First surrogate*/
   2922              if(U16_IS_SURROGATE(sourceChar)) {
   2923                 if(U16_IS_SURROGATE_LEAD(sourceChar)) {
   2924 getTrail:
   2925                     /*look ahead to find the trail surrogate*/
   2926                     if(source < sourceLimit) {
   2927                         /* test the following code unit */
   2928                         UChar trail=(UChar) *source;
   2929                         if(U16_IS_TRAIL(trail)) {
   2930                             source++;
   2931                             sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
   2932                             cnv->fromUChar32=0x00;
   2933                             /* convert this supplementary code point */
   2934                             /* exit this condition tree */
   2935                         } else {
   2936                             /* this is an unmatched lead code unit (1st surrogate) */
   2937                             /* callback(illegal) */
   2938                             *err=U_ILLEGAL_CHAR_FOUND;
   2939                             cnv->fromUChar32=sourceChar;
   2940                             break;
   2941                         }
   2942                     } else {
   2943                         /* no more input */
   2944                         cnv->fromUChar32=sourceChar;
   2945                         break;
   2946                     }
   2947                 } else {
   2948                     /* this is an unmatched trail code unit (2nd surrogate) */
   2949                     /* callback(illegal) */
   2950                     *err=U_ILLEGAL_CHAR_FOUND;
   2951                     cnv->fromUChar32=sourceChar;
   2952                     break;
   2953                 }
   2954             }
   2955 
   2956             /* do the conversion */
   2957             if(sourceChar <= 0x007f ){
   2958                 /* do not convert SO/SI/ESC */
   2959                 if(IS_2022_CONTROL(sourceChar)) {
   2960                     /* callback(illegal) */
   2961                     *err=U_ILLEGAL_CHAR_FOUND;
   2962                     cnv->fromUChar32=sourceChar;
   2963                     break;
   2964                 }
   2965 
   2966                 /* US-ASCII */
   2967                 if(pFromU2022State->g == 0) {
   2968                     buffer[0] = (char)sourceChar;
   2969                     len = 1;
   2970                 } else {
   2971                     buffer[0] = UCNV_SI;
   2972                     buffer[1] = (char)sourceChar;
   2973                     len = 2;
   2974                     pFromU2022State->g = 0;
   2975                     choiceCount = 0;
   2976                 }
   2977                 if(sourceChar == CR || sourceChar == LF) {
   2978                     /* reset the state at the end of a line */
   2979                     uprv_memset(pFromU2022State, 0, sizeof(ISO2022State));
   2980                     choiceCount = 0;
   2981                 }
   2982             }
   2983             else{
   2984                 /* convert U+0080..U+10ffff */
   2985                 int32_t i;
   2986                 int8_t cs, g;
   2987 
   2988                 if(choiceCount == 0) {
   2989                     /* try the current SO/G1 converter first */
   2990                     choices[0] = pFromU2022State->cs[1];
   2991 
   2992                     /* default to GB2312_1 if none is designated yet */
   2993                     if(choices[0] == 0) {
   2994                         choices[0] = GB2312_1;
   2995                     }
   2996 
   2997                     if(converterData->version == 0) {
   2998                         /* ISO-2022-CN */
   2999 
   3000                         /* try the other SO/G1 converter; a CNS_11643_1 lookup may result in any plane */
   3001                         if(choices[0] == GB2312_1) {
   3002                             choices[1] = (int8_t)CNS_11643_1;
   3003                         } else {
   3004                             choices[1] = (int8_t)GB2312_1;
   3005                         }
   3006 
   3007                         choiceCount = 2;
   3008                     } else if (converterData->version == 1) {
   3009                         /* ISO-2022-CN-EXT */
   3010 
   3011                         /* try one of the other converters */
   3012                         switch(choices[0]) {
   3013                         case GB2312_1:
   3014                             choices[1] = (int8_t)CNS_11643_1;
   3015                             choices[2] = (int8_t)ISO_IR_165;
   3016                             break;
   3017                         case ISO_IR_165:
   3018                             choices[1] = (int8_t)GB2312_1;
   3019                             choices[2] = (int8_t)CNS_11643_1;
   3020                             break;
   3021                         default: /* CNS_11643_x */
   3022                             choices[1] = (int8_t)GB2312_1;
   3023                             choices[2] = (int8_t)ISO_IR_165;
   3024                             break;
   3025                         }
   3026 
   3027                         choiceCount = 3;
   3028                     } else {
   3029                         choices[0] = (int8_t)CNS_11643_1;
   3030                         choices[1] = (int8_t)GB2312_1;
   3031                     }
   3032                 }
   3033 
   3034                 cs = g = 0;
   3035                 /*
   3036                  * len==0: no mapping found yet
   3037                  * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
   3038                  * len>0: found a roundtrip result, done
   3039                  */
   3040                 len = 0;
   3041                 /*
   3042                  * We will turn off useFallback after finding a fallback,
   3043                  * but we still get fallbacks from PUA code points as usual.
   3044                  * Therefore, we will also need to check that we don't overwrite
   3045                  * an early fallback with a later one.
   3046                  */
   3047                 useFallback = cnv->useFallback;
   3048 
   3049                 for(i = 0; i < choiceCount && len <= 0; ++i) {
   3050                     int8_t cs0 = choices[i];
   3051                     if(cs0 > 0) {
   3052                         uint32_t value;
   3053                         int32_t len2;
   3054                         if(cs0 >= CNS_11643_0) {
   3055                             len2 = MBCS_FROM_UCHAR32_ISO2022(
   3056                                         converterData->myConverterArray[CNS_11643],
   3057                                         sourceChar,
   3058                                         &value,
   3059                                         useFallback,
   3060                                         MBCS_OUTPUT_3);
   3061                             if(len2 == 3 || (len2 == -3 && len == 0)) {
   3062                                 targetValue = value;
   3063                                 cs = (int8_t)(CNS_11643_0 + (value >> 16) - 0x80);
   3064                                 if(len2 >= 0) {
   3065                                     len = 2;
   3066                                 } else {
   3067                                     len = -2;
   3068                                     useFallback = FALSE;
   3069                                 }
   3070                                 if(cs == CNS_11643_1) {
   3071                                     g = 1;
   3072                                 } else if(cs == CNS_11643_2) {
   3073                                     g = 2;
   3074                                 } else /* plane 3..7 */ if(converterData->version == 1) {
   3075                                     g = 3;
   3076                                 } else {
   3077                                     /* ISO-2022-CN (without -EXT) does not support plane 3..7 */
   3078                                     len = 0;
   3079                                 }
   3080                             }
   3081                         } else {
   3082                             /* GB2312_1 or ISO-IR-165 */
   3083                             U_ASSERT(cs0<UCNV_2022_MAX_CONVERTERS);
   3084                             len2 = MBCS_FROM_UCHAR32_ISO2022(
   3085                                         converterData->myConverterArray[cs0],
   3086                                         sourceChar,
   3087                                         &value,
   3088                                         useFallback,
   3089                                         MBCS_OUTPUT_2);
   3090                             if(len2 == 2 || (len2 == -2 && len == 0)) {
   3091                                 targetValue = value;
   3092                                 len = len2;
   3093                                 cs = cs0;
   3094                                 g = 1;
   3095                                 useFallback = FALSE;
   3096                             }
   3097                         }
   3098                     }
   3099                 }
   3100 
   3101                 if(len != 0) {
   3102                     len = 0; /* count output bytes; it must have been abs(len) == 2 */
   3103 
   3104                     /* write the designation sequence if necessary */
   3105                     if(cs != pFromU2022State->cs[g]) {
   3106                         if(cs < CNS_11643) {
   3107                             uprv_memcpy(buffer, escSeqCharsCN[cs], 4);
   3108                         } else {
   3109                             U_ASSERT(cs >= CNS_11643_1);
   3110                             uprv_memcpy(buffer, escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)], 4);
   3111                         }
   3112                         len = 4;
   3113                         pFromU2022State->cs[g] = cs;
   3114                         if(g == 1) {
   3115                             /* changing the SO/G1 charset invalidates the choices[] */
   3116                             choiceCount = 0;
   3117                         }
   3118                     }
   3119 
   3120                     /* write the shift sequence if necessary */
   3121                     if(g != pFromU2022State->g) {
   3122                         switch(g) {
   3123                         case 1:
   3124                             buffer[len++] = UCNV_SO;
   3125 
   3126                             /* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */
   3127                             pFromU2022State->g = 1;
   3128                             break;
   3129                         case 2:
   3130                             buffer[len++] = 0x1b;
   3131                             buffer[len++] = 0x4e;
   3132                             break;
   3133                         default: /* case 3 */
   3134                             buffer[len++] = 0x1b;
   3135                             buffer[len++] = 0x4f;
   3136                             break;
   3137                         }
   3138                     }
   3139 
   3140                     /* write the two output bytes */
   3141                     buffer[len++] = (char)(targetValue >> 8);
   3142                     buffer[len++] = (char)targetValue;
   3143                 } else {
   3144                     /* if we cannot find the character after checking all codepages
   3145                      * then this is an error
   3146                      */
   3147                     *err = U_INVALID_CHAR_FOUND;
   3148                     cnv->fromUChar32=sourceChar;
   3149                     break;
   3150                 }
   3151             }
   3152 
   3153             /* output len>0 bytes in buffer[] */
   3154             if(len == 1) {
   3155                 *target++ = buffer[0];
   3156                 if(offsets) {
   3157                     *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */
   3158                 }
   3159             } else if(len == 2 && (target + 2) <= targetLimit) {
   3160                 *target++ = buffer[0];
   3161                 *target++ = buffer[1];
   3162                 if(offsets) {
   3163                     int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));
   3164                     *offsets++ = sourceIndex;
   3165                     *offsets++ = sourceIndex;
   3166                 }
   3167             } else {
   3168                 fromUWriteUInt8(
   3169                     cnv,
   3170                     buffer, len,
   3171                     &target, (const char *)targetLimit,
   3172                     &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
   3173                     err);
   3174                 if(U_FAILURE(*err)) {
   3175                     break;
   3176                 }
   3177             }
   3178         } /* end if(myTargetIndex<myTargetLength) */
   3179         else{
   3180             *err =U_BUFFER_OVERFLOW_ERROR;
   3181             break;
   3182         }
   3183 
   3184     }/* end while(mySourceIndex<mySourceLength) */
   3185 
   3186     /*
   3187      * the end of the input stream and detection of truncated input
   3188      * are handled by the framework, but for ISO-2022-CN conversion
   3189      * we need to be in ASCII mode at the very end
   3190      *
   3191      * conditions:
   3192      *   successful
   3193      *   not in ASCII mode
   3194      *   end of input and no truncated input
   3195      */
   3196     if( U_SUCCESS(*err) &&
   3197         pFromU2022State->g!=0 &&
   3198         args->flush && source>=sourceLimit && cnv->fromUChar32==0
   3199     ) {
   3200         int32_t sourceIndex;
   3201 
   3202         /* we are switching to ASCII */
   3203         pFromU2022State->g=0;
   3204 
   3205         /* get the source index of the last input character */
   3206         /*
   3207          * TODO this would be simpler and more reliable if we used a pair
   3208          * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
   3209          * so that we could simply use the prevSourceIndex here;
   3210          * this code gives an incorrect result for the rare case of an unmatched
   3211          * trail surrogate that is alone in the last buffer of the text stream
   3212          */
   3213         sourceIndex=(int32_t)(source-args->source);
   3214         if(sourceIndex>0) {
   3215             --sourceIndex;
   3216             if( U16_IS_TRAIL(args->source[sourceIndex]) &&
   3217                 (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
   3218             ) {
   3219                 --sourceIndex;
   3220             }
   3221         } else {
   3222             sourceIndex=-1;
   3223         }
   3224 
   3225         fromUWriteUInt8(
   3226             cnv,
   3227             SHIFT_IN_STR, 1,
   3228             &target, (const char *)targetLimit,
   3229             &offsets, sourceIndex,
   3230             err);
   3231     }
   3232 
   3233     /*save the state and return */
   3234     args->source = source;
   3235     args->target = (char*)target;
   3236 }
   3237 
   3238 
   3239 static void
   3240 UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
   3241                                                UErrorCode* err){
   3242     char tempBuf[3];
   3243     const char *mySource = (char *) args->source;
   3244     UChar *myTarget = args->target;
   3245     const char *mySourceLimit = args->sourceLimit;
   3246     uint32_t targetUniChar = 0x0000;
   3247     uint32_t mySourceChar = 0x0000;
   3248     UConverterDataISO2022* myData;
   3249     ISO2022State *pToU2022State;
   3250 
   3251     myData=(UConverterDataISO2022*)(args->converter->extraInfo);
   3252     pToU2022State = &myData->toU2022State;
   3253 
   3254     if(myData->key != 0) {
   3255         /* continue with a partial escape sequence */
   3256         goto escape;
   3257     } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
   3258         /* continue with a partial double-byte character */
   3259         mySourceChar = args->converter->toUBytes[0];
   3260         args->converter->toULength = 0;
   3261         targetUniChar = missingCharMarker;
   3262         goto getTrailByte;
   3263     }
   3264 
   3265     while(mySource < mySourceLimit){
   3266 
   3267         targetUniChar =missingCharMarker;
   3268 
   3269         if(myTarget < args->targetLimit){
   3270 
   3271             mySourceChar= (unsigned char) *mySource++;
   3272 
   3273             switch(mySourceChar){
   3274             case UCNV_SI:
   3275                 pToU2022State->g=0;
   3276                 if (myData->isEmptySegment) {
   3277                     myData->isEmptySegment = FALSE;	/* we are handling it, reset to avoid future spurious errors */
   3278                     *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   3279                     args->converter->toUCallbackReason = UCNV_IRREGULAR;
   3280                     args->converter->toUBytes[0] = mySourceChar;
   3281                     args->converter->toULength = 1;
   3282                     args->target = myTarget;
   3283                     args->source = mySource;
   3284                     return;
   3285                 }
   3286                 continue;
   3287 
   3288             case UCNV_SO:
   3289                 if(pToU2022State->cs[1] != 0) {
   3290                     pToU2022State->g=1;
   3291                     myData->isEmptySegment = TRUE;	/* Begin a new segment, empty so far */
   3292                     continue;
   3293                 } else {
   3294                     /* illegal to have SO before a matching designator */
   3295                     myData->isEmptySegment = FALSE;	/* Handling a different error, reset this to avoid future spurious errs */
   3296                     break;
   3297                 }
   3298 
   3299             case ESC_2022:
   3300                 mySource--;
   3301 escape:
   3302                 {
   3303                     const char * mySourceBefore = mySource;
   3304                     int8_t toULengthBefore = args->converter->toULength;
   3305 
   3306                     changeState_2022(args->converter,&(mySource),
   3307                         mySourceLimit, ISO_2022_CN,err);
   3308 
   3309                     /* After SO there must be at least one character before a designator (designator error handled separately) */
   3310                     if(myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
   3311                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   3312                         args->converter->toUCallbackReason = UCNV_IRREGULAR;
   3313                         args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore));
   3314                     }
   3315                 }
   3316 
   3317                 /* invalid or illegal escape sequence */
   3318                 if(U_FAILURE(*err)){
   3319                     args->target = myTarget;
   3320                     args->source = mySource;
   3321                     myData->isEmptySegment = FALSE;	/* Reset to avoid future spurious errors */
   3322                     return;
   3323                 }
   3324                 continue;
   3325 
   3326             /* ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */
   3327 
   3328             case CR:
   3329                 /*falls through*/
   3330             case LF:
   3331                 uprv_memset(pToU2022State, 0, sizeof(ISO2022State));
   3332                 /* falls through */
   3333             default:
   3334                 /* convert one or two bytes */
   3335                 myData->isEmptySegment = FALSE;
   3336                 if(pToU2022State->g != 0) {
   3337                     if(mySource < mySourceLimit) {
   3338                         UConverterSharedData *cnv;
   3339                         StateEnum tempState;
   3340                         int32_t tempBufLen;
   3341                         int leadIsOk, trailIsOk;
   3342                         uint8_t trailByte;
   3343 getTrailByte:
   3344                         trailByte = (uint8_t)*mySource;
   3345                         /*
   3346                          * Ticket 5691: consistent illegal sequences:
   3347                          * - We include at least the first byte in the illegal sequence.
   3348                          * - If any of the non-initial bytes could be the start of a character,
   3349                          *   we stop the illegal sequence before the first one of those.
   3350                          *
   3351                          * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
   3352                          * an ESC/SO/SI, we report only the first byte as the illegal sequence.
   3353                          * Otherwise we convert or report the pair of bytes.
   3354                          */
   3355                         leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
   3356                         trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
   3357                         if (leadIsOk && trailIsOk) {
   3358                             ++mySource;
   3359                             tempState = (StateEnum)pToU2022State->cs[pToU2022State->g];
   3360                             if(tempState >= CNS_11643_0) {
   3361                                 cnv = myData->myConverterArray[CNS_11643];
   3362                                 tempBuf[0] = (char) (0x80+(tempState-CNS_11643_0));
   3363                                 tempBuf[1] = (char) (mySourceChar);
   3364                                 tempBuf[2] = (char) trailByte;
   3365                                 tempBufLen = 3;
   3366 
   3367                             }else{
   3368                                 U_ASSERT(tempState<UCNV_2022_MAX_CONVERTERS);
   3369                                 cnv = myData->myConverterArray[tempState];
   3370                                 tempBuf[0] = (char) (mySourceChar);
   3371                                 tempBuf[1] = (char) trailByte;
   3372                                 tempBufLen = 2;
   3373                             }
   3374                             targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE);
   3375                             mySourceChar = (mySourceChar << 8) | trailByte;
   3376                         } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
   3377                             /* report a pair of illegal bytes if the second byte is not a DBCS starter */
   3378                             ++mySource;
   3379                             /* add another bit so that the code below writes 2 bytes in case of error */
   3380                             mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
   3381                         }
   3382                         if(pToU2022State->g>=2) {
   3383                             /* return from a single-shift state to the previous one */
   3384                             pToU2022State->g=pToU2022State->prevG;
   3385                         }
   3386                     } else {
   3387                         args->converter->toUBytes[0] = (uint8_t)mySourceChar;
   3388                         args->converter->toULength = 1;
   3389                         goto endloop;
   3390                     }
   3391                 }
   3392                 else{
   3393                     if(mySourceChar <= 0x7f) {
   3394                         targetUniChar = (UChar) mySourceChar;
   3395                     }
   3396                 }
   3397                 break;
   3398             }
   3399             if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){
   3400                 if(args->offsets){
   3401                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   3402                 }
   3403                 *(myTarget++)=(UChar)targetUniChar;
   3404             }
   3405             else if(targetUniChar > missingCharMarker){
   3406                 /* disassemble the surrogate pair and write to output*/
   3407                 targetUniChar-=0x0010000;
   3408                 *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));
   3409                 if(args->offsets){
   3410                     args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   3411                 }
   3412                 ++myTarget;
   3413                 if(myTarget< args->targetLimit){
   3414                     *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
   3415                     if(args->offsets){
   3416                         args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   3417                     }
   3418                     ++myTarget;
   3419                 }else{
   3420                     args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
   3421                                     (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
   3422                 }
   3423 
   3424             }
   3425             else{
   3426                 /* Call the callback function*/
   3427                 toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
   3428                 break;
   3429             }
   3430         }
   3431         else{
   3432             *err =U_BUFFER_OVERFLOW_ERROR;
   3433             break;
   3434         }
   3435     }
   3436 endloop:
   3437     args->target = myTarget;
   3438     args->source = mySource;
   3439 }
   3440 
   3441 static void
   3442 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
   3443     UConverter *cnv = args->converter;
   3444     UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo;
   3445     ISO2022State *pFromU2022State=&myConverterData->fromU2022State;
   3446     char *p, *subchar;
   3447     char buffer[8];
   3448     int32_t length;
   3449 
   3450     subchar=(char *)cnv->subChars;
   3451     length=cnv->subCharLen; /* assume length==1 for most variants */
   3452 
   3453     p = buffer;
   3454     switch(myConverterData->locale[0]){
   3455     case 'j':
   3456         {
   3457             int8_t cs;
   3458 
   3459             if(pFromU2022State->g == 1) {
   3460                 /* JIS7: switch from G1 to G0 */
   3461                 pFromU2022State->g = 0;
   3462                 *p++ = UCNV_SI;
   3463             }
   3464 
   3465             cs = pFromU2022State->cs[0];
   3466             if(cs != ASCII && cs != JISX201) {
   3467                 /* not in ASCII or JIS X 0201: switch to ASCII */
   3468                 pFromU2022State->cs[0] = (int8_t)ASCII;
   3469                 *p++ = '\x1b';
   3470                 *p++ = '\x28';
   3471                 *p++ = '\x42';
   3472             }
   3473 
   3474             *p++ = subchar[0];
   3475             break;
   3476         }
   3477     case 'c':
   3478         if(pFromU2022State->g != 0) {
   3479             /* not in ASCII mode: switch to ASCII */
   3480             pFromU2022State->g = 0;
   3481             *p++ = UCNV_SI;
   3482         }
   3483         *p++ = subchar[0];
   3484         break;
   3485     case 'k':
   3486         if(myConverterData->version == 0) {
   3487             if(length == 1) {
   3488                 if((UBool)args->converter->fromUnicodeStatus) {
   3489                     /* in DBCS mode: switch to SBCS */
   3490                     args->converter->fromUnicodeStatus = 0;
   3491                     *p++ = UCNV_SI;
   3492                 }
   3493                 *p++ = subchar[0];
   3494             } else /* length == 2*/ {
   3495                 if(!(UBool)args->converter->fromUnicodeStatus) {
   3496                     /* in SBCS mode: switch to DBCS */
   3497                     args->converter->fromUnicodeStatus = 1;
   3498                     *p++ = UCNV_SO;
   3499                 }
   3500                 *p++ = subchar[0];
   3501                 *p++ = subchar[1];
   3502             }
   3503             break;
   3504         } else {
   3505             /* save the subconverter's substitution string */
   3506             uint8_t *currentSubChars = myConverterData->currentConverter->subChars;
   3507             int8_t currentSubCharLen = myConverterData->currentConverter->subCharLen;
   3508 
   3509             /* set our substitution string into the subconverter */
   3510             myConverterData->currentConverter->subChars = (uint8_t *)subchar;
   3511             myConverterData->currentConverter->subCharLen = (int8_t)length;
   3512 
   3513             /* let the subconverter write the subchar, set/retrieve fromUChar32 state */
   3514             args->converter = myConverterData->currentConverter;
   3515             myConverterData->currentConverter->fromUChar32 = cnv->fromUChar32;
   3516             ucnv_cbFromUWriteSub(args, 0, err);
   3517             cnv->fromUChar32 = myConverterData->currentConverter->fromUChar32;
   3518             args->converter = cnv;
   3519 
   3520             /* restore the subconverter's substitution string */
   3521             myConverterData->currentConverter->subChars = currentSubChars;
   3522             myConverterData->currentConverter->subCharLen = currentSubCharLen;
   3523 
   3524             if(*err == U_BUFFER_OVERFLOW_ERROR) {
   3525                 if(myConverterData->currentConverter->charErrorBufferLength > 0) {
   3526                     uprv_memcpy(
   3527                         cnv->charErrorBuffer,
   3528                         myConverterData->currentConverter->charErrorBuffer,
   3529                         myConverterData->currentConverter->charErrorBufferLength);
   3530                 }
   3531                 cnv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;
   3532                 myConverterData->currentConverter->charErrorBufferLength = 0;
   3533             }
   3534             return;
   3535         }
   3536     default:
   3537         /* not expected */
   3538         break;
   3539     }
   3540     ucnv_cbFromUWriteBytes(args,
   3541                            buffer, (int32_t)(p - buffer),
   3542                            offsetIndex, err);
   3543 }
   3544 
   3545 /*
   3546  * Structure for cloning an ISO 2022 converter into a single memory block.
   3547  * ucnv_safeClone() of the converter will align the entire cloneStruct,
   3548  * and then ucnv_safeClone() of the sub-converter may additionally align
   3549  * currentConverter inside the cloneStruct, for which we need the deadSpace
   3550  * after currentConverter.
   3551  * This is because UAlignedMemory may be larger than the actually
   3552  * necessary alignment size for the platform.
   3553  * The other cloneStruct fields will not be moved around,
   3554  * and are aligned properly with cloneStruct's alignment.
   3555  */
   3556 struct cloneStruct
   3557 {
   3558     UConverter cnv;
   3559     UConverter currentConverter;
   3560     UAlignedMemory deadSpace;
   3561     UConverterDataISO2022 mydata;
   3562 };
   3563 
   3564 
   3565 static UConverter *
   3566 _ISO_2022_SafeClone(
   3567             const UConverter *cnv,
   3568             void *stackBuffer,
   3569             int32_t *pBufferSize,
   3570             UErrorCode *status)
   3571 {
   3572     struct cloneStruct * localClone;
   3573     UConverterDataISO2022 *cnvData;
   3574     int32_t i, size;
   3575 
   3576     if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */
   3577         *pBufferSize = (int32_t)sizeof(struct cloneStruct);
   3578         return NULL;
   3579     }
   3580 
   3581     cnvData = (UConverterDataISO2022 *)cnv->extraInfo;
   3582     localClone = (struct cloneStruct *)stackBuffer;
   3583 
   3584     /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
   3585 
   3586     uprv_memcpy(&localClone->mydata, cnvData, sizeof(UConverterDataISO2022));
   3587     localClone->cnv.extraInfo = &localClone->mydata; /* set pointer to extra data */
   3588     localClone->cnv.isExtraLocal = TRUE;
   3589 
   3590     /* share the subconverters */
   3591 
   3592     if(cnvData->currentConverter != NULL) {
   3593         size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */
   3594         localClone->mydata.currentConverter =
   3595             ucnv_safeClone(cnvData->currentConverter,
   3596                             &localClone->currentConverter,
   3597                             &size, status);
   3598         if(U_FAILURE(*status)) {
   3599             return NULL;
   3600         }
   3601     }
   3602 
   3603     for(i=0; i<UCNV_2022_MAX_CONVERTERS; ++i) {
   3604         if(cnvData->myConverterArray[i] != NULL) {
   3605             ucnv_incrementRefCount(cnvData->myConverterArray[i]);
   3606         }
   3607     }
   3608 
   3609     return &localClone->cnv;
   3610 }
   3611 
   3612 static void
   3613 _ISO_2022_GetUnicodeSet(const UConverter *cnv,
   3614                     const USetAdder *sa,
   3615                     UConverterUnicodeSet which,
   3616                     UErrorCode *pErrorCode)
   3617 {
   3618     int32_t i;
   3619     UConverterDataISO2022* cnvData;
   3620 
   3621     if (U_FAILURE(*pErrorCode)) {
   3622         return;
   3623     }
   3624 #ifdef U_ENABLE_GENERIC_ISO_2022
   3625     if (cnv->sharedData == &_ISO2022Data) {
   3626         /* We use UTF-8 in this case */
   3627         sa->addRange(sa->set, 0, 0xd7FF);
   3628         sa->addRange(sa->set, 0xE000, 0x10FFFF);
   3629         return;
   3630     }
   3631 #endif
   3632 
   3633     cnvData = (UConverterDataISO2022*)cnv->extraInfo;
   3634 
   3635     /* open a set and initialize it with code points that are algorithmically round-tripped */
   3636     switch(cnvData->locale[0]){
   3637     case 'j':
   3638         /* include JIS X 0201 which is hardcoded */
   3639         sa->add(sa->set, 0xa5);
   3640         sa->add(sa->set, 0x203e);
   3641         if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
   3642             /* include Latin-1 for some variants of JP */
   3643             sa->addRange(sa->set, 0, 0xff);
   3644         } else {
   3645             /* include ASCII for JP */
   3646             sa->addRange(sa->set, 0, 0x7f);
   3647         }
   3648         if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
   3649             /*
   3650              * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0
   3651              * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8)
   3652              * use half-width Katakana.
   3653              * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode)
   3654              * half-width Katakana via the ESC ( I sequence.
   3655              * However, we only emit (fromUnicode) half-width Katakana according to the
   3656              * definition of each variant.
   3657              *
   3658              * When including fallbacks,
   3659              * we need to include half-width Katakana Unicode code points for all JP variants because
   3660              * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana).
   3661              */
   3662             /* include half-width Katakana for JP */
   3663             sa->addRange(sa->set, HWKANA_START, HWKANA_END);
   3664         }
   3665         break;
   3666     case 'c':
   3667     case 'z':
   3668         /* include ASCII for CN */
   3669         sa->addRange(sa->set, 0, 0x7f);
   3670         break;
   3671     case 'k':
   3672         /* there is only one converter for KR, and it is not in the myConverterArray[] */
   3673         cnvData->currentConverter->sharedData->impl->getUnicodeSet(
   3674                 cnvData->currentConverter, sa, which, pErrorCode);
   3675         /* the loop over myConverterArray[] will simply not find another converter */
   3676         break;
   3677     default:
   3678         break;
   3679     }
   3680 
   3681 #if 0  /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */
   3682             if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
   3683                 cnvData->version==0 && i==CNS_11643
   3684             ) {
   3685                 /* special handling for non-EXT ISO-2022-CN: add only code points for CNS planes 1 and 2 */
   3686                 ucnv_MBCSGetUnicodeSetForBytes(
   3687                         cnvData->myConverterArray[i],
   3688                         sa, UCNV_ROUNDTRIP_SET,
   3689                         0, 0x81, 0x82,
   3690                         pErrorCode);
   3691             }
   3692 #endif
   3693 
   3694     for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
   3695         UConverterSetFilter filter;
   3696         if(cnvData->myConverterArray[i]!=NULL) {
   3697             if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
   3698                 cnvData->version==0 && i==CNS_11643
   3699             ) {
   3700                 /*
   3701                  * Version-specific for CN:
   3702                  * CN version 0 does not map CNS planes 3..7 although
   3703                  * they are all available in the CNS conversion table;
   3704                  * CN version 1 (-EXT) does map them all.
   3705                  * The two versions create different Unicode sets.
   3706                  */
   3707                 filter=UCNV_SET_FILTER_2022_CN;
   3708             } else if(cnvData->locale[0]=='j' && i==JISX208) {
   3709                 /*
   3710                  * Only add code points that map to Shift-JIS codes
   3711                  * corresponding to JIS X 0208.
   3712                  */
   3713                 filter=UCNV_SET_FILTER_SJIS;
   3714             } else if(i==KSC5601) {
   3715                 /*
   3716                  * Some of the KSC 5601 tables (convrtrs.txt has this aliases on multiple tables)
   3717                  * are broader than GR94.
   3718                  */
   3719                 filter=UCNV_SET_FILTER_GR94DBCS;
   3720             } else {
   3721                 filter=UCNV_SET_FILTER_NONE;
   3722             }
   3723             ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, filter, pErrorCode);
   3724         }
   3725     }
   3726 
   3727     /*
   3728      * ISO 2022 converters must not convert SO/SI/ESC despite what
   3729      * sub-converters do by themselves.
   3730      * Remove these characters from the set.
   3731      */
   3732     sa->remove(sa->set, 0x0e);
   3733     sa->remove(sa->set, 0x0f);
   3734     sa->remove(sa->set, 0x1b);
   3735 
   3736     /* ISO 2022 converters do not convert C1 controls either */
   3737     sa->removeRange(sa->set, 0x80, 0x9f);
   3738 }
   3739 
   3740 static const UConverterImpl _ISO2022Impl={
   3741     UCNV_ISO_2022,
   3742 
   3743     NULL,
   3744     NULL,
   3745 
   3746     _ISO2022Open,
   3747     _ISO2022Close,
   3748     _ISO2022Reset,
   3749 
   3750 #ifdef U_ENABLE_GENERIC_ISO_2022
   3751     T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
   3752     T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
   3753     ucnv_fromUnicode_UTF8,
   3754     ucnv_fromUnicode_UTF8_OFFSETS_LOGIC,
   3755 #else
   3756     NULL,
   3757     NULL,
   3758     NULL,
   3759     NULL,
   3760 #endif
   3761     NULL,
   3762 
   3763     NULL,
   3764     _ISO2022getName,
   3765     _ISO_2022_WriteSub,
   3766     _ISO_2022_SafeClone,
   3767     _ISO_2022_GetUnicodeSet,
   3768 
   3769     NULL,
   3770     NULL
   3771 };
   3772 static const UConverterStaticData _ISO2022StaticData={
   3773     sizeof(UConverterStaticData),
   3774     "ISO_2022",
   3775     2022,
   3776     UCNV_IBM,
   3777     UCNV_ISO_2022,
   3778     1,
   3779     3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */
   3780     { 0x1a, 0, 0, 0 },
   3781     1,
   3782     FALSE,
   3783     FALSE,
   3784     0,
   3785     0,
   3786     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   3787 };
   3788 const UConverterSharedData _ISO2022Data={
   3789     sizeof(UConverterSharedData),
   3790     ~((uint32_t) 0),
   3791     NULL,
   3792     NULL,
   3793     &_ISO2022StaticData,
   3794     FALSE,
   3795     &_ISO2022Impl,
   3796     0, UCNV_MBCS_TABLE_INITIALIZER
   3797 };
   3798 
   3799 /*************JP****************/
   3800 static const UConverterImpl _ISO2022JPImpl={
   3801     UCNV_ISO_2022,
   3802 
   3803     NULL,
   3804     NULL,
   3805 
   3806     _ISO2022Open,
   3807     _ISO2022Close,
   3808     _ISO2022Reset,
   3809 
   3810     UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,
   3811     UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,
   3812     UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,
   3813     UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,
   3814     NULL,
   3815 
   3816     NULL,
   3817     _ISO2022getName,
   3818     _ISO_2022_WriteSub,
   3819     _ISO_2022_SafeClone,
   3820     _ISO_2022_GetUnicodeSet,
   3821 
   3822     NULL,
   3823     NULL
   3824 };
   3825 static const UConverterStaticData _ISO2022JPStaticData={
   3826     sizeof(UConverterStaticData),
   3827     "ISO_2022_JP",
   3828     0,
   3829     UCNV_IBM,
   3830     UCNV_ISO_2022,
   3831     1,
   3832     6, /* max 6 bytes per UChar: 4-byte escape sequence + DBCS */
   3833     { 0x1a, 0, 0, 0 },
   3834     1,
   3835     FALSE,
   3836     FALSE,
   3837     0,
   3838     0,
   3839     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   3840 };
   3841 
   3842 namespace {
   3843 
   3844 const UConverterSharedData _ISO2022JPData={
   3845     sizeof(UConverterSharedData),
   3846     ~((uint32_t) 0),
   3847     NULL,
   3848     NULL,
   3849     &_ISO2022JPStaticData,
   3850     FALSE,
   3851     &_ISO2022JPImpl,
   3852     0, UCNV_MBCS_TABLE_INITIALIZER
   3853 };
   3854 
   3855 }  // namespace
   3856 
   3857 /************* KR ***************/
   3858 static const UConverterImpl _ISO2022KRImpl={
   3859     UCNV_ISO_2022,
   3860 
   3861     NULL,
   3862     NULL,
   3863 
   3864     _ISO2022Open,
   3865     _ISO2022Close,
   3866     _ISO2022Reset,
   3867 
   3868     UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,
   3869     UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,
   3870     UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,
   3871     UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,
   3872     NULL,
   3873 
   3874     NULL,
   3875     _ISO2022getName,
   3876     _ISO_2022_WriteSub,
   3877     _ISO_2022_SafeClone,
   3878     _ISO_2022_GetUnicodeSet,
   3879 
   3880     NULL,
   3881     NULL
   3882 };
   3883 static const UConverterStaticData _ISO2022KRStaticData={
   3884     sizeof(UConverterStaticData),
   3885     "ISO_2022_KR",
   3886     0,
   3887     UCNV_IBM,
   3888     UCNV_ISO_2022,
   3889     1,
   3890     3, /* max 3 bytes per UChar: SO+DBCS */
   3891     { 0x1a, 0, 0, 0 },
   3892     1,
   3893     FALSE,
   3894     FALSE,
   3895     0,
   3896     0,
   3897     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   3898 };
   3899 
   3900 namespace {
   3901 
   3902 const UConverterSharedData _ISO2022KRData={
   3903     sizeof(UConverterSharedData),
   3904     ~((uint32_t) 0),
   3905     NULL,
   3906     NULL,
   3907     &_ISO2022KRStaticData,
   3908     FALSE,
   3909     &_ISO2022KRImpl,
   3910     0, UCNV_MBCS_TABLE_INITIALIZER
   3911 };
   3912 
   3913 }  // namespace
   3914 
   3915 /*************** CN ***************/
   3916 static const UConverterImpl _ISO2022CNImpl={
   3917 
   3918     UCNV_ISO_2022,
   3919 
   3920     NULL,
   3921     NULL,
   3922 
   3923     _ISO2022Open,
   3924     _ISO2022Close,
   3925     _ISO2022Reset,
   3926 
   3927     UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,
   3928     UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,
   3929     UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,
   3930     UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,
   3931     NULL,
   3932 
   3933     NULL,
   3934     _ISO2022getName,
   3935     _ISO_2022_WriteSub,
   3936     _ISO_2022_SafeClone,
   3937     _ISO_2022_GetUnicodeSet,
   3938 
   3939     NULL,
   3940     NULL
   3941 };
   3942 static const UConverterStaticData _ISO2022CNStaticData={
   3943     sizeof(UConverterStaticData),
   3944     "ISO_2022_CN",
   3945     0,
   3946     UCNV_IBM,
   3947     UCNV_ISO_2022,
   3948     1,
   3949     8, /* max 8 bytes per UChar: 4-byte CNS designator + 2 bytes for SS2/SS3 + DBCS */
   3950     { 0x1a, 0, 0, 0 },
   3951     1,
   3952     FALSE,
   3953     FALSE,
   3954     0,
   3955     0,
   3956     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   3957 };
   3958 
   3959 namespace {
   3960 
   3961 const UConverterSharedData _ISO2022CNData={
   3962     sizeof(UConverterSharedData),
   3963     ~((uint32_t) 0),
   3964     NULL,
   3965     NULL,
   3966     &_ISO2022CNStaticData,
   3967     FALSE,
   3968     &_ISO2022CNImpl,
   3969     0, UCNV_MBCS_TABLE_INITIALIZER
   3970 };
   3971 
   3972 }  // namespace
   3973 
   3974 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
   3975