Home | History | Annotate | Download | only in charset
      1 /*
      2  *******************************************************************************
      3  * Copyright (C) 2006-2015, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  *******************************************************************************
      6  *
      7  *******************************************************************************
      8  */
      9 
     10 package com.ibm.icu.charset;
     11 
     12 import java.io.IOException;
     13 import java.io.InputStream;
     14 import java.nio.Buffer;
     15 import java.nio.BufferOverflowException;
     16 import java.nio.ByteBuffer;
     17 import java.nio.CharBuffer;
     18 import java.nio.IntBuffer;
     19 import java.nio.charset.CharsetDecoder;
     20 import java.nio.charset.CharsetEncoder;
     21 import java.nio.charset.CoderResult;
     22 import java.util.Locale;
     23 
     24 import com.ibm.icu.charset.UConverterSharedData.UConverterType;
     25 import com.ibm.icu.impl.ICUBinary;
     26 import com.ibm.icu.impl.ICUData;
     27 import com.ibm.icu.impl.ICUResourceBundle;
     28 import com.ibm.icu.impl.InvalidFormatException;
     29 import com.ibm.icu.lang.UCharacter;
     30 import com.ibm.icu.text.UTF16;
     31 import com.ibm.icu.text.UnicodeSet;
     32 
     33 class CharsetMBCS extends CharsetICU {
     34 
     35     private byte[] fromUSubstitution = null;
     36     UConverterSharedData sharedData = null;
     37     private static final int MAX_VERSION_LENGTH = 4;
     38 
     39     // these variables are used in getUnicodeSet() and may be changed in future
     40     // typedef enum UConverterSetFilter {
     41       static final int UCNV_SET_FILTER_NONE = 1;
     42       static final int UCNV_SET_FILTER_DBCS_ONLY = 2;
     43       static final int UCNV_SET_FILTER_2022_CN = 3;
     44       static final int UCNV_SET_FILTER_SJIS= 4 ;
     45       static final int UCNV_SET_FILTER_GR94DBCS = 5;
     46       static final int UCNV_SET_FILTER_HZ = 6;
     47       static final int UCNV_SET_FILTER_COUNT = 7;
     48    //  } UConverterSetFilter;
     49 
     50     /**
     51      * Fallbacks to Unicode are stored outside the normal state table and code point structures in a vector of items of
     52      * this type. They are sorted by offset.
     53      */
     54     final static class MBCSToUFallback {
     55         int offset;
     56         int codePoint;
     57 
     58         MBCSToUFallback(int off, int cp) {
     59             offset = off;
     60             codePoint = cp;
     61         }
     62     }
     63 
     64     /**
     65      * This is the MBCS part of the UConverterTable union (a runtime data structure). It keeps all the per-converter
     66      * data and points into the loaded mapping tables.
     67      */
     68     static final class UConverterMBCSTable {
     69         /* toUnicode */
     70         short countStates;
     71         byte dbcsOnlyState;
     72         boolean stateTableOwned;
     73         int countToUFallbacks;
     74 
     75         int stateTable[/* countStates */][/* 256 */];
     76         int swapLFNLStateTable[/* countStates */][/* 256 */]; /* for swaplfnl */
     77         char unicodeCodeUnits[/* countUnicodeResults */];
     78         MBCSToUFallback toUFallbacks[/* countToUFallbacks */];
     79 
     80         /* fromUnicode */
     81         char fromUnicodeTable[];  // stage1, and for MBCS_OUTPUT_1 also contains stage2
     82         int fromUnicodeTableInts[];  // stage1 and stage2 together as int[]
     83         // Exactly one of the fromUnicode(Type) tables is not null,
     84         // depending on the outputType.
     85         byte fromUnicodeBytes[];
     86         char fromUnicodeChars[];
     87         int fromUnicodeInts[];
     88         char swapLFNLFromUnicodeChars[]; /* for swaplfnl */
     89         int fromUBytesLength;
     90         short outputType, unicodeMask;
     91 
     92         /* converter name for swaplfnl */
     93         String swapLFNLName;
     94 
     95         /* extension data */
     96         UConverterSharedData baseSharedData;
     97         // int extIndexes[];
     98         ByteBuffer extIndexes; // create int[] view etc. as needed
     99 
    100         CharBuffer mbcsIndex;                     /* for fast conversion from most of BMP to MBCS (utf8Friendly data) */
    101         // char sbcsIndex[/* SBCS_FAST_LIMIT>>6 */]; /* for fast conversion from low BMP to SBCS (utf8Friendly data) */
    102         boolean utf8Friendly;                     /* for utf8Friendly data */
    103         char maxFastUChar;                        /* for utf8Friendly data */
    104 
    105         /* roundtrips */
    106         int asciiRoundtrips;
    107 
    108         UConverterMBCSTable() {
    109             utf8Friendly = false;
    110             mbcsIndex = null;
    111         }
    112 
    113         boolean hasSupplementary() {
    114             return (unicodeMask & UConverterConstants.HAS_SUPPLEMENTARY) != 0;
    115         }
    116 
    117         /*
    118          * UConverterMBCSTable(UConverterMBCSTable t) { countStates = t.countStates; dbcsOnlyState = t.dbcsOnlyState;
    119          * stateTableOwned = t.stateTableOwned; countToUFallbacks = t.countToUFallbacks; stateTable = t.stateTable;
    120          * swapLFNLStateTable = t.swapLFNLStateTable; unicodeCodeUnits = t.unicodeCodeUnits; toUFallbacks =
    121          * t.toUFallbacks; fromUnicodeTable = t.fromUnicodeTable; fromUnicodeBytes = t.fromUnicodeBytes;
    122          * swapLFNLFromUnicodeChars = t.swapLFNLFromUnicodeChars; fromUBytesLength = t.fromUBytesLength; outputType =
    123          * t.outputType; unicodeMask = t.unicodeMask; swapLFNLName = t.swapLFNLName; baseSharedData = t.baseSharedData;
    124          * extIndexes = t.extIndexes; }
    125          */
    126     }
    127 
    128     /* Constants used in MBCS data header */
    129     // enum {
    130         static final int MBCS_OPT_LENGTH_MASK=0x3f;
    131         static final int MBCS_OPT_NO_FROM_U=0x40;
    132         /*
    133          * If any of the following options bits are set,
    134          * then the file must be rejected.
    135          */
    136         static final int MBCS_OPT_INCOMPATIBLE_MASK=0xffc0;
    137         /*
    138          * Remove bits from this mask as more options are recognized
    139          * by all implementations that use this constant.
    140          */
    141         static final int MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK=0xff80;
    142     // };
    143     /* Constants for fast and UTF-8-friendly conversion. */
    144     // enum {
    145         static final int SBCS_FAST_MAX=0x0fff;               /* maximum code point with UTF-8-friendly SBCS runtime code, see makeconv SBCS_UTF8_MAX */
    146         static final int SBCS_FAST_LIMIT=SBCS_FAST_MAX+1;    /* =0x1000 */
    147         static final int MBCS_FAST_MAX=0xd7ff;               /* maximum code point with UTF-8-friendly MBCS runtime code, see makeconv MBCS_UTF8_MAX */
    148         static final int MBCS_FAST_LIMIT=MBCS_FAST_MAX+1;    /* =0xd800 */
    149     // };
    150     /**
    151      * MBCS data header. See data format description above.
    152      */
    153     final static class MBCSHeader {
    154         byte version[/* U_MAX_VERSION_LENGTH */];
    155         int countStates, countToUFallbacks, offsetToUCodeUnits, offsetFromUTable, offsetFromUBytes;
    156         int flags;
    157         int fromUBytesLength;
    158 
    159         /* new and required in version 5 */
    160         int options;
    161 
    162         /* new and optional in version 5; used if options&MBCS_OPT_NO_FROM_U */
    163         int fullStage2Length;  /* number of 32-bit units */
    164 
    165         MBCSHeader() {
    166             version = new byte[MAX_VERSION_LENGTH];
    167         }
    168     }
    169 
    170     public CharsetMBCS(String icuCanonicalName, String javaCanonicalName, String[] aliases, String classPath,
    171             ClassLoader loader) throws InvalidFormatException {
    172         super(icuCanonicalName, javaCanonicalName, aliases);
    173 
    174         /* See if the icuCanonicalName contains certain option information. */
    175         if (icuCanonicalName.indexOf(UConverterConstants.OPTION_SWAP_LFNL_STRING) > -1) {
    176             options = UConverterConstants.OPTION_SWAP_LFNL;
    177             icuCanonicalName = icuCanonicalName.substring(0, icuCanonicalName.indexOf(UConverterConstants.OPTION_SWAP_LFNL_STRING));
    178             super.icuCanonicalName = icuCanonicalName;
    179         }
    180 
    181         // now try to load the data
    182         sharedData = loadConverter(1, icuCanonicalName, classPath, loader);
    183 
    184         maxBytesPerChar = sharedData.staticData.maxBytesPerChar;
    185         minBytesPerChar = sharedData.staticData.minBytesPerChar;
    186         maxCharsPerByte = 1;
    187         fromUSubstitution = sharedData.staticData.subChar;
    188         subChar = sharedData.staticData.subChar;
    189         subCharLen = sharedData.staticData.subCharLen;
    190         subChar1 = sharedData.staticData.subChar1;
    191         fromUSubstitution = new byte[sharedData.staticData.subCharLen];
    192         System.arraycopy(sharedData.staticData.subChar, 0, fromUSubstitution, 0, sharedData.staticData.subCharLen);
    193 
    194         initializeConverter(options);
    195     }
    196 
    /**
     * Convenience constructor: delegates to the five-argument constructor with
     * {@code ICUResourceBundle.ICU_BUNDLE} as the classPath and a null class loader.
     *
     * @param icuCanonicalName ICU name of the converter
     * @param javaCanonicalName Java name of the charset
     * @param aliases alias names of the charset
     * @throws InvalidFormatException if the five-argument constructor fails to load the converter data
     */
    public CharsetMBCS(String icuCanonicalName, String javaCanonicalName, String[] aliases)
            throws InvalidFormatException {
        this(icuCanonicalName, javaCanonicalName, aliases, ICUResourceBundle.ICU_BUNDLE, null);
    }
    201 
    202     private UConverterSharedData loadConverter(int nestedLoads, String myName, String classPath, ClassLoader loader)
    203             throws InvalidFormatException {
    204         boolean noFromU = false;
    205         // Read converter data from file
    206         UConverterStaticData staticData = new UConverterStaticData();
    207         UConverterDataReader reader = null;
    208         try {
    209             String itemName = myName + '.' + UConverterSharedData.DATA_TYPE;
    210             String resourceName = classPath + '/' + itemName;
    211             ByteBuffer b;
    212 
    213             if (loader != null) {
    214                 @SuppressWarnings("resource")  // Closed by getByteBufferFromInputStreamAndCloseStream().
    215                 InputStream i = ICUData.getRequiredStream(loader, resourceName);
    216                 b = ICUBinary.getByteBufferFromInputStreamAndCloseStream(i);
    217             } else if (!classPath.equals(ICUData.ICU_BUNDLE)) {
    218                 @SuppressWarnings("resource")  // Closed by getByteBufferFromInputStreamAndCloseStream().
    219                 InputStream i = ICUData.getRequiredStream(resourceName);
    220                 b = ICUBinary.getByteBufferFromInputStreamAndCloseStream(i);
    221             } else {
    222                 b = ICUBinary.getRequiredData(itemName);
    223             }
    224             reader = new UConverterDataReader(b);
    225             reader.readStaticData(staticData);
    226         } catch (IOException e) {
    227             throw new InvalidFormatException(e);
    228         } catch (Exception e) {
    229             throw new InvalidFormatException(e);
    230         }
    231 
    232         UConverterSharedData data = null;
    233         int type = staticData.conversionType;
    234 
    235         if (type != UConverterSharedData.UConverterType.MBCS
    236                 || staticData.structSize != UConverterStaticData.SIZE_OF_UCONVERTER_STATIC_DATA) {
    237             throw new InvalidFormatException();
    238         }
    239 
    240         data = new UConverterSharedData(1, null, false, 0);
    241         data.dataReader = reader;
    242         data.staticData = staticData;
    243         data.sharedDataCached = false;
    244 
    245         // Load data
    246         UConverterMBCSTable mbcsTable = data.mbcs;
    247         MBCSHeader header = new MBCSHeader();
    248         try {
    249             reader.readMBCSHeader(header);
    250         } catch (IOException e) {
    251             throw new InvalidFormatException();
    252         }
    253 
    254         int offset;
    255         // int[] extIndexesArray = null;
    256         String baseNameString = null;
    257 
    258         if (header.version[0] == 5 && header.version[1] >= 3 && (header.options & MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK) == 0) {
    259             noFromU = ((header.options & MBCS_OPT_NO_FROM_U) != 0);
    260         } else if (header.version[0] != 4) {
    261             throw new InvalidFormatException();
    262         }
    263 
    264         mbcsTable.outputType = (byte) header.flags;
    265 
    266         /* extension data, header version 4.2 and higher */
    267         offset = header.flags >>> 8;
    268         // if(offset!=0 && mbcsTable.outputType == MBCS_OUTPUT_EXT_ONLY) {
    269         if (mbcsTable.outputType == MBCS_OUTPUT_EXT_ONLY) {
    270             try {
    271                 baseNameString = reader.readBaseTableName();
    272                 if (offset != 0) {
    273                     // agljport:commment subtract 32 for sizeof(_MBCSHeader) and length of baseNameString and 1 null
    274                     // terminator byte all already read;
    275                     mbcsTable.extIndexes = reader.readExtIndexes(offset - reader.bytesReadAfterStaticData());
    276                 }
    277             } catch (IOException e) {
    278                 throw new InvalidFormatException();
    279             }
    280         }
    281 
    282         // agljport:add this would be unnecessary if extIndexes were memory mapped
    283         /*
    284          * if(mbcsTable.extIndexes != null) {
    285          *
    286          * try { //int nbytes = mbcsTable.extIndexes[UConverterExt.UCNV_EXT_TO_U_LENGTH]*4 +
    287          * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_TO_U_UCHARS_LENGTH]*2 +
    288          * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_FROM_U_LENGTH]*6 +
    289          * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_FROM_U_BYTES_LENGTH] +
    290          * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_FROM_U_STAGE_12_LENGTH]*2 +
    291          * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_FROM_U_STAGE_3_LENGTH]*2 +
    292          * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_FROM_U_STAGE_3B_LENGTH]*4; //int nbytes =
    293          * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_SIZE] //byte[] extTables = dataReader.readExtTables(nbytes);
    294          * //mbcsTable.extTables = ByteBuffer.wrap(extTables); } catch(IOException e) { System.err.println("Caught
    295          * IOException: " + e.getMessage()); pErrorCode[0] = UErrorCode.U_INVALID_FORMAT_ERROR; return; } }
    296          */
    297         if (mbcsTable.outputType == MBCS_OUTPUT_EXT_ONLY) {
    298             UConverterSharedData baseSharedData = null;
    299             ByteBuffer extIndexes;
    300             String baseName;
    301 
    302             /* extension-only file, load the base table and set values appropriately */
    303             extIndexes = mbcsTable.extIndexes;
    304             if (extIndexes == null) {
    305                 /* extension-only file without extension */
    306                 throw new InvalidFormatException();
    307             }
    308 
    309             if (nestedLoads != 1) {
    310                 /* an extension table must not be loaded as a base table */
    311                 throw new InvalidFormatException();
    312             }
    313 
    314             /* load the base table */
    315             baseName = baseNameString;
    316             if (baseName.equals(staticData.name)) {
    317                 /* forbid loading this same extension-only file */
    318                 throw new InvalidFormatException();
    319             }
    320 
    321             // agljport:fix args.size=sizeof(UConverterLoadArgs);
    322             baseSharedData = loadConverter(2, baseName, classPath, loader);
    323 
    324             if (baseSharedData.staticData.conversionType != UConverterType.MBCS
    325                     || baseSharedData.mbcs.baseSharedData != null) {
    326                 // agljport:fix ucnv_unload(baseSharedData);
    327                 throw new InvalidFormatException();
    328             }
    329 
    330             /* copy the base table data */
    331             // agljport:comment deep copy in C changes mbcs through local reference mbcsTable; in java we probably don't
    332             // need the deep copy so can just make sure mbcs and its local reference both refer to the same new object
    333             mbcsTable = data.mbcs = baseSharedData.mbcs;
    334 
    335             /* overwrite values with relevant ones for the extension converter */
    336             mbcsTable.baseSharedData = baseSharedData;
    337             mbcsTable.extIndexes = extIndexes;
    338 
    339             /*
    340              * It would be possible to share the swapLFNL data with a base converter, but the generated name would have
    341              * to be different, and the memory would have to be free'd only once. It is easier to just create the data
    342              * for the extension converter separately when it is requested.
    343              */
    344             mbcsTable.swapLFNLStateTable = null;
    345             mbcsTable.swapLFNLFromUnicodeChars = null;
    346             mbcsTable.swapLFNLName = null;
    347 
    348             /*
    349              * Set a special, runtime-only outputType if the extension converter is a DBCS version of a base converter
    350              * that also maps single bytes.
    351              */
    352             if (staticData.conversionType == UConverterType.DBCS
    353                     || (staticData.conversionType == UConverterType.MBCS && staticData.minBytesPerChar >= 2)) {
    354 
    355                 if (baseSharedData.mbcs.outputType == MBCS_OUTPUT_2_SISO) {
    356                     /* the base converter is SI/SO-stateful */
    357                     int entry;
    358 
    359                     /* get the dbcs state from the state table entry for SO=0x0e */
    360                     entry = mbcsTable.stateTable[0][0xe];
    361                     if (MBCS_ENTRY_IS_FINAL(entry) && MBCS_ENTRY_FINAL_ACTION(entry) == MBCS_STATE_CHANGE_ONLY
    362                             && MBCS_ENTRY_FINAL_STATE(entry) != 0) {
    363                         mbcsTable.dbcsOnlyState = (byte) MBCS_ENTRY_FINAL_STATE(entry);
    364 
    365                         mbcsTable.outputType = MBCS_OUTPUT_DBCS_ONLY;
    366                     }
    367                 } else if (baseSharedData.staticData.conversionType == UConverterType.MBCS
    368                         && baseSharedData.staticData.minBytesPerChar == 1
    369                         && baseSharedData.staticData.maxBytesPerChar == 2 && mbcsTable.countStates <= 127) {
    370 
    371                     /* non-stateful base converter, need to modify the state table */
    372                     int newStateTable[][/* 256 */];
    373                     int state[]; // this works because java 2-D array is array of references and we can have state =
    374                     // newStateTable[i];
    375                     int i, count;
    376 
    377                     /* allocate a new state table and copy the base state table contents */
    378                     count = mbcsTable.countStates;
    379                     newStateTable = new int[(count + 1) * 1024][256];
    380 
    381                     for (i = 0; i < mbcsTable.stateTable.length; ++i)
    382                         System.arraycopy(mbcsTable.stateTable[i], 0, newStateTable[i], 0,
    383                                 mbcsTable.stateTable[i].length);
    384 
    385                     /* change all final single-byte entries to go to a new all-illegal state */
    386                     state = newStateTable[0];
    387                     for (i = 0; i < 256; ++i) {
    388                         if (MBCS_ENTRY_IS_FINAL(state[i])) {
    389                             state[i] = MBCS_ENTRY_TRANSITION(count, 0);
    390                         }
    391                     }
    392 
    393                     /* build the new all-illegal state */
    394                     state = newStateTable[count];
    395                     for (i = 0; i < 256; ++i) {
    396                         state[i] = MBCS_ENTRY_FINAL(0, MBCS_STATE_ILLEGAL, 0);
    397                     }
    398                     mbcsTable.stateTable = newStateTable;
    399                     mbcsTable.countStates = (byte) (count + 1);
    400                     mbcsTable.stateTableOwned = true;
    401 
    402                     mbcsTable.outputType = MBCS_OUTPUT_DBCS_ONLY;
    403                 }
    404             }
    405 
    406             /*
    407              * unlike below for files with base tables, do not get the unicodeMask from the sharedData; instead, use the
    408              * base table's unicodeMask, which we copied in the memcpy above; this is necessary because the static data
    409              * unicodeMask, especially the UCNV_HAS_SUPPLEMENTARY flag, is part of the base table data
    410              */
    411         } else {
    412             /* conversion file with a base table; an additional extension table is optional */
    413             /* make sure that the output type is known */
    414             switch (mbcsTable.outputType) {
    415             case MBCS_OUTPUT_1:
    416             case MBCS_OUTPUT_2:
    417             case MBCS_OUTPUT_3:
    418             case MBCS_OUTPUT_4:
    419             case MBCS_OUTPUT_3_EUC:
    420             case MBCS_OUTPUT_4_EUC:
    421             case MBCS_OUTPUT_2_SISO:
    422                 /* OK */
    423                 break;
    424             default:
    425                 throw new InvalidFormatException();
    426             }
    427 
    428             /*
    429              * converter versions 6.1 and up contain a unicodeMask that is used here to select the most efficient
    430              * function implementations
    431              */
    432             // agljport:fix info.size=sizeof(UDataInfo);
    433             // agljport:fix udata_getInfo((UDataMemory *)sharedData->dataMemory, &info);
    434             if (reader.dataFormatHasUnicodeMask()) {
    435                 /* mask off possible future extensions to be safe */
    436                 mbcsTable.unicodeMask = (short) (staticData.unicodeMask & 3);
    437             } else {
    438                 /* for older versions, assume worst case: contains anything possible (prevent over-optimizations) */
    439                 mbcsTable.unicodeMask = UConverterConstants.HAS_SUPPLEMENTARY | UConverterConstants.HAS_SURROGATES;
    440             }
    441             try {
    442                 reader.readMBCSTable(header, mbcsTable);
    443             } catch (IOException e) {
    444                 throw new InvalidFormatException();
    445             }
    446 
    447             if (offset != 0) {
    448                 try {
    449                     // agljport:commment subtract 32 for sizeof(_MBCSHeader) and length of baseNameString and 1 null
    450                     // terminator byte all already read;
    451                     // int namelen = baseNameString != null? baseNameString.length() + 1: 0;
    452                     mbcsTable.extIndexes = reader.readExtIndexes(offset - reader.bytesReadAfterStaticData());
    453                 } catch (IOException e) {
    454                     throw new InvalidFormatException();
    455                 }
    456             }
    457 
    458             if (header.version[1] >= 3 && (mbcsTable.unicodeMask & UConverterConstants.HAS_SURROGATES) == 0 &&
    459                     (mbcsTable.countStates == 1 ? ((char)header.version[2] >= (SBCS_FAST_MAX>>8)) : ((char)header.version[2] >= (MBCS_FAST_MAX>>8)))) {
    460                 mbcsTable.utf8Friendly = true;
    461 
    462                 if (mbcsTable.countStates == 1) {
    463                     /*
    464                      * SBCS: Stage 3 is allocated in 64-entry blocks for U+0000..SBCS_FAST_MAX or higher.
    465                      * Build a table with indexes to each block, to be used instead of
    466                      * the regular stage 1/2 table.
    467                      */
    468 //                    sbcsIndex = new char[SBCS_FAST_LIMIT>>6];
    469 //                    for (int i = 0; i < (SBCS_FAST_LIMIT>>6); ++i) {
    470 //                        mbcsTable.sbcsIndex[i] = mbcsTable.fromUnicodeTable[mbcsTable.fromUnicodeTable[i>>4]+((i<<2)&0x3c)];
    471 //                    }
    472                     /* set SBCS_FAST_MAX to reflect the reach of sbcsIndex[] even if header.version[2]>(SBCS_FAST_MAX>>8) */
    473                     mbcsTable.maxFastUChar = SBCS_FAST_MAX;
    474                 } else {
    475                     /*
    476                      * MBCS: Stage 3 is allocated in 64-entry blocks for U+0000..MBCS_FAST_MAX or higher.
    477                      * The .cnv file is prebuilt with an additional stage table with indexes to each block.
    478                      */
    479                     mbcsTable.maxFastUChar = (char)((header.version[2]<<8) | 0xff);
    480                 }
    481             }
    482             /* calculate a bit set of 4 ASCII characters per bit that round-trip to ASCII bytes */
    483             {
    484                 int asciiRoundtrips = 0xffffffff;
    485                 for (int i = 0; i < 0x80; ++i) {
    486                     if (mbcsTable.stateTable[0][i] != MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, i)) {
    487                         asciiRoundtrips &= ~(1 << (i >> 2));
    488                     }
    489                 }
    490                 mbcsTable.asciiRoundtrips = asciiRoundtrips;
    491             }
    492             // TODO: Use asciiRoundtrips to speed up conversion, like in ICU4C.
    493 
    494             if (noFromU) {
    495                 int stage1Length = (mbcsTable.unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) != 0 ? 0x440 : 0x40;
    496                 int stage2Length = (header.offsetFromUBytes - header.offsetFromUTable)/4 - stage1Length/2;
    497                 reconstituteData(mbcsTable, stage1Length, stage2Length, header.fullStage2Length);
    498             }
    499             if (mbcsTable.outputType == MBCS_OUTPUT_DBCS_ONLY || mbcsTable.outputType == MBCS_OUTPUT_2_SISO) {
    500                 /*
    501                  * MBCS_OUTPUT_DBCS_ONLY: No SBCS mappings, therefore ASCII does not roundtrip.
    502                  * MBCS_OUTPUT_2_SISO: Bypass the ASCII fastpath to handle prevLength correctly.
    503                  */
    504                 mbcsTable.asciiRoundtrips = 0;
    505             }
    506         }
    507         // TODO: Use mbcsIndex to speed up UTF-16 conversion, like in ICU4C.
    508         mbcsTable.mbcsIndex = null;
    509         return data;
    510     }
    511 
    512     private static boolean writeStage3Roundtrip(UConverterMBCSTable mbcsTable, long value, int codePoints[]) {
    513         char[] table;
    514         byte[] bytes;
    515         int stage2;
    516         int p;
    517         int c;
    518         int i, st3;
    519         long temp;
    520 
    521         table = mbcsTable.fromUnicodeTable;
    522         int[] tableInts = mbcsTable.fromUnicodeTableInts;
    523         bytes = mbcsTable.fromUnicodeBytes;
    524         char[] chars = mbcsTable.fromUnicodeChars;
    525         int[] ints = mbcsTable.fromUnicodeInts;
    526 
    527         /* for EUC outputTypes, modify the value like genmbcs.c's transformEUC() */
    528         switch(mbcsTable.outputType) {
    529         case MBCS_OUTPUT_3_EUC:
    530             if(value<=0xffff) {
    531                 /* short sequences are stored directly */
    532                 /* code set 0 or 1 */
    533             } else if(value<=0x8effff) {
    534                 /* code set 2 */
    535                 value&=0x7fff;
    536             } else /* first byte is 0x8f */ {
    537                 /* code set 3 */
    538                 value&=0xff7f;
    539             }
    540             break;
    541         case MBCS_OUTPUT_4_EUC:
    542             if(value<=0xffffff) {
    543                 /* short sequences are stored directly */
    544                 /* code set 0 or 1 */
    545             } else if(value<=0x8effffff) {
    546                 /* code set 2 */
    547                 value&=0x7fffff;
    548             } else /* first byte is 0x8f */ {
    549                 /* code set 3 */
    550                 value&=0xff7fff;
    551             }
    552             break;
    553         default:
    554             break;
    555         }
    556 
    557         for(i=0; i<=0x1f; ++value, ++i) {
    558             c=codePoints[i];
    559             if(c<0) {
    560                 continue;
    561             }
    562 
    563             /* locate the stage 2 & 3 data */
    564             stage2 = table[c>>10] + ((c>>4)&0x3f);
    565             st3 = tableInts[stage2];
    566             st3 = (int)(char)(st3 * 16 + (c&0xf));
    567 
    568             /* write the codepage bytes into stage 3 */
    569             switch(mbcsTable.outputType) {
    570             case MBCS_OUTPUT_3:
    571             case MBCS_OUTPUT_4_EUC:
    572                 p = st3*3;
    573                 bytes[p] = (byte)(value>>16);
    574                 bytes[p+1] = (byte)(value>>8);
    575                 bytes[p+2] = (byte)value;
    576                 break;
    577             case MBCS_OUTPUT_4:
    578                 ints[st3] = (int)value;
    579                 break;
    580             default:
    581                 /* 2 bytes per character */
    582                 chars[st3] = (char)value;
    583                 break;
    584             }
    585 
    586             /* set the roundtrip flag */
    587             temp = (1L<<(16+(c&0xf)));
    588             tableInts[stage2] |= temp;
    589         }
    590         return true;
    591      }
    592 
    593     private static void reconstituteData(UConverterMBCSTable mbcsTable,
    594             int stage1Length, int stage2Length, int fullStage2Length) {
    595         char[] stage1 = mbcsTable.fromUnicodeTable;
    596 
    597         // stage2 starts with unused stage1 space.
    598         // Indexes into stage 2 count from the bottom of the fromUnicodeTable.
    599         int numStage1Ints = stage1Length / 2;  // 2 chars = 1 int
    600         int[] stage2 = new int[numStage1Ints + fullStage2Length];
    601         System.arraycopy(mbcsTable.fromUnicodeTableInts, numStage1Ints,
    602                 stage2, (fullStage2Length - stage2Length) + numStage1Ints,
    603                 stage2Length);
    604         mbcsTable.fromUnicodeTableInts = stage2;
    605 
    606         /* reconstitute the initial part of stage 2 from the mbcsIndex */
    607         {
    608             int stageUTF8Length=(mbcsTable.maxFastUChar+1)>>6;
    609             int stageUTF8Index=0;
    610             int st1, st2, st3, i;
    611 
    612             for (st1 = 0; stageUTF8Index < stageUTF8Length; ++st1) {
    613                 st2 = stage1[st1];
    614                 if (st2 != stage1Length/2) {
    615                     /* each stage 2 block has 64 entries corresponding to 16 entries in the mbcsIndex */
    616                     for (i = 0; i < 16; ++i) {
    617                         st3 = mbcsTable.mbcsIndex.get(stageUTF8Index++);
    618                         if (st3 != 0) {
    619                             /* a stage 2 entry's index is per stage 3 16-block, not per stage 3 entry */
    620                             st3>>=4;
    621                             /*
    622                              * 4 stage 2 entries point to 4 consecutive stage 3 16-blocks which are
    623                              * allocated together as a single 64-block for access from the mbcsIndex
    624                              */
    625                             stage2[st2++] = st3++;
    626                             stage2[st2++] = st3++;
    627                             stage2[st2++] = st3++;
    628                             stage2[st2++] = st3;
    629                         } else {
    630                             /* no stage 3 block, skip */
    631                             st2+=4;
    632                         }
    633                     }
    634                 } else {
    635                     /* no stage 2 block, skip */
    636                     stageUTF8Index+=16;
    637                 }
    638             }
    639         }
    640 
    641         switch (mbcsTable.outputType) {
    642         case CharsetMBCS.MBCS_OUTPUT_2:
    643         case CharsetMBCS.MBCS_OUTPUT_2_SISO:
    644         case CharsetMBCS.MBCS_OUTPUT_3_EUC:
    645             mbcsTable.fromUnicodeChars = new char[mbcsTable.fromUBytesLength / 2];
    646             break;
    647         case CharsetMBCS.MBCS_OUTPUT_3:
    648         case CharsetMBCS.MBCS_OUTPUT_4_EUC:
    649             mbcsTable.fromUnicodeBytes = new byte[mbcsTable.fromUBytesLength];
    650             break;
    651         case CharsetMBCS.MBCS_OUTPUT_4:
    652             mbcsTable.fromUnicodeInts = new int[mbcsTable.fromUBytesLength / 4];
    653             break;
    654         default:
    655             // Cannot occur, caller checked already.
    656             assert false;
    657         }
    658 
    659         /* reconstitute fromUnicodeBytes with roundtrips from toUnicode data */
    660         MBCSEnumToUnicode(mbcsTable);
    661     }
    662 
    663     /*
    664      * Internal function enumerating the toUnicode data of an MBCS converter.
    665      * Currently only used for reconstituting data for a MBCS_OPT_NO_FROM_U
    666      * table, but could also be used for a future getUnicodeSet() option
    667      * that includes reverse fallbacks (after updating this function's implementation).
    668      * Currently only handles roundtrip mappings.
    669      * Does not currently handle extensions.
    670      */
    671     private static void MBCSEnumToUnicode(UConverterMBCSTable mbcsTable) {
    672         /*
    673          * Properties for each state, to speed up the enumeration.
    674          * Ignorable actions are unassigned/illegal/state-change-only:
    675          * They do not lead to mappings.
    676          *
    677          * Bits 7..6
    678          * 1 direct/initial state (stateful converters have mulitple)
    679          * 0 non-initial state with transitions or with nonignorable result actions
    680          * -1 final state with only ignorable actions
    681          *
    682          * Bits 5..3
    683          * The lowest byte value with non-ignorable actions is
    684          * value<<5 (rounded down).
    685          *
    686          * Bits 2..0:
    687          * The highest byte value with non-ignorable actions is
    688          * (value<<5)&0x1f (rounded up).
    689          */
    690         byte stateProps[] = new byte[MBCS_MAX_STATE_COUNT];
    691         int state;
    692 
    693         /* recurse from state 0 and set all stateProps */
    694         getStateProp(mbcsTable.stateTable, stateProps, 0);
    695 
    696         for (state = 0; state < mbcsTable.countStates; ++state) {
    697             if (stateProps[state] >= 0x40) {
    698                 /* start from each direct state */
    699                 enumToU(mbcsTable, stateProps, state, 0, 0);
    700             }
    701         }
    702 
    703 
    704     }
    705 
    706     private static boolean enumToU(UConverterMBCSTable mbcsTable, byte stateProps[], int state, int offset, int value) {
    707         int[] codePoints = new int[32];
    708         int[] row;
    709         char[] unicodeCodeUnits;
    710         int anyCodePoints;
    711         int b, limit;
    712 
    713         row = mbcsTable.stateTable[state];
    714         unicodeCodeUnits = mbcsTable.unicodeCodeUnits;
    715 
    716         value<<=8;
    717         anyCodePoints = -1; /* becomes non-negative if there is a mapping */
    718 
    719         b = (stateProps[state]&0x38)<<2;
    720         if (b == 0 && stateProps[state] >= 0x40) {
    721             /* skip byte sequences with leading zeros because they are note stored in the fromUnicode table */
    722             codePoints[0] = UConverterConstants.U_SENTINEL;
    723             b = 1;
    724         }
    725         limit = ((stateProps[state]&7)+1)<<5;
    726         while (b < limit) {
    727             int entry = row[b];
    728             if (MBCS_ENTRY_IS_TRANSITION(entry)) {
    729                 int nextState = MBCS_ENTRY_TRANSITION_STATE(entry);
    730                 if (stateProps[nextState] >= 0) {
    731                     /* recurse to a state with non-ignorable actions */
    732                     if (!enumToU(mbcsTable, stateProps, nextState, offset+MBCS_ENTRY_TRANSITION_OFFSET(entry), value|b)) {
    733                         return false;
    734                     }
    735                 }
    736                 codePoints[b&0x1f] = UConverterConstants.U_SENTINEL;
    737             } else {
    738                 int c;
    739                 int action;
    740 
    741                 /*
    742                  * An if-else-if chain provides more reliable performance for
    743                  * the most common cases compared to a switch.
    744                  */
    745                 action = MBCS_ENTRY_FINAL_ACTION(entry);
    746                 if (action == MBCS_STATE_VALID_DIRECT_16) {
    747                     /* output BMP code point */
    748                     c = MBCS_ENTRY_FINAL_VALUE_16(entry);
    749                 } else if (action == MBCS_STATE_VALID_16) {
    750                     int finalOffset = offset+MBCS_ENTRY_FINAL_VALUE_16(entry);
    751                     c = unicodeCodeUnits[finalOffset];
    752                     if (c < 0xfffe) {
    753                         /* output BMP code point */
    754                     } else {
    755                         c = UConverterConstants.U_SENTINEL;
    756                     }
    757                 } else if (action == MBCS_STATE_VALID_16_PAIR) {
    758                     int finalOffset = offset+MBCS_ENTRY_FINAL_VALUE_16(entry);
    759                     c = unicodeCodeUnits[finalOffset++];
    760                     if (c < 0xd800) {
    761                         /* output BMP code point below 0xd800 */
    762                     } else if (c <= 0xdbff) {
    763                         /* output roundtrip or fallback supplementary code point */
    764                         c = ((c&0x3ff)<<10)+unicodeCodeUnits[finalOffset]+(0x10000-0xdc00);
    765                     } else if (c == 0xe000) {
    766                         /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
    767                         c = unicodeCodeUnits[finalOffset];
    768                     } else {
    769                         c = UConverterConstants.U_SENTINEL;
    770                     }
    771                 } else if (action == MBCS_STATE_VALID_DIRECT_20) {
    772                     /* output supplementary code point */
    773                     c = MBCS_ENTRY_FINAL_VALUE(entry)+0x10000;
    774                 } else {
    775                     c = UConverterConstants.U_SENTINEL;
    776                 }
    777 
    778                 codePoints[b&0x1f] = c;
    779                 anyCodePoints&=c;
    780             }
    781             if (((++b)&0x1f) == 0) {
    782                 if(anyCodePoints>=0) {
    783                     if(!writeStage3Roundtrip(mbcsTable, value|(b-0x20), codePoints)) {
    784                         return false;
    785                     }
    786                     anyCodePoints=-1;
    787                 }
    788             }
    789         }
    790 
    791         return true;
    792     }
    793 
    794     /*
    795      * Only called if stateProps[state]==-1.
    796      * A recursive call may do stateProps[state]|=0x40 if this state is the target of an
    797      * MBCS_STATE_CHANGE_ONLY.
    798      */
    799     private static byte getStateProp(int stateTable[][], byte stateProps[], int state) {
    800         int[] row;
    801         int min, max, entry, nextState;
    802 
    803         row = stateTable[state];
    804         stateProps[state] = 0;
    805 
    806         /* find first non-ignorable state */
    807         for (min = 0;;++min) {
    808             entry = row[min];
    809             nextState = MBCS_ENTRY_STATE(entry);
    810             if (stateProps[nextState] == -1) {
    811                 getStateProp(stateTable, stateProps, nextState);
    812             }
    813             if (MBCS_ENTRY_IS_TRANSITION(entry)) {
    814                 if (stateProps[nextState] >- 0) {
    815                     break;
    816                 }
    817             } else if (MBCS_ENTRY_FINAL_ACTION(entry) < MBCS_STATE_UNASSIGNED) {
    818                 break;
    819             }
    820             if (min == 0xff) {
    821                 stateProps[state] = -0x40;  /* (byte)0xc0 */
    822                 return stateProps[state];
    823             }
    824         }
    825         stateProps[state]|=(byte)((min>>5)<<3);
    826 
    827         /* find last non-ignorable state */
    828         for (max = 0xff; min < max; --max) {
    829             entry = row[max];
    830             nextState = MBCS_ENTRY_STATE(entry);
    831             if (stateProps[nextState] == -1) {
    832                 getStateProp(stateTable, stateProps, nextState);
    833             }
    834             if (MBCS_ENTRY_IS_TRANSITION(entry)) {
    835                 if (stateProps[nextState] >- 0) {
    836                     break;
    837                 }
    838             } else if (MBCS_ENTRY_FINAL_ACTION(entry) < MBCS_STATE_UNASSIGNED) {
    839                 break;
    840             }
    841         }
    842         stateProps[state]|=(byte)(max>>5);
    843 
    844         /* recurse further and collect direct-state information */
    845         while (min <= max) {
    846             entry = row[min];
    847             nextState = MBCS_ENTRY_STATE(entry);
    848             if (stateProps[nextState] == -1) {
    849                 getStateProp(stateTable, stateProps, nextState);
    850             }
    851             if (MBCS_ENTRY_IS_TRANSITION(entry)) {
    852                 stateProps[nextState]|=0x40;
    853                 if (MBCS_ENTRY_FINAL_ACTION(entry) <= MBCS_STATE_FALLBACK_DIRECT_20) {
    854                     stateProps[state]|=0x40;
    855                 }
    856             }
    857             ++min;
    858         }
    859         return stateProps[state];
    860     }
    861 
    862     protected void initializeConverter(int myOptions) {
    863         UConverterMBCSTable mbcsTable;
    864         ByteBuffer extIndexes;
    865         short outputType;
    866         byte maxBytesPerUChar;
    867 
    868         mbcsTable = sharedData.mbcs;
    869         outputType = mbcsTable.outputType;
    870 
    871         if (outputType == MBCS_OUTPUT_DBCS_ONLY) {
    872             /* the swaplfnl option does not apply, remove it */
    873             this.options = myOptions &= ~UConverterConstants.OPTION_SWAP_LFNL;
    874         }
    875 
    876         if ((myOptions & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
    877             /* do this because double-checked locking is broken */
    878             boolean isCached;
    879 
    880             // agljport:todo umtx_lock(NULL);
    881             isCached = mbcsTable.swapLFNLStateTable != null;
    882             // agljport:todo umtx_unlock(NULL);
    883 
    884             if (!isCached) {
    885                 try {
    886                     if (!EBCDICSwapLFNL()) {
    887                         /* this option does not apply, remove it */
    888                         this.options = myOptions & ~UConverterConstants.OPTION_SWAP_LFNL;
    889                     }
    890                 } catch (Exception e) {
    891                     /* something went wrong. */
    892                     return;
    893                 }
    894             }
    895         }
    896 
    897         String lowerCaseName = icuCanonicalName.toLowerCase(Locale.ENGLISH);
    898         if (lowerCaseName.indexOf("gb18030") >= 0) {
    899             /* set a flag for GB 18030 mode, which changes the callback behavior */
    900             this.options |= MBCS_OPTION_GB18030;
    901         } else if (lowerCaseName.indexOf("keis") >= 0) {
    902             this.options |= MBCS_OPTION_KEIS;
    903         } else if (lowerCaseName.indexOf("jef") >= 0) {
    904             this.options |= MBCS_OPTION_JEF;
    905         } else if (lowerCaseName.indexOf("jips") >= 0) {
    906             this.options |= MBCS_OPTION_JIPS;
    907         }
    908 
    909         /* fix maxBytesPerUChar depending on outputType and options etc. */
    910         if (outputType == MBCS_OUTPUT_2_SISO) {
    911             /* changed from 3 to 4 in ICU4J only. #9205 */
    912             maxBytesPerChar = 4; /* SO+DBCS+SI*/
    913         }
    914 
    915         extIndexes = mbcsTable.extIndexes;
    916         if (extIndexes != null) {
    917             maxBytesPerUChar = (byte) GET_MAX_BYTES_PER_UCHAR(extIndexes);
    918             if (outputType == MBCS_OUTPUT_2_SISO) {
    919                 ++maxBytesPerUChar; /* SO + multiple DBCS */
    920             }
    921 
    922             if (maxBytesPerUChar > maxBytesPerChar) {
    923                 maxBytesPerChar = maxBytesPerUChar;
    924             }
    925         }
    926     }
    927      /* EBCDIC swap LF<->NL--------------------------------------------------------------------------------*/
     /*
      * This code modifies a standard EBCDIC<->Unicode mapping table for
      * OS/390 (z/OS) Unix System Services (Open Edition).
      * The difference is in the mapping of Line Feed and New Line control codes:
      * Standard EBCDIC maps
      *
      * <U000A> \x25 |0
      * <U0085> \x15 |0
      *
      * but OS/390 USS EBCDIC swaps the control codes for LF and NL,
      * mapping
      *
      * <U000A> \x15 |0
      * <U0085> \x25 |0
      *
      * This code modifies a loaded standard EBCDIC<->Unicode mapping table
      * by copying it into allocated memory and swapping the LF and NL values.
      * It allows supporting the same EBCDIC charset in both versions without
      * duplicating the entire installed table.
      */
    948     /* standard EBCDIC codes */
    949     private static final short EBCDIC_LF = 0x0025;
    950     private static final short EBCDIC_NL = 0x0015;
    951 
    952     /* standard EBCDIC codes with roundtrip flag as stored in Unicode-to-single-byte tables */
    953     private static final short EBCDIC_RT_LF = 0x0f25;
    954     private static final short EBCDIC_RT_NL = 0x0f15;
    955 
    956     /* Unicode code points */
    957     private static final short U_LF = 0x000A;
    958     private static final short U_NL = 0x0085;
    959 
    960     private boolean EBCDICSwapLFNL() throws Exception {
    961         UConverterMBCSTable mbcsTable;
    962 
    963         char[] table;
    964 
    965         int[][] newStateTable;
    966         String newName;
    967 
    968         int stage2Entry;
    969 
    970         mbcsTable = sharedData.mbcs;
    971 
    972         table = mbcsTable.fromUnicodeTable;
    973         int[] tableInts = sharedData.mbcs.fromUnicodeTableInts;
    974         char[] chars = mbcsTable.fromUnicodeChars;
    975         char[] results = chars;
    976 
    977         /*
    978          * Check that this is an EBCDIC table with SBCS portion -
    979          * SBCS or EBCDIC with standard EBCDIC LF and NL mappings.
    980          *
    981          * If not, ignore the option. Options are always ignored if they do not apply.
    982          */
    983         if (!((mbcsTable.outputType == MBCS_OUTPUT_1 || mbcsTable.outputType == MBCS_OUTPUT_2_SISO) &&
    984               mbcsTable.stateTable[0][EBCDIC_LF] == MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF) &&
    985               mbcsTable.stateTable[0][EBCDIC_NL] == MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL))) {
    986             return false;
    987         }
    988 
    989         if (mbcsTable.outputType == MBCS_OUTPUT_1) {
    990             if (!(EBCDIC_RT_LF == MBCS_SINGLE_RESULT_FROM_U(table, results, U_LF) &&
    991                   EBCDIC_RT_NL == MBCS_SINGLE_RESULT_FROM_U(table, results, U_NL))) {
    992                 return false;
    993             }
    994         } else /* MBCS_OUTPUT_2_SISO */ {
    995             stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, U_LF);
    996             if (!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_LF) &&
    997                   EBCDIC_LF == MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, U_LF))) {
    998                 return false;
    999             }
   1000 
   1001             stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, U_NL);
   1002             if (!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_NL) &&
   1003                   EBCDIC_NL == MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, U_NL))) {
   1004                 return false;
   1005             }
   1006         }
   1007 
   1008         if (mbcsTable.fromUBytesLength > 0) {
   1009             /*
   1010              * We _know_ the number of bytes in the fromUnicodeBytes array
   1011              * starting with header.version 4.1.
   1012              */
   1013             // sizeofFromUBytes = mbcsTable.fromUBytesLength;
   1014         } else {
   1015             /*
   1016              * Otherwise:
   1017              * There used to be code to enumerate the fromUnicode
   1018              * trie and find the highest entry, but it was removed in ICU 3.2
   1019              * because it was not tested and caused a low code coverage number.
   1020              */
   1021             throw new Exception("U_INVALID_FORMAT_ERROR");
   1022         }
   1023 
   1024         /*
   1025          * The table has an appropriate format.
   1026          * Allocate and build
   1027          * - a modified to-Unicode state table
   1028          * - a modified from-Unicode output array
   1029          * - a converter name string with the swap option appended
   1030          */
   1031 //        size = mbcsTable.countStates * 1024 + sizeofFromUBytes + UConverterConstants.MAX_CONVERTER_NAME_LENGTH + 20;
   1032 
   1033         /* copy and modify the to-Unicode state table */
   1034         newStateTable = new int[mbcsTable.stateTable.length][mbcsTable.stateTable[0].length];
   1035         for (int i = 0; i < newStateTable.length; i++) {
   1036             System.arraycopy(mbcsTable.stateTable[i], 0, newStateTable[i], 0, newStateTable[i].length);
   1037         }
   1038 
   1039         newStateTable[0][EBCDIC_LF] = MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL);
   1040         newStateTable[0][EBCDIC_NL] = MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF);
   1041 
   1042         /* copy and modify the from-Unicode result table */
   1043         char[] newResults = new char[chars.length];
   1044         System.arraycopy(chars, 0, newResults, 0, chars.length);
   1045         /* conveniently, the table access macros work on the left side of expressions */
   1046         if (mbcsTable.outputType == MBCS_OUTPUT_1) {
   1047             MBCS_SINGLE_RESULT_FROM_U_SET(table, newResults, U_LF, EBCDIC_RT_NL);
   1048             MBCS_SINGLE_RESULT_FROM_U_SET(table, newResults, U_NL, EBCDIC_RT_LF);
   1049         } else /* MBCS_OUTPUT_2_SISO */ {
   1050             stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, U_LF);
   1051             MBCS_VALUE_2_FROM_STAGE_2_SET(newResults, stage2Entry, U_LF, EBCDIC_NL);
   1052 
   1053             stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, U_NL);
   1054             MBCS_VALUE_2_FROM_STAGE_2_SET(newResults, stage2Entry, U_NL, EBCDIC_LF);
   1055         }
   1056 
   1057         /* set the canonical converter name */
   1058         newName = icuCanonicalName.concat(UConverterConstants.OPTION_SWAP_LFNL_STRING);
   1059 
   1060         if (mbcsTable.swapLFNLStateTable == null) {
   1061             mbcsTable.swapLFNLStateTable = newStateTable;
   1062             mbcsTable.swapLFNLFromUnicodeChars = newResults;
   1063             mbcsTable.swapLFNLName = newName;
   1064         }
   1065         return true;
   1066     }
   1067 
   1068     /**
   1069      * MBCS output types for conversions from Unicode. These per-converter types determine the storage method in stage 3
   1070      * of the lookup table, mostly how many bytes are stored per entry.
   1071      */
   1072     static final int MBCS_OUTPUT_1 = 0; /* 0 */
   1073     static final int MBCS_OUTPUT_2 = MBCS_OUTPUT_1 + 1; /* 1 */
   1074     static final int MBCS_OUTPUT_3 = MBCS_OUTPUT_2 + 1; /* 2 */
   1075     static final int MBCS_OUTPUT_4 = MBCS_OUTPUT_3 + 1; /* 3 */
   1076     static final int MBCS_OUTPUT_3_EUC = 8; /* 8 */
   1077     static final int MBCS_OUTPUT_4_EUC = MBCS_OUTPUT_3_EUC + 1; /* 9 */
   1078     static final int MBCS_OUTPUT_2_SISO = 12; /* c */
   1079     static final int MBCS_OUTPUT_2_HZ = MBCS_OUTPUT_2_SISO + 1; /* d */
   1080     static final int MBCS_OUTPUT_EXT_ONLY = MBCS_OUTPUT_2_HZ + 1; /* e */
   1081     // static final int MBCS_OUTPUT_COUNT = MBCS_OUTPUT_EXT_ONLY + 1;
   1082     static final int MBCS_OUTPUT_DBCS_ONLY = 0xdb; /* runtime-only type for DBCS-only handling of SISO tables */
   1083 
   1084     /* GB 18030 data ------------------------------------------------------------ */
   1085 
   1086     /* helper macros for linear values for GB 18030 four-byte sequences */
   1087     private static int LINEAR_18030(int a, int b, int c, int d) {
   1088         return ((((a & 0xff) * 10 + (b & 0xff)) * 126 + (c & 0xff)) * 10 + (d & 0xff));
   1089     }
   1090 
   1091     private static int LINEAR_18030_BASE = LINEAR_18030(0x81, 0x30, 0x81, 0x30);
   1092 
   1093     private static int LINEAR(int x) {
   1094         return LINEAR_18030(x >>> 24, (x >>> 16) & 0xff, (x >>> 8) & 0xff, x & 0xff);
   1095     }
   1096 
   1097     /*
   1098      * Some ranges of GB 18030 where both the Unicode code points and the GB four-byte sequences are contiguous and are
   1099      * handled algorithmically by the special callback functions below. The values are start & end of Unicode & GB
   1100      * codes.
   1101      *
   1102      * Note that single surrogates are not mapped by GB 18030 as of the re-released mapping tables from 2000-nov-30.
   1103      */
   1104     private static final int gb18030Ranges[][] = new int[/* 14 */][/* 4 */] {
   1105             { 0x10000, 0x10FFFF, LINEAR(0x90308130), LINEAR(0xE3329A35) },
   1106             { 0x9FA6, 0xD7FF, LINEAR(0x82358F33), LINEAR(0x8336C738) },
   1107             { 0x0452, 0x1E3E, LINEAR(0x8130D330), LINEAR(0x8135F436) },
   1108             { 0x1E40, 0x200F, LINEAR(0x8135F438), LINEAR(0x8136A531) },
   1109             { 0xE865, 0xF92B, LINEAR(0x8336D030), LINEAR(0x84308534) },
   1110             { 0x2643, 0x2E80, LINEAR(0x8137A839), LINEAR(0x8138FD38) },
   1111             { 0xFA2A, 0xFE2F, LINEAR(0x84309C38), LINEAR(0x84318537) },
   1112             { 0x3CE1, 0x4055, LINEAR(0x8231D438), LINEAR(0x8232AF32) },
   1113             { 0x361B, 0x3917, LINEAR(0x8230A633), LINEAR(0x8230F237) },
   1114             { 0x49B8, 0x4C76, LINEAR(0x8234A131), LINEAR(0x8234E733) },
   1115             { 0x4160, 0x4336, LINEAR(0x8232C937), LINEAR(0x8232F837) },
   1116             { 0x478E, 0x4946, LINEAR(0x8233E838), LINEAR(0x82349638) },
   1117             { 0x44D7, 0x464B, LINEAR(0x8233A339), LINEAR(0x8233C931) },
   1118             { 0xFFE6, 0xFFFF, LINEAR(0x8431A234), LINEAR(0x8431A439) } };
   1119 
   1120     /* bit flag for UConverter.options indicating GB 18030 special handling */
   1121     private static final int MBCS_OPTION_GB18030 = 0x8000;
   1122 
   1123     /* bit flag for UConverter.options indicating KEIS,JEF,JIF special handling */
   1124     private static final int MBCS_OPTION_KEIS = 0x01000;
   1125     private static final int MBCS_OPTION_JEF = 0x02000;
   1126     private static final int MBCS_OPTION_JIPS = 0x04000;
   1127 
   1128     private static enum SISO_Option {
   1129         SI,
   1130         SO
   1131     }
   1132 
   1133     private static final byte[] KEIS_SO_CHAR = { 0x0A, 0x42 };
   1134     private static final byte[] KEIS_SI_CHAR = { 0x0A, 0x41 };
   1135     private static final byte JEF_SO_CHAR = 0x28;
   1136     private static final byte JEF_SI_CHAR = 0x29;
   1137     private static final byte[] JIPS_SO_CHAR = { 0x1A, 0x70 };
   1138     private static final byte[] JIPS_SI_CHAR = { 0x1A, 0x71 };
   1139 
   1140     private static int getSISOBytes(SISO_Option option, int cnvOption, byte[] value) {
   1141         int SISOLength = 0;
   1142 
   1143         switch (option) {
   1144             case SI:
   1145                 if ((cnvOption&MBCS_OPTION_KEIS)!=0) {
   1146                     value[0] = KEIS_SI_CHAR[0];
   1147                     value[1] = KEIS_SI_CHAR[1];
   1148                     SISOLength = 2;
   1149                 } else if ((cnvOption&MBCS_OPTION_JEF)!=0) {
   1150                     value[0] = JEF_SI_CHAR;
   1151                     SISOLength = 1;
   1152                 } else if ((cnvOption&MBCS_OPTION_JIPS)!=0) {
   1153                     value[0] = JIPS_SI_CHAR[0];
   1154                     value[1] = JIPS_SI_CHAR[1];
   1155                     SISOLength = 2;
   1156                 } else {
   1157                     value[0] = UConverterConstants.SI;
   1158                     SISOLength = 1;
   1159                 }
   1160                 break;
   1161             case SO:
   1162                 if ((cnvOption&MBCS_OPTION_KEIS)!=0) {
   1163                     value[0] = KEIS_SO_CHAR[0];
   1164                     value[1] = KEIS_SO_CHAR[1];
   1165                     SISOLength = 2;
   1166                 } else if ((cnvOption&MBCS_OPTION_JEF)!=0) {
   1167                     value[0] = JEF_SO_CHAR;
   1168                     SISOLength = 1;
   1169                 } else if ((cnvOption&MBCS_OPTION_JIPS)!=0) {
   1170                     value[0] = JIPS_SO_CHAR[0];
   1171                     value[1] = JIPS_SO_CHAR[1];
   1172                     SISOLength = 2;
   1173                 } else {
   1174                     value[0] = UConverterConstants.SO;
   1175                     SISOLength = 1;
   1176                 }
   1177                 break;
   1178             default:
   1179                 /* Should never happen. */
   1180                 break;
   1181         }
   1182 
   1183         return SISOLength;
   1184     }
   1185     // enum {
   1186         static final int MBCS_MAX_STATE_COUNT = 128;
   1187     // };
   1188     /**
   1189      * MBCS action codes for conversions to Unicode. These values are in bits 23..20 of the state table entries.
   1190      */
   1191     static final int MBCS_STATE_VALID_DIRECT_16 = 0;
   1192     static final int MBCS_STATE_VALID_DIRECT_20 = MBCS_STATE_VALID_DIRECT_16 + 1;
   1193     static final int MBCS_STATE_FALLBACK_DIRECT_16 = MBCS_STATE_VALID_DIRECT_20 + 1;
   1194     static final int MBCS_STATE_FALLBACK_DIRECT_20 = MBCS_STATE_FALLBACK_DIRECT_16 + 1;
   1195     static final int MBCS_STATE_VALID_16 = MBCS_STATE_FALLBACK_DIRECT_20 + 1;
   1196     static final int MBCS_STATE_VALID_16_PAIR = MBCS_STATE_VALID_16 + 1;
   1197     static final int MBCS_STATE_UNASSIGNED = MBCS_STATE_VALID_16_PAIR + 1;
   1198     static final int MBCS_STATE_ILLEGAL = MBCS_STATE_UNASSIGNED + 1;
   1199     static final int MBCS_STATE_CHANGE_ONLY = MBCS_STATE_ILLEGAL + 1;
   1200 
   1201     static int MBCS_ENTRY_SET_STATE(int entry, int state) {
   1202         return (entry&0x80ffffff)|(state<<24L);
   1203     }
   1204 
   1205     static int MBCS_ENTRY_STATE(int entry) {
   1206         return (((entry)>>24)&0x7f);
   1207     }
   1208 
   1209     /* Methods for state table entries */
   1210     static int MBCS_ENTRY_TRANSITION(int state, int offset) {
   1211         return (state << 24L) | offset;
   1212     }
   1213 
   1214     static int MBCS_ENTRY_FINAL(int state, int action, int value) {
   1215         return 0x80000000 | (state << 24L) | (action << 20L) | value;
   1216     }
   1217 
   1218     static boolean MBCS_ENTRY_IS_TRANSITION(int entry) {
   1219         return (entry) >= 0;
   1220     }
   1221 
   1222     static boolean MBCS_ENTRY_IS_FINAL(int entry) {
   1223         return (entry) < 0;
   1224     }
   1225 
   1226     static int MBCS_ENTRY_TRANSITION_STATE(int entry) {
   1227         return ((entry) >>> 24);
   1228     }
   1229 
   1230     static int MBCS_ENTRY_TRANSITION_OFFSET(int entry) {
   1231         return ((entry) & 0xffffff);
   1232     }
   1233 
   1234     static int MBCS_ENTRY_FINAL_STATE(int entry) {
   1235         return ((entry) >>> 24) & 0x7f;
   1236     }
   1237 
   1238     static boolean MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(int entry) {
   1239         return ((entry) < 0x80100000);
   1240     }
   1241 
   1242     static int MBCS_ENTRY_FINAL_ACTION(int entry) {
   1243         return ((entry) >>> 20) & 0xf;
   1244     }
   1245 
   1246     static int MBCS_ENTRY_FINAL_VALUE(int entry) {
   1247         return ((entry) & 0xfffff);
   1248     }
   1249 
   1250     static char MBCS_ENTRY_FINAL_VALUE_16(int entry) {
   1251         return (char) (entry);
   1252     }
   1253 
   1254     static boolean MBCS_IS_ASCII_ROUNDTRIP(int b, long asciiRoundtrips) {
   1255         return (((asciiRoundtrips) & (1<<((b)>>2)))!=0);
   1256     }
   1257 
   1258     /**
   1259      * This macro version of _MBCSSingleSimpleGetNextUChar() gets a code point from a byte. It works for single-byte,
   1260      * single-state codepages that only map to and from BMP code points, and it always returns fallback values.
   1261      */
   1262     static char MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(UConverterMBCSTable mbcs, final int b) {
   1263         assert 0 <= b && b <= 0xff;
   1264         return MBCS_ENTRY_FINAL_VALUE_16(mbcs.stateTable[0][b]);
   1265     }
   1266 
   1267     /* single-byte fromUnicode: get the 16-bit result word */
   1268     static char MBCS_SINGLE_RESULT_FROM_U(char[] table, char[] results, int c) {
   1269         int i1 = table[c >>> 10] + ((c >>> 4) & 0x3f);
   1270         int i = table[i1] + (c & 0xf);
   1271         return results[i];
   1272     }
   1273 
   1274     /* single-byte fromUnicode: set the 16-bit result word with newValue*/
   1275     static void MBCS_SINGLE_RESULT_FROM_U_SET(char[] table, char[] results, int c, int newValue) {
   1276         int i1 = table[c >>> 10] + ((c >>> 4) & 0x3f);
   1277         int i = table[i1] + (c & 0xf);
   1278         results[i] = (char) newValue;
   1279     }
   1280 
   1281     /* multi-byte fromUnicode: get the 32-bit stage 2 entry */
   1282     static int MBCS_STAGE_2_FROM_U(char[] table, int[] tableInts, int c) {
   1283         int i = table[(c) >>> 10] + ((c >>> 4) & 0x3f);
   1284         return tableInts[i];
   1285     }
   1286 
   1287     private static boolean MBCS_FROM_U_IS_ROUNDTRIP(int stage2Entry, int c) {
   1288         return (((stage2Entry) & (1 << (16 + ((c) & 0xf)))) != 0);
   1289     }
   1290 
   1291     static char MBCS_VALUE_2_FROM_STAGE_2(char[] chars, int stage2Entry, int c) {
   1292         int i = 16 * (stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf);
   1293         return chars[i];
   1294     }
   1295 
   1296     static void MBCS_VALUE_2_FROM_STAGE_2_SET(char[] chars, int stage2Entry, int c, int newValue) {
   1297         int i = 16 * (stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf);
   1298         chars[i] = (char) newValue;
   1299     }
   1300 
   1301     private static int MBCS_VALUE_4_FROM_STAGE_2(int[] ints, int stage2Entry, int c) {
   1302         int i = 16 * (stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf);
   1303         return ints[i];
   1304     }
   1305 
   1306     static int MBCS_POINTER_3_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c) {
   1307         return ((16 * (stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + ((c) & 0xf)) * 3);
   1308     }
   1309 
   1310     // ------------UConverterExt-------------------------------------------------------
   1311 
   1312     static final int EXT_INDEXES_LENGTH = 0; /* 0 */
   1313 
   1314     static final int EXT_TO_U_INDEX = EXT_INDEXES_LENGTH + 1; /* 1 */
   1315     static final int EXT_TO_U_LENGTH = EXT_TO_U_INDEX + 1;
   1316     static final int EXT_TO_U_UCHARS_INDEX = EXT_TO_U_LENGTH + 1;
   1317     static final int EXT_TO_U_UCHARS_LENGTH = EXT_TO_U_UCHARS_INDEX + 1;
   1318 
   1319     static final int EXT_FROM_U_UCHARS_INDEX = EXT_TO_U_UCHARS_LENGTH + 1; /* 5 */
   1320     static final int EXT_FROM_U_VALUES_INDEX = EXT_FROM_U_UCHARS_INDEX + 1;
   1321     static final int EXT_FROM_U_LENGTH = EXT_FROM_U_VALUES_INDEX + 1;
   1322     static final int EXT_FROM_U_BYTES_INDEX = EXT_FROM_U_LENGTH + 1;
   1323     static final int EXT_FROM_U_BYTES_LENGTH = EXT_FROM_U_BYTES_INDEX + 1;
   1324 
   1325     static final int EXT_FROM_U_STAGE_12_INDEX = EXT_FROM_U_BYTES_LENGTH + 1; /* 10 */
   1326     static final int EXT_FROM_U_STAGE_1_LENGTH = EXT_FROM_U_STAGE_12_INDEX + 1;
   1327     static final int EXT_FROM_U_STAGE_12_LENGTH = EXT_FROM_U_STAGE_1_LENGTH + 1;
   1328     static final int EXT_FROM_U_STAGE_3_INDEX = EXT_FROM_U_STAGE_12_LENGTH + 1;
   1329     static final int EXT_FROM_U_STAGE_3_LENGTH = EXT_FROM_U_STAGE_3_INDEX + 1;
   1330     static final int EXT_FROM_U_STAGE_3B_INDEX = EXT_FROM_U_STAGE_3_LENGTH + 1;
   1331     static final int EXT_FROM_U_STAGE_3B_LENGTH = EXT_FROM_U_STAGE_3B_INDEX + 1;
   1332 
   1333     private static final int EXT_COUNT_BYTES = EXT_FROM_U_STAGE_3B_LENGTH + 1; /* 17 */
   1334     // private static final int EXT_COUNT_UCHARS = EXT_COUNT_BYTES + 1;
   1335     // private static final int EXT_FLAGS = EXT_COUNT_UCHARS + 1;
   1336     //
   1337     // private static final int EXT_RESERVED_INDEX = EXT_FLAGS + 1; /* 20, moves with additional indexes */
   1338     //
   1339     // private static final int EXT_SIZE=31;
   1340     // private static final int EXT_INDEXES_MIN_LENGTH=32;
   1341 
   1342     static final int EXT_FROM_U_MAX_DIRECT_LENGTH = 3;
   1343 
   1344     /* toUnicode helpers -------------------------------------------------------- */
   1345 
   1346     private static final int TO_U_BYTE_SHIFT = 24;
   1347     private static final int TO_U_VALUE_MASK = 0xffffff;
   1348     private static final int TO_U_MIN_CODE_POINT = 0x1f0000;
   1349     private static final int TO_U_MAX_CODE_POINT = 0x2fffff;
   1350     private static final int TO_U_ROUNDTRIP_FLAG = (1 << 23);
   1351     private static final int TO_U_INDEX_MASK = 0x3ffff;
   1352     private static final int TO_U_LENGTH_SHIFT = 18;
   1353     private static final int TO_U_LENGTH_OFFSET = 12;
   1354 
   1355     /* maximum number of indexed UChars */
   1356     static final int MAX_UCHARS = 19;
   1357 
   1358     static int TO_U_GET_BYTE(int word) {
   1359         return word >>> TO_U_BYTE_SHIFT;
   1360     }
   1361 
   1362     static int TO_U_GET_VALUE(int word) {
   1363         return word & TO_U_VALUE_MASK;
   1364     }
   1365 
   1366     static boolean TO_U_IS_ROUNDTRIP(int value) {
   1367         return (value & TO_U_ROUNDTRIP_FLAG) != 0;
   1368     }
   1369 
   1370     static boolean TO_U_IS_PARTIAL(int value) {
   1371         return 0 <= value && value < TO_U_MIN_CODE_POINT;
   1372     }
   1373 
   1374     static int TO_U_GET_PARTIAL_INDEX(int value) {
   1375         return value;
   1376     }
   1377 
   1378     static int TO_U_MASK_ROUNDTRIP(int value) {
   1379         return value & ~TO_U_ROUNDTRIP_FLAG;
   1380     }
   1381 
   1382     private static int TO_U_MAKE_WORD(byte b, int value) {
   1383         // TO_U_BYTE_SHIFT == 24: safe to just shift the signed byte-as-int.
   1384         return (b << TO_U_BYTE_SHIFT) | value;
   1385     }
   1386 
   1387     /* use after masking off the roundtrip flag */
   1388     static boolean TO_U_IS_CODE_POINT(int value) {
   1389         assert value >= 0;
   1390         return value <= TO_U_MAX_CODE_POINT;
   1391     }
   1392 
   1393     static int TO_U_GET_CODE_POINT(int value) {
   1394         assert value >= 0;
   1395         return value - TO_U_MIN_CODE_POINT;
   1396     }
   1397 
   1398     private static int TO_U_GET_INDEX(int value) {
   1399         return value & TO_U_INDEX_MASK;
   1400     }
   1401 
   1402     private static int TO_U_GET_LENGTH(int value) {
   1403         return (value >>> TO_U_LENGTH_SHIFT) - TO_U_LENGTH_OFFSET;
   1404     }
   1405 
   1406     /* fromUnicode helpers ------------------------------------------------------ */
   1407 
   1408     /* most trie constants are shared with ucnvmbcs.h */
   1409     private static final int STAGE_2_LEFT_SHIFT = 2;
   1410 
   1411     // private static final int STAGE_3_GRANULARITY = 4;
   1412 
   1413     /* trie access, returns the stage 3 value=index to stage 3b; s1Index=c>>10 */
   1414     static int FROM_U(CharBuffer stage12, CharBuffer stage3, int s1Index, int c) {
   1415         return stage3.get(((int) stage12.get((stage12.get(s1Index) + ((c >>> 4) & 0x3f))) << STAGE_2_LEFT_SHIFT)
   1416                 + (c & 0xf));
   1417     }
   1418 
   1419     private static final int FROM_U_LENGTH_SHIFT = 24;
   1420     private static final int FROM_U_ROUNDTRIP_FLAG = 1 << 31;
   1421     static final int FROM_U_RESERVED_MASK = 0x60000000;
   1422     private static final int FROM_U_DATA_MASK = 0xffffff;
   1423 
   1424     /* special value for "no mapping" to <subchar1> (impossible roundtrip to 0 bytes, value 01) */
   1425     static final int FROM_U_SUBCHAR1 = 0x80000001;
   1426 
   1427     /* at most 3 bytes in the lower part of the value */
   1428     private static final int FROM_U_MAX_DIRECT_LENGTH = 3;
   1429 
   1430     /* maximum number of indexed bytes */
   1431     static final int MAX_BYTES = 0x1f;
   1432 
   1433     static boolean FROM_U_IS_PARTIAL(int value) {
   1434         return (value >>> FROM_U_LENGTH_SHIFT) == 0;
   1435     }
   1436 
   1437     static int FROM_U_GET_PARTIAL_INDEX(int value) {
   1438         return value;
   1439     }
   1440 
   1441     static boolean FROM_U_IS_ROUNDTRIP(int value) {
   1442         return (value & FROM_U_ROUNDTRIP_FLAG) != 0;
   1443     }
   1444 
   1445     private static int FROM_U_MASK_ROUNDTRIP(int value) {
   1446         return value & ~FROM_U_ROUNDTRIP_FLAG;
   1447     }
   1448 
   1449     /* use after masking off the roundtrip flag */
   1450     static int FROM_U_GET_LENGTH(int value) {
   1451         return (value >>> FROM_U_LENGTH_SHIFT) & MAX_BYTES;
   1452     }
   1453 
   1454     /* get bytes or bytes index */
   1455     static int FROM_U_GET_DATA(int value) {
   1456         return value & FROM_U_DATA_MASK;
   1457     }
   1458 
   1459     /* get the pointer to an extension array from indexes[index] */
   1460     static Buffer ARRAY(ByteBuffer indexes, int index, Class<?> itemType) {
   1461         int oldpos = indexes.position();
   1462         Buffer b;
   1463 
   1464         // TODO: It is very inefficient to create Buffer objects for each array access.
   1465         // We should create an inner class Extensions (or sibling class CharsetMBCSExtensions)
   1466         // which has buffers for the arrays, together with the code that works with them.
   1467         indexes.position(indexes.getInt(index << 2));
   1468         if (itemType == int.class)
   1469             b = indexes.asIntBuffer();
   1470         else if (itemType == char.class)
   1471             b = indexes.asCharBuffer();
   1472         else if (itemType == short.class)
   1473             b = indexes.asShortBuffer();
   1474         else
   1475             // default or (itemType == byte.class)
   1476             b = indexes.slice();
   1477         indexes.position(oldpos);
   1478         return b;
   1479     }
   1480 
   1481     private static int GET_MAX_BYTES_PER_UCHAR(ByteBuffer indexes) {
   1482         indexes.position(0);
   1483         return indexes.getInt(EXT_COUNT_BYTES) & 0xff;
   1484     }
   1485 
   1486     /*
   1487      * @return index of the UChar, if found; else <0
   1488      */
   1489     static int findFromU(CharBuffer fromUSection, int length, char u) {
   1490         int i, start, limit;
   1491 
   1492         /* binary search */
   1493         start = 0;
   1494         limit = length;
   1495         for (;;) {
   1496             i = limit - start;
   1497             if (i <= 1) {
   1498                 break; /* done */
   1499             }
   1500             /* start<limit-1 */
   1501 
   1502             if (i <= 4) {
   1503                 /* linear search for the last part */
   1504                 if (u <= fromUSection.get(fromUSection.position() + start)) {
   1505                     break;
   1506                 }
   1507                 if (++start < limit && u <= fromUSection.get(fromUSection.position() + start)) {
   1508                     break;
   1509                 }
   1510                 if (++start < limit && u <= fromUSection.get(fromUSection.position() + start)) {
   1511                     break;
   1512                 }
   1513                 /* always break at start==limit-1 */
   1514                 ++start;
   1515                 break;
   1516             }
   1517 
   1518             i = (start + limit) / 2;
   1519             if (u < fromUSection.get(fromUSection.position() + i)) {
   1520                 limit = i;
   1521             } else {
   1522                 start = i;
   1523             }
   1524         }
   1525 
   1526         /* did we really find it? */
   1527         if (start < limit && u == fromUSection.get(fromUSection.position() + start)) {
   1528             return start;
   1529         } else {
   1530             return -1; /* not found */
   1531         }
   1532     }
   1533 
   1534     /*
   1535      * @return lookup value for the byte, if found; else 0
   1536      */
   1537     static int findToU(IntBuffer toUSection, int length, short byt) {
   1538         long word0, word;
   1539         int i, start, limit;
   1540 
   1541         /* check the input byte against the lowest and highest section bytes */
   1542         // agljport:comment instead of receiving a start position parameter for toUSection we'll rely on its position
   1543         // property
   1544         start = TO_U_GET_BYTE(toUSection.get(toUSection.position()));
   1545         limit = TO_U_GET_BYTE(toUSection.get(toUSection.position() + length - 1));
   1546         if (byt < start || limit < byt) {
   1547             return 0; /* the byte is out of range */
   1548         }
   1549 
   1550         if (length == ((limit - start) + 1)) {
   1551             /* direct access on a linear array */
   1552             return TO_U_GET_VALUE(toUSection.get(toUSection.position() + byt - start)); /* could be 0 */
   1553         }
   1554 
   1555         /* word0 is suitable for <=toUSection[] comparison, word for <toUSection[] */
   1556         word0 = TO_U_MAKE_WORD((byte) byt, 0) & UConverterConstants.UNSIGNED_INT_MASK;
   1557 
   1558         /*
   1559          * Shift byte once instead of each section word and add 0xffffff. We will compare the shifted/added byte
   1560          * (bbffffff) against section words which have byte values in the same bit position. If and only if byte bb <
   1561          * section byte ss then bbffffff<ssvvvvvv for all v=0..f so we need not mask off the lower 24 bits of each
   1562          * section word.
   1563          */
   1564         word = word0 | TO_U_VALUE_MASK;
   1565 
   1566         /* binary search */
   1567         start = 0;
   1568         limit = length;
   1569         for (;;) {
   1570             i = limit - start;
   1571             if (i <= 1) {
   1572                 break; /* done */
   1573             }
   1574             /* start<limit-1 */
   1575 
   1576             if (i <= 4) {
   1577                 /* linear search for the last part */
   1578                 if (word0 <= (toUSection.get(toUSection.position() + start) & UConverterConstants.UNSIGNED_INT_MASK)) {
   1579                     break;
   1580                 }
   1581                 if (++start < limit
   1582                         && word0 <= (toUSection.get(toUSection.position() + start) & UConverterConstants.UNSIGNED_INT_MASK)) {
   1583                     break;
   1584                 }
   1585                 if (++start < limit
   1586                         && word0 <= (toUSection.get(toUSection.position() + start) & UConverterConstants.UNSIGNED_INT_MASK)) {
   1587                     break;
   1588                 }
   1589                 /* always break at start==limit-1 */
   1590                 ++start;
   1591                 break;
   1592             }
   1593 
   1594             i = (start + limit) / 2;
   1595             if (word < (toUSection.get(toUSection.position() + i) & UConverterConstants.UNSIGNED_INT_MASK)) {
   1596                 limit = i;
   1597             } else {
   1598                 start = i;
   1599             }
   1600         }
   1601 
   1602         /* did we really find it? */
   1603         if (start < limit) {
   1604             word = (toUSection.get(toUSection.position() + start) & UConverterConstants.UNSIGNED_INT_MASK);
   1605             if (byt == TO_U_GET_BYTE((int)word)) {
   1606                 return TO_U_GET_VALUE((int) word); /* never 0 */
   1607             }
   1608         }
   1609         return 0; /* not found */
   1610     }
   1611 
   1612     /*
   1613      * TRUE if not an SI/SO stateful converter, or if the match length fits with the current converter state
   1614      */
   1615     static boolean TO_U_VERIFY_SISO_MATCH(byte sisoState, int match) {
   1616         return sisoState < 0 || (sisoState == 0) == (match == 1);
   1617     }
   1618 
   1619     /*
   1620      * get the SI/SO toU state (state 0 is for SBCS, 1 for DBCS), or 1 for DBCS-only, or -1 if the converter is not
   1621      * SI/SO stateful
   1622      *
   1623      * Note: For SI/SO stateful converters getting here, cnv->mode==0 is equivalent to firstLength==1.
   1624      */
   1625     private static int SISO_STATE(UConverterSharedData sharedData, int mode) {
   1626         return sharedData.mbcs.outputType == MBCS_OUTPUT_2_SISO ? (byte) mode
   1627                 : sharedData.mbcs.outputType == MBCS_OUTPUT_DBCS_ONLY ? 1 : -1;
   1628     }
   1629 
   1630     class CharsetDecoderMBCS extends CharsetDecoderICU {
   1631 
   /* Creates the decoder for charset cs; construction is delegated entirely to the superclass. */
   CharsetDecoderMBCS(CharsetICU cs) {
       super(cs);
   }
   1635 
   1636         protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
   1637         /* Just call cnvMBCSToUnicodeWithOffsets() to remove duplicate code. */
   1638             return cnvMBCSToUnicodeWithOffsets(source, target, offsets, flush);
   1639         }
   1640 
   1641         /*
   1642          * continue partial match with new input never called for simple, single-character conversion
   1643          */
   1644         private CoderResult continueMatchToU(ByteBuffer source, CharBuffer target, IntBuffer offsets, int srcIndex,
   1645                 boolean flush) {
   1646             CoderResult cr = CoderResult.UNDERFLOW;
   1647 
   1648             int[] value = new int[1];
   1649             int match, length;
   1650 
   1651             match = matchToU((byte) SISO_STATE(sharedData, mode), preToUArray, preToUBegin, preToULength, source,
   1652                     value, isToUUseFallback(), flush);
   1653 
   1654             if (match > 0) {
   1655                 if (match >= preToULength) {
   1656                     /* advance src pointer for the consumed input */
   1657                     source.position(source.position() + match - preToULength);
   1658                     preToULength = 0;
   1659                 } else {
   1660                     /* the match did not use all of preToU[] - keep the rest for replay */
   1661                     length = preToULength - match;
   1662                     System.arraycopy(preToUArray, preToUBegin + match, preToUArray, preToUBegin, length);
   1663                     preToULength = (byte) -length;
   1664                 }
   1665 
   1666                 /* write result */
   1667                 cr = writeToU(value[0], target, offsets, srcIndex);
   1668             } else if (match < 0) {
   1669                 /* save state for partial match */
   1670                 int j, sArrayIndex;
   1671 
   1672                 /* just _append_ the newly consumed input to preToU[] */
   1673                 sArrayIndex = source.position();
   1674                 match = -match;
   1675                 for (j = preToULength; j < match; ++j) {
   1676                     preToUArray[j] = source.get(sArrayIndex++);
   1677                 }
   1678                 source.position(sArrayIndex); /* same as *src=srcLimit; because we reached the end of input */
   1679                 preToULength = (byte) match;
   1680             } else /* match==0 */{
   1681                 /*
   1682                  * no match
   1683                  *
   1684                  * We need to split the previous input into two parts:
   1685                  *
   1686                  * 1. The first codepage character is unmappable - that's how we got into trying the extension data in
   1687                  * the first place. We need to move it from the preToU buffer to the error buffer, set an error code,
   1688                  * and prepare the rest of the previous input for 2.
   1689                  *
   1690                  * 2. The rest of the previous input must be converted once we come back from the callback for the first
   1691                  * character. At that time, we have to try again from scratch to convert these input characters. The
   1692                  * replay will be handled by the ucnv.c conversion code.
   1693                  */
   1694 
   1695                 /* move the first codepage character to the error field */
   1696                 System.arraycopy(preToUArray, preToUBegin, toUBytesArray, toUBytesBegin, preToUFirstLength);
   1697                 toULength = preToUFirstLength;
   1698 
   1699                 /* move the rest up inside the buffer */
   1700                 length = preToULength - preToUFirstLength;
   1701                 if (length > 0) {
   1702                     System.arraycopy(preToUArray, preToUBegin + preToUFirstLength, preToUArray, preToUBegin, length);
   1703                 }
   1704 
   1705                 /* mark preToU for replay */
   1706                 preToULength = (byte) -length;
   1707 
   1708                 /* set the error code for unassigned */
   1709                 cr = CoderResult.unmappableForLength(preToUFirstLength);
   1710             }
   1711             return cr;
   1712         }
   1713 
   1714         /*
   1715          * this works like matchFromU() except - the first character is in pre - no trie is used - the returned
   1716          * matchLength is not offset by 2
   1717          */
   1718         private int matchToU(byte sisoState, byte[] preArray, int preArrayBegin, int preLength, ByteBuffer source,
   1719                 int[] pMatchValue, boolean isUseFallback, boolean flush) {
   1720             ByteBuffer cx = sharedData.mbcs.extIndexes;
   1721             IntBuffer toUTable, toUSection;
   1722 
   1723             int value, matchValue, srcLength = 0;
   1724             int i, j, index, length, matchLength;
   1725             short b;
   1726 
   1727             if (cx == null || cx.asIntBuffer().get(EXT_TO_U_LENGTH) <= 0) {
   1728                 return 0; /* no extension data, no match */
   1729             }
   1730 
   1731             /* initialize */
   1732             toUTable = (IntBuffer) ARRAY(cx, EXT_TO_U_INDEX, int.class);
   1733             index = 0;
   1734 
   1735             matchValue = 0;
   1736             i = j = matchLength = 0;
   1737             if (source != null) {
   1738                 srcLength = source.remaining();
   1739             }
   1740 
   1741             if (sisoState == 0) {
   1742                 /* SBCS state of an SI/SO stateful converter, look at only exactly 1 byte */
   1743                 if (preLength > 1) {
   1744                     return 0; /* no match of a DBCS sequence in SBCS mode */
   1745                 } else if (preLength == 1) {
   1746                     srcLength = 0;
   1747                 } else /* preLength==0 */{
   1748                     if (srcLength > 1) {
   1749                         srcLength = 1;
   1750                     }
   1751                 }
   1752                 flush = true;
   1753             }
   1754 
   1755             /* we must not remember fallback matches when not using fallbacks */
   1756 
   1757             /* match input units until there is a full match or the input is consumed */
   1758             for (;;) {
   1759                 /* go to the next section */
   1760                 int oldpos = toUTable.position();
   1761                 toUSection = ((IntBuffer) toUTable.position(index)).slice();
   1762                 toUTable.position(oldpos);
   1763 
   1764                 /* read first pair of the section */
   1765                 value = toUSection.get();
   1766                 length = TO_U_GET_BYTE(value);
   1767                 value = TO_U_GET_VALUE(value);
   1768                 if (value != 0 && (TO_U_IS_ROUNDTRIP(value) || isToUUseFallback(isUseFallback))
   1769                         && TO_U_VERIFY_SISO_MATCH(sisoState, i + j)) {
   1770                     /* remember longest match so far */
   1771                     matchValue = value;
   1772                     matchLength = i + j;
   1773                 }
   1774 
   1775                 /* match pre[] then src[] */
   1776                 if (i < preLength) {
   1777                     b = (short) (preArray[preArrayBegin + i++] & UConverterConstants.UNSIGNED_BYTE_MASK);
   1778                 } else if (j < srcLength) {
   1779                     b = (short) (source.get(source.position() + j++) & UConverterConstants.UNSIGNED_BYTE_MASK);
   1780                 } else {
   1781                     /* all input consumed, partial match */
   1782                     if (flush || (length = (i + j)) > MAX_BYTES) {
   1783                         /*
   1784                          * end of the entire input stream, stop with the longest match so far or: partial match must not
   1785                          * be longer than UCNV_EXT_MAX_BYTES because it must fit into state buffers
   1786                          */
   1787                         break;
   1788                     } else {
   1789                         /* continue with more input next time */
   1790                         return -length;
   1791                     }
   1792                 }
   1793 
   1794                 /* search for the current UChar */
   1795                 value = findToU(toUSection, length, b);
   1796                 if (value == 0) {
   1797                     /* no match here, stop with the longest match so far */
   1798                     break;
   1799                 } else {
   1800                     if (TO_U_IS_PARTIAL(value)) {
   1801                         /* partial match, continue */
   1802                         index = TO_U_GET_PARTIAL_INDEX(value);
   1803                     } else {
   1804                         if ((TO_U_IS_ROUNDTRIP(value) || isToUUseFallback(isUseFallback)) && TO_U_VERIFY_SISO_MATCH(sisoState, i + j)) {
   1805                             /* full match, stop with result */
   1806                             matchValue = value;
   1807                             matchLength = i + j;
   1808                         } else {
   1809                             /* full match on fallback not taken, stop with the longest match so far */
   1810                         }
   1811                         break;
   1812                     }
   1813                 }
   1814             }
   1815 
   1816             if (matchLength == 0) {
   1817                 /* no match at all */
   1818                 return 0;
   1819             }
   1820 
   1821             /* return result */
   1822             pMatchValue[0] = TO_U_MASK_ROUNDTRIP(matchValue);
   1823             return matchLength;
   1824         }
   1825 
   1826         private CoderResult writeToU(int value, CharBuffer target, IntBuffer offsets, int srcIndex) {
   1827             ByteBuffer cx = sharedData.mbcs.extIndexes;
   1828             /* output the result */
   1829             if (TO_U_IS_CODE_POINT(value)) {
   1830                 /* output a single code point */
   1831                 return toUWriteCodePoint(TO_U_GET_CODE_POINT(value), target, offsets, srcIndex);
   1832             } else {
   1833                 /* output a string - with correct data we have resultLength>0 */
   1834 
   1835                 char[] a = new char[TO_U_GET_LENGTH(value)];
   1836                 CharBuffer cb = ((CharBuffer) ARRAY(cx, EXT_TO_U_UCHARS_INDEX, char.class));
   1837                 cb.position(TO_U_GET_INDEX(value));
   1838                 cb.get(a, 0, a.length);
   1839                 return toUWriteUChars(this, a, 0, a.length, target, offsets, srcIndex);
   1840             }
   1841         }
   1842 
   1843         private CoderResult toUWriteCodePoint(int c, CharBuffer target, IntBuffer offsets, int sourceIndex) {
   1844             CoderResult cr = CoderResult.UNDERFLOW;
   1845             int tBeginIndex = target.position();
   1846 
   1847             if (target.hasRemaining()) {
   1848                 if (c <= 0xffff) {
   1849                     target.put((char) c);
   1850                     c = UConverterConstants.U_SENTINEL;
   1851                 } else /* c is a supplementary code point */{
   1852                     target.put(UTF16.getLeadSurrogate(c));
   1853                     c = UTF16.getTrailSurrogate(c);
   1854                     if (target.hasRemaining()) {
   1855                         target.put((char) c);
   1856                         c = UConverterConstants.U_SENTINEL;
   1857                     }
   1858                 }
   1859 
   1860                 /* write offsets */
   1861                 if (offsets != null) {
   1862                     offsets.put(sourceIndex);
   1863                     if ((tBeginIndex + 1) < target.position()) {
   1864                         offsets.put(sourceIndex);
   1865                     }
   1866                 }
   1867             }
   1868 
   1869             /* write overflow from c */
   1870             if (c >= 0) {
   1871                 charErrorBufferLength = UTF16.append(charErrorBufferArray, 0, c);
   1872                 cr = CoderResult.OVERFLOW;
   1873             }
   1874 
   1875             return cr;
   1876         }
   1877 
   1878         /*
   1879          * Input sequence: cnv->toUBytes[0..length[ @return if(U_FAILURE) return the length (toULength, byteIndex) for
   1880          * the input else return 0 after output has been written to the target
   1881          */
   1882         private int toU(int length, ByteBuffer source, CharBuffer target, IntBuffer offsets, int sourceIndex,
   1883                 boolean flush, CoderResult[] cr) {
   1884             // ByteBuffer cx;
   1885 
   1886             if (sharedData.mbcs.extIndexes != null
   1887                     && initialMatchToU(length, source, target, offsets, sourceIndex, flush, cr)) {
   1888                 return 0; /* an extension mapping handled the input */
   1889             }
   1890 
   1891             /* GB 18030 */
   1892             if (length == 4 && (options & MBCS_OPTION_GB18030) != 0) {
   1893                 int[] range;
   1894                 int linear;
   1895                 int i;
   1896 
   1897                 linear = LINEAR_18030(toUBytesArray[0], toUBytesArray[1], toUBytesArray[2], toUBytesArray[3]);
   1898                 for (i = 0; i < gb18030Ranges.length; ++i) {
   1899                     range = gb18030Ranges[i];
   1900                     if (range[2] <= linear && linear <= range[3]) {
   1901                         /* found the sequence, output the Unicode code point for it */
   1902                         cr[0] = CoderResult.UNDERFLOW;
   1903 
   1904                         /* add the linear difference between the input and start sequences to the start code point */
   1905                         linear = range[0] + (linear - range[2]);
   1906 
   1907                         /* output this code point */
   1908                         cr[0] = toUWriteCodePoint(linear, target, offsets, sourceIndex);
   1909 
   1910                         return 0;
   1911                     }
   1912                 }
   1913             }
   1914 
   1915             /* no mapping */
   1916             cr[0] = CoderResult.unmappableForLength(length);
   1917             return length;
   1918         }
   1919 
   1920         /*
   1921          * target<targetLimit; set error code for overflow
   1922          */
   1923         private boolean initialMatchToU(int firstLength, ByteBuffer source, CharBuffer target, IntBuffer offsets,
   1924                 int srcIndex, boolean flush, CoderResult[] cr) {
   1925             int[] value = new int[1];
   1926             int match = 0;
   1927 
   1928             /* try to match */
   1929             match = matchToU((byte) SISO_STATE(sharedData, mode), toUBytesArray, toUBytesBegin, firstLength, source,
   1930                     value, isToUUseFallback(), flush);
   1931             if (match > 0) {
   1932                 /* advance src pointer for the consumed input */
   1933                 source.position(source.position() + match - firstLength);
   1934 
   1935                 /* write result to target */
   1936                 cr[0] = writeToU(value[0], target, offsets, srcIndex);
   1937                 return true;
   1938             } else if (match < 0) {
   1939                 /* save state for partial match */
   1940                 byte[] sArray;
   1941                 int sArrayIndex;
   1942                 int j;
   1943 
   1944                 /* copy the first code point */
   1945                 sArray = toUBytesArray;
   1946                 sArrayIndex = toUBytesBegin;
   1947                 preToUFirstLength = (byte) firstLength;
   1948                 for (j = 0; j < firstLength; ++j) {
   1949                     preToUArray[j] = sArray[sArrayIndex++];
   1950                 }
   1951 
   1952                 /* now copy the newly consumed input */
   1953                 sArrayIndex = source.position();
   1954                 match = -match;
   1955                 for (; j < match; ++j) {
   1956                     preToUArray[j] = source.get(sArrayIndex++);
   1957                 }
   1958                 source.position(sArrayIndex);
   1959                 preToULength = (byte) match;
   1960                 return true;
   1961             } else /* match==0 no match */{
   1962                 return false;
   1963             }
   1964         }
   1965 
   1966         private int simpleMatchToU(ByteBuffer source, boolean useFallback) {
   1967             int[] value = new int[1];
   1968             int match;
   1969 
   1970             if (source.remaining() <= 0) {
   1971                 return 0xffff;
   1972             }
   1973 
   1974             /* try to match */
   1975             byte[] sourceArray;
   1976             int sourcePosition, sourceLimit;
   1977             if (source.isReadOnly()) {
   1978                 // source.array() would throw an exception
   1979                 sourcePosition = source.position();  // relative to source.array()
   1980                 sourceArray = new byte[Math.min(source.remaining(), EXT_MAX_BYTES)];
   1981                 source.get(sourceArray).position(sourcePosition);
   1982                 sourcePosition = 0;  // relative to sourceArray
   1983                 sourceLimit = sourceArray.length;
   1984             } else {
   1985                 sourceArray = source.array();
   1986                 sourcePosition = source.position();
   1987                 sourceLimit = source.limit();
   1988             }
   1989             match = matchToU((byte) -1, sourceArray, sourcePosition, sourceLimit, null, value, useFallback, true);
   1990 
   1991             if (match == source.remaining()) {
   1992                 /* write result for simple, single-character conversion */
   1993                 if (TO_U_IS_CODE_POINT(value[0])) {
   1994                     return TO_U_GET_CODE_POINT(value[0]);
   1995                 }
   1996             }
   1997 
   1998             /*
   1999              * return no match because - match>0 && value points to string: simple conversion cannot handle multiple
   2000              * code points - match>0 && match!=length: not all input consumed, forbidden for this function - match==0:
   2001              * no match found in the first place - match<0: partial match, not supported for simple conversion (and
   2002              * flush==TRUE)
   2003              */
   2004             return 0xfffe;
   2005         }
   2006 
                /**
                 * Converts bytes from {@code source} into UTF-16 code units in {@code target} by walking the
                 * converter's state table (the LF/NL-swapped table when OPTION_SWAP_LFNL is set).
                 *
                 * <p>Steps, in order:
                 * <ol>
                 * <li>If {@code preToULength > 0}, continueMatchToU() is called first; the method returns early
                 * when that reports an error or leaves {@code preToULength} negative.</li>
                 * <li>If the table has exactly one state, the work is delegated to
                 * cnvMBCSSingleToBMPWithOffsets() or cnvMBCSSingleToUnicodeWithOffsets().</li>
                 * <li>Otherwise the loop below resumes from toUnicodeStatus/mode/toULength/toUBytesArray,
                 * runs until the input is exhausted, the target is full or an error is found, and then
                 * stores the state back into toUnicodeStatus/mode/toULength.</li>
                 * </ol>
                 *
                 * @param source  input bytes; on the multi-state path its position is set to the loop's final
                 *                read index before returning
                 * @param target  receives the converted chars
                 * @param offsets if non-null, receives a source index for each char this method puts into
                 *                {@code target}
                 * @param flush   forwarded to continueMatchToU() and toU()
                 * @return the last result stored in {@code cr[0]}: UNDERFLOW initially, OVERFLOW when the
                 *         target fills up, a malformed-input result for illegal sequences, or whatever the
                 *         helper methods report
                 */
   2007         CoderResult cnvMBCSToUnicodeWithOffsets(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
   2008             CoderResult[] cr = { CoderResult.UNDERFLOW };
   2009 
   2010             int sourceArrayIndex, sourceArrayIndexStart;
   2011             int stateTable[][/* 256 */];
   2012             char[] unicodeCodeUnits;
   2013 
   2014             int offset;
   2015             byte state;
   2016             int byteIndex;
   2017             byte[] bytes;
   2018 
   2019             int sourceIndex, nextSourceIndex;
   2020 
   2021             int entry = 0;
   2022             char c;
   2023             byte action;
   2024 
   2025             if (preToULength > 0) {
   2026                 /*
   2027                  * pass sourceIndex=-1 because we continue from an earlier buffer in the future, this may change with
   2028                  * continuous offsets
   2029                  */
   2030                 cr[0] = continueMatchToU(source, target, offsets, -1, flush);
   2031 
   2032                 if (cr[0].isError() || preToULength < 0) {
   2033                     return cr[0];
   2034                 }
   2035             }
   2036 
                    /* a table with a single state is handled entirely by the single-byte variants */
   2037             if (sharedData.mbcs.countStates == 1) {
   2038                 if (!sharedData.mbcs.hasSupplementary()) {
   2039                     cr[0] = cnvMBCSSingleToBMPWithOffsets(source, target, offsets, flush);
   2040                 } else {
   2041                     cr[0] = cnvMBCSSingleToUnicodeWithOffsets(source, target, offsets, flush);
   2042                 }
   2043                 return cr[0];
   2044             }
   2045 
   2046             /* set up the local pointers */
   2047             sourceArrayIndex = sourceArrayIndexStart = source.position();
   2048 
   2049             if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
   2050                 stateTable = sharedData.mbcs.swapLFNLStateTable;
   2051             } else {
   2052                 stateTable = sharedData.mbcs.stateTable;
   2053             }
   2054             unicodeCodeUnits = sharedData.mbcs.unicodeCodeUnits;
   2055 
   2056             /* get the converter state from UConverter */
   2057             offset = toUnicodeStatus;
   2058             byteIndex = toULength;
   2059             bytes = toUBytesArray;
   2060 
   2061             /*
   2062              * if we are in the SBCS state for a DBCS-only converter, then load the DBCS state from the MBCS data
   2063              * (dbcsOnlyState==0 if it is not a DBCS-only converter)
   2064              */
   2065             state = (byte)mode;
   2066             if (state == 0) {
   2067                 state = sharedData.mbcs.dbcsOnlyState;
   2068             }
   2069 
   2070             /* sourceIndex=-1 if the current character began in the previous buffer */
   2071             sourceIndex = byteIndex == 0 ? 0 : -1;
   2072             nextSourceIndex = 0;
   2073 
   2074             /* conversion loop */
   2075             while (sourceArrayIndex < source.limit()) {
   2076                 /*
   2077                  * This following test is to see if available input would overflow the output. It does not catch output
   2078                  * of more than one code unit that overflows as a result of a surrogate pair or callback output from the
   2079                  * last source byte. Therefore, those situations also test for overflows and will then break the loop,
   2080                  * too.
   2081                  */
   2082                 if (!target.hasRemaining()) {
   2083                     /* target is full */
   2084                     cr[0] = CoderResult.OVERFLOW;
   2085                     break;
   2086                 }
   2087 
   2088                 if (byteIndex == 0) {
   2089                     /* optimized loop for 1/2-byte input and BMP output */
   2090                     // agljport:todo see ucnvmbcs.c for deleted block
   2091                     do {
   2092                         entry = stateTable[state][source.get(sourceArrayIndex)&UConverterConstants.UNSIGNED_BYTE_MASK];
   2093                         if (MBCS_ENTRY_IS_TRANSITION(entry)) {
   2094                             state = (byte)MBCS_ENTRY_TRANSITION_STATE(entry);
   2095                             offset = MBCS_ENTRY_TRANSITION_OFFSET(entry);
   2096                             ++sourceArrayIndex;
   2097                             if (sourceArrayIndex < source.limit()
   2098                                     && MBCS_ENTRY_IS_FINAL(entry = stateTable[state][source.get(sourceArrayIndex)&UConverterConstants.UNSIGNED_BYTE_MASK])
   2099                                     && MBCS_ENTRY_FINAL_ACTION(entry) == MBCS_STATE_VALID_16
   2100                                     && (c = unicodeCodeUnits[offset + MBCS_ENTRY_FINAL_VALUE_16(entry)]) < 0xfffe) {
   2101                                 ++sourceArrayIndex;
   2102                                 target.put(c);
   2103                                 if (offsets != null) {
   2104                                     offsets.put(sourceIndex);
   2105                                     sourceIndex = (nextSourceIndex += 2);
   2106                                 }
   2107                                 state = (byte)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
   2108                                 offset = 0;
   2109                             } else {
   2110                                 /* set the state and leave the optimized loop */
   2111                                 ++nextSourceIndex;
   2112                                 bytes[0] = source.get(sourceArrayIndex - 1);
   2113                                 byteIndex = 1;
   2114                                 break;
   2115                             }
   2116                         } else {
   2117                             if (MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
   2118                                 /* output BMP code point */
   2119                                 ++sourceArrayIndex;
   2120                                 target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));
   2121                                 if (offsets != null) {
   2122                                     offsets.put(sourceIndex);
   2123                                     sourceIndex = ++nextSourceIndex;
   2124                                 }
   2125                                 state = (byte)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
   2126                             } else {
   2127                                 /* leave the optimized loop */
   2128                                 break;
   2129                             }
   2130                         }
   2131                     } while (sourceArrayIndex < source.limit() && target.hasRemaining());
   2132                     /*
   2133                      * these tests and break statements could be put inside the loop if C had "break outerLoop" like
   2134                      * Java
   2135                      */
   2136                     if (sourceArrayIndex >= source.limit()) {
   2137                         break;
   2138                     }
   2139                     if (!target.hasRemaining()) {
   2140                         /* target is full */
   2141                         cr[0] = CoderResult.OVERFLOW;
   2142                         break;
   2143                     }
   2144 
   2145                     ++nextSourceIndex;
   2146                     bytes[byteIndex++] = source.get(sourceArrayIndex++);
   2147                 } else /* byteIndex>0 */{
   2148                     ++nextSourceIndex;
   2149                     entry = stateTable[state][(bytes[byteIndex++] = source.get(sourceArrayIndex++))
   2150                             & UConverterConstants.UNSIGNED_BYTE_MASK];
   2151                 }
   2152 
   2153                 if (MBCS_ENTRY_IS_TRANSITION(entry)) {
   2154                     state = (byte)MBCS_ENTRY_TRANSITION_STATE(entry);
   2155                     offset += MBCS_ENTRY_TRANSITION_OFFSET(entry);
   2156                     continue;
   2157                 }
   2158 
   2159                 /* save the previous state for proper extension mapping with SI/SO-stateful converters */
   2160                 mode = state;
   2161 
   2162                 /* set the next state early so that we can reuse the entry variable */
   2163                 state = (byte)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
   2164 
   2165                 /*
   2166                  * An if-else-if chain provides more reliable performance for the most common cases compared to a
   2167                  * switch.
   2168                  */
                        /*
                         * In every branch below, byteIndex is reset to 0 only when output was produced (or nothing
                         * needs to be reported); a byteIndex left > 0 without an error in cr[0] sends the sequence
                         * to the extension mapping after the chain.
                         */
   2169                 action = (byte)MBCS_ENTRY_FINAL_ACTION(entry);
   2170                 if (action == MBCS_STATE_VALID_16) {
   2171                     offset += MBCS_ENTRY_FINAL_VALUE_16(entry);
   2172                     c = unicodeCodeUnits[offset];
   2173                     if (c < 0xfffe) {
   2174                         /* output BMP code point */
   2175                         target.put(c);
   2176                         if (offsets != null) {
   2177                             offsets.put(sourceIndex);
   2178                         }
   2179                         byteIndex = 0;
   2180                     } else if (c == 0xfffe) {
   2181                         if (isFallbackUsed() && (entry = getFallback(sharedData.mbcs, offset)) != 0xfffe) {
   2182                             /* output fallback BMP code point */
   2183                             target.put((char)entry);
   2184                             if (offsets != null) {
   2185                                 offsets.put(sourceIndex);
   2186                             }
   2187                             byteIndex = 0;
   2188                         }
   2189                     } else {
   2190                         /* callback(illegal) */
   2191                         cr[0] = CoderResult.malformedForLength(byteIndex);
   2192                     }
   2193                 } else if (action == MBCS_STATE_VALID_DIRECT_16) {
   2194                     /* output BMP code point */
   2195                     target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));
   2196                     if (offsets != null) {
   2197                         offsets.put(sourceIndex);
   2198                     }
   2199                     byteIndex = 0;
   2200                 } else if (action == MBCS_STATE_VALID_16_PAIR) {
   2201                     offset += MBCS_ENTRY_FINAL_VALUE_16(entry);
   2202                     c = unicodeCodeUnits[offset++];
   2203                     if (c < 0xd800) {
   2204                         /* output BMP code point below 0xd800 */
   2205                         target.put(c);
   2206                         if (offsets != null) {
   2207                             offsets.put(sourceIndex);
   2208                         }
   2209                         byteIndex = 0;
   2210                     } else if (isFallbackUsed() ? c <= 0xdfff : c <= 0xdbff) {
   2211                         /* output roundtrip or fallback surrogate pair */
   2212                         target.put((char)(c & 0xdbff));
   2213                         if (offsets != null) {
   2214                             offsets.put(sourceIndex);
   2215                         }
   2216                         byteIndex = 0;
   2217                         if (target.hasRemaining()) {
   2218                             target.put(unicodeCodeUnits[offset]);
   2219                             if (offsets != null) {
   2220                                 offsets.put(sourceIndex);
   2221                             }
   2222                         } else {
   2223                             /* target overflow */
                                    /* the second code unit is parked in charErrorBufferArray instead of being written */
   2224                             charErrorBufferArray[0] = unicodeCodeUnits[offset];
   2225                             charErrorBufferLength = 1;
   2226                             cr[0] = CoderResult.OVERFLOW;
   2227 
   2228                             offset = 0;
   2229                             break;
   2230                         }
   2231                     } else if (isFallbackUsed() ? (c & 0xfffe) == 0xe000 : c == 0xe000) {
   2232                         /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
   2233                         target.put(unicodeCodeUnits[offset]);
   2234                         if (offsets != null) {
   2235                             offsets.put(sourceIndex);
   2236                         }
   2237                         byteIndex = 0;
   2238                     } else if (c == 0xffff) {
   2239                         /* callback(illegal) */
   2240                         cr[0] = CoderResult.malformedForLength(byteIndex);
   2241                     }
   2242                 } else if (action == MBCS_STATE_VALID_DIRECT_20
   2243                         || (action == MBCS_STATE_FALLBACK_DIRECT_20 && isFallbackUsed())) {
   2244                     entry = MBCS_ENTRY_FINAL_VALUE(entry);
   2245                     /* output surrogate pair */
   2246                     target.put((char)(0xd800 | (char)(entry >> 10)));
   2247                     if (offsets != null) {
   2248                         offsets.put(sourceIndex);
   2249                     }
   2250                     byteIndex = 0;
   2251                     c = (char)(0xdc00 | (char)(entry & 0x3ff));
   2252                     if (target.hasRemaining()) {
   2253                         target.put(c);
   2254                         if (offsets != null) {
   2255                             offsets.put(sourceIndex);
   2256                         }
   2257                     } else {
   2258                         /* target overflow */
                                /* the trail surrogate is parked in charErrorBufferArray instead of being written */
   2259                         charErrorBufferArray[0] = c;
   2260                         charErrorBufferLength = 1;
   2261                         cr[0] = CoderResult.OVERFLOW;
   2262 
   2263                         offset = 0;
   2264                         break;
   2265                     }
   2266                 } else if (action == MBCS_STATE_CHANGE_ONLY) {
   2267                     /*
   2268                      * This serves as a state change without any output. It is useful for reading simple stateful
   2269                      * encodings, for example using just Shift-In/Shift-Out codes. The 21 unused bits may later be used
   2270                      * for more sophisticated state transitions.
   2271                      */
   2272                     if (sharedData.mbcs.dbcsOnlyState == 0) {
   2273                         byteIndex = 0;
   2274                     } else {
   2275                         /* SI/SO are illegal for DBCS-only conversion */
   2276                         state = (byte)(mode); /* restore the previous state */
   2277 
   2278                         /* callback(illegal) */
   2279                         cr[0] = CoderResult.malformedForLength(byteIndex);
   2280                     }
   2281                 } else if (action == MBCS_STATE_FALLBACK_DIRECT_16) {
   2282                     if (isFallbackUsed()) {
   2283                         /* output BMP code point */
   2284                         target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));
   2285                         if (offsets != null) {
   2286                             offsets.put(sourceIndex);
   2287                         }
   2288                         byteIndex = 0;
   2289                     }
   2290                 } else if (action == MBCS_STATE_UNASSIGNED) {
   2291                     /* just fall through */
   2292                 } else if (action == MBCS_STATE_ILLEGAL) {
   2293                     /* callback(illegal) */
   2294                     cr[0] = CoderResult.malformedForLength(byteIndex);
   2295                 } else {
   2296                     /* reserved, must never occur */
   2297                     byteIndex = 0;
   2298                 }
   2299 
   2300                 /* end of action codes: prepare for a new character */
   2301                 offset = 0;
   2302 
   2303                 if (byteIndex == 0) {
   2304                     sourceIndex = nextSourceIndex;
   2305                 } else if (cr[0].isError()) {
   2306                     /* callback(illegal) */
   2307                     if (byteIndex > 1) {
   2308                         /*
   2309                          * Ticket 5691: consistent illegal sequences:
   2310                          * - We include at least the first byte in the illegal sequence.
   2311                          * - If any of the non-initial bytes could be the start of a character,
   2312                          *   we stop the illegal sequence before the first one of those.
   2313                          */
   2314                         boolean isDBCSOnly = (sharedData.mbcs.dbcsOnlyState != 0);
   2315                         byte i;
   2316                         for (i = 1; i < byteIndex && !isSingleOrLead(stateTable, state, isDBCSOnly, (short)(bytes[i] & UConverterConstants.UNSIGNED_BYTE_MASK)); i++) {}
   2317                         if (i < byteIndex) {
   2318                             byte backOutDistance = (byte)(byteIndex - i);
   2319                             int bytesFromThisBuffer = sourceArrayIndex - sourceArrayIndexStart;
   2320                             byteIndex = i; /* length of reported illegal byte sequence */
   2321                             if (backOutDistance <= bytesFromThisBuffer) {
   2322                                 sourceArrayIndex -= backOutDistance;
   2323                             } else {
   2324                                 /* Back out bytes from the previous buffer: Need to replay them. */
   2325                                 this.preToULength = (byte)(bytesFromThisBuffer - backOutDistance);
   2326                                 /* preToULength is negative! */
   2327                                 for (int n = 0; n < -this.preToULength; n++) {
   2328                                     this.preToUArray[n] = bytes[i+n];
   2329                                 }
   2330                                 sourceArrayIndex = sourceArrayIndexStart;
   2331                             }
   2332                         }
   2333                     }
   2334                     break;
   2335                 } else /* unassigned sequences indicated with byteIndex>0 */{
   2336                     /* try an extension mapping */
   2337                     int sourceBeginIndex = sourceArrayIndex;
   2338                     source.position(sourceArrayIndex);
   2339                     byteIndex = toU(byteIndex, source, target, offsets, sourceIndex, flush, cr);
   2340                     sourceArrayIndex = source.position();
   2341                     sourceIndex = nextSourceIndex += (sourceArrayIndex - sourceBeginIndex);
   2342 
                            /* overflow is tested separately because CoderResult.OVERFLOW does not count as an error */
   2343                     if (cr[0].isError() || cr[0].isOverflow()) {
   2344                         /* not mappable or buffer overflow */
   2345                         break;
   2346                     }
   2347                 }
   2348             }
   2349 
   2350             /* set the converter state back into UConverter */
   2351             toUnicodeStatus = offset;
   2352             mode = state;
   2353             toULength = byteIndex;
   2354 
   2355             /* write back the updated pointers */
   2356             source.position(sourceArrayIndex);
   2357 
   2358             return cr[0];
   2359         }
   2360         /*
   2361          * This version of cnvMBCSSingleToUnicodeWithOffsets() is optimized for single-byte, single-state codepages that
   2362          * only map to and from the BMP. In addition to single-byte optimizations, the offset calculations become much
   2363          * easier.
   2364          */
   2365         private CoderResult cnvMBCSSingleToBMPWithOffsets(ByteBuffer source, CharBuffer target, IntBuffer offsets,
   2366                 boolean flush) {
   2367             CoderResult[] cr = { CoderResult.UNDERFLOW };
   2368 
   2369             int sourceArrayIndex, lastSource;
   2370             int targetCapacity, length;
   2371             int[][] stateTable;
   2372 
   2373             int sourceIndex;
   2374 
   2375             int entry;
   2376             byte action;
   2377 
   2378             /* set up the local pointers */
   2379             sourceArrayIndex = source.position();
   2380             targetCapacity = target.remaining();
   2381 
   2382             if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
   2383                 stateTable = sharedData.mbcs.swapLFNLStateTable;
   2384             } else {
   2385                 stateTable = sharedData.mbcs.stateTable;
   2386             }
   2387 
   2388             /* sourceIndex=-1 if the current character began in the previous buffer */
   2389             sourceIndex = 0;
   2390             lastSource = sourceArrayIndex;
   2391 
   2392             /*
   2393              * since the conversion here is 1:1 UChar:uint8_t, we need only one counter for the minimum of the
   2394              * sourceLength and targetCapacity
   2395              */
   2396             length = source.remaining();
   2397             if (length < targetCapacity) {
   2398                 targetCapacity = length;
   2399             }
   2400 
   2401             /* conversion loop */
   2402             while (targetCapacity > 0 && sourceArrayIndex < source.limit()) {
   2403                 entry = stateTable[0][source.get(sourceArrayIndex++) & UConverterConstants.UNSIGNED_BYTE_MASK];
   2404                 /* MBCS_ENTRY_IS_FINAL(entry) */
   2405 
   2406                 /* test the most common case first */
   2407                 if (MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
   2408                     /* output BMP code point */
   2409                     target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));
   2410                     --targetCapacity;
   2411                     continue;
   2412                 }
   2413 
   2414                 /*
   2415                  * An if-else-if chain provides more reliable performance for the most common cases compared to a
   2416                  * switch.
   2417                  */
   2418                 action = (byte) (MBCS_ENTRY_FINAL_ACTION(entry));
   2419                 if (action == MBCS_STATE_FALLBACK_DIRECT_16) {
   2420                     if (isFallbackUsed()) {
   2421                         /* output BMP code point */
   2422                         target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));
   2423                         --targetCapacity;
   2424                         continue;
   2425                     }
   2426                 } else if (action == MBCS_STATE_UNASSIGNED) {
   2427                     /* just fall through */
   2428                 } else if (action == MBCS_STATE_ILLEGAL) {
   2429                     /* callback(illegal) */
   2430                     cr[0] = CoderResult.malformedForLength(sourceArrayIndex - lastSource);
   2431                 } else {
   2432                     /* reserved, must never occur */
   2433                     continue;
   2434                 }
   2435 
   2436                 /* set offsets since the start or the last extension */
   2437                 if (offsets != null) {
   2438                     int count = sourceArrayIndex - lastSource;
   2439 
   2440                     /* predecrement: do not set the offset for the callback-causing character */
   2441                     while (--count > 0) {
   2442                         offsets.put(sourceIndex++);
   2443                     }
   2444                     /* offset and sourceIndex are now set for the current character */
   2445                 }
   2446 
   2447                 if (cr[0].isError()) {
   2448                     /* callback(illegal) */
   2449                     break;
   2450                 } else /* unassigned sequences indicated with byteIndex>0 */{
   2451                     /* try an extension mapping */
   2452                     lastSource = sourceArrayIndex;
   2453                     toUBytesArray[0] = source.get(sourceArrayIndex - 1);
   2454                     source.position(sourceArrayIndex);
   2455                     toULength = toU((byte) 1, source, target, offsets, sourceIndex, flush, cr);
   2456                     sourceArrayIndex = source.position();
   2457                     sourceIndex += 1 + (sourceArrayIndex - lastSource);
   2458 
   2459                     if (cr[0].isError()) {
   2460                         /* not mappable or buffer overflow */
   2461                         break;
   2462                     }
   2463 
   2464                     /* recalculate the targetCapacity after an extension mapping */
   2465                     targetCapacity = target.remaining();
   2466                     length = source.remaining();
   2467                     if (length < targetCapacity) {
   2468                         targetCapacity = length;
   2469                     }
   2470                 }
   2471             }
   2472 
   2473             if (!cr[0].isError() && sourceArrayIndex < source.limit() && !target.hasRemaining()) {
   2474                 /* target is full */
   2475                 cr[0] = CoderResult.OVERFLOW;
   2476             }
   2477 
   2478             /* set offsets since the start or the last callback */
   2479             if (offsets != null) {
   2480                 int count = sourceArrayIndex - lastSource;
   2481                 while (count > 0) {
   2482                     offsets.put(sourceIndex++);
   2483                     --count;
   2484                 }
   2485             }
   2486 
   2487             /* write back the updated pointers */
   2488             source.position(sourceArrayIndex);
   2489 
   2490             return cr[0];
   2491         }
   2492 
   2493         /* This version of cnvMBCSToUnicodeWithOffsets() is optimized for single-byte, single-state codepages. */
   2494         private CoderResult cnvMBCSSingleToUnicodeWithOffsets(ByteBuffer source, CharBuffer target, IntBuffer offsets,
   2495                 boolean flush) {
   2496             CoderResult[] cr = { CoderResult.UNDERFLOW };
   2497 
   2498             int sourceArrayIndex;
   2499             int[][] stateTable;
   2500 
   2501             int sourceIndex;
   2502 
   2503             int entry;
   2504             char c;
   2505             byte action;
   2506 
   2507             /* set up the local pointers */
   2508             sourceArrayIndex = source.position();
   2509 
   2510             if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
   2511                 stateTable = sharedData.mbcs.swapLFNLStateTable;
   2512             } else {
   2513                 stateTable = sharedData.mbcs.stateTable;
   2514             }
   2515 
   2516             /* sourceIndex=-1 if the current character began in the previous buffer */
   2517             sourceIndex = 0;
   2518 
   2519             /* conversion loop */
   2520             while (sourceArrayIndex < source.limit()) {
   2521                 /*
   2522                  * This following test is to see if available input would overflow the output. It does not catch output
   2523                  * of more than one code unit that overflows as a result of a surrogate pair or callback output from the
   2524                  * last source byte. Therefore, those situations also test for overflows and will then break the loop,
   2525                  * too.
   2526                  */
   2527                 if (!target.hasRemaining()) {
   2528                     /* target is full */
   2529                     cr[0] = CoderResult.OVERFLOW;
   2530                     break;
   2531                 }
   2532 
   2533                 entry = stateTable[0][source.get(sourceArrayIndex++) & UConverterConstants.UNSIGNED_BYTE_MASK];
   2534                 /* MBCS_ENTRY_IS_FINAL(entry) */
   2535 
   2536                 /* test the most common case first */
   2537                 if (MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
   2538                     /* output BMP code point */
   2539                     target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));
   2540                     if (offsets != null) {
   2541                         offsets.put(sourceIndex);
   2542                     }
   2543 
   2544                     /* normal end of action codes: prepare for a new character */
   2545                     ++sourceIndex;
   2546                     continue;
   2547                 }
   2548 
   2549                 /*
   2550                  * An if-else-if chain provides more reliable performance for the most common cases compared to a
   2551                  * switch.
   2552                  */
   2553                 action = (byte) (MBCS_ENTRY_FINAL_ACTION(entry));
   2554                 if (action == MBCS_STATE_VALID_DIRECT_20
   2555                         || (action == MBCS_STATE_FALLBACK_DIRECT_20 && isFallbackUsed())) {
   2556 
   2557                     entry = MBCS_ENTRY_FINAL_VALUE(entry);
   2558                     /* output surrogate pair */
   2559                     target.put((char) (0xd800 | (char) (entry >>> 10)));
   2560                     if (offsets != null) {
   2561                         offsets.put(sourceIndex);
   2562                     }
   2563                     c = (char) (0xdc00 | (char) (entry & 0x3ff));
   2564                     if (target.hasRemaining()) {
   2565                         target.put(c);
   2566                         if (offsets != null) {
   2567                             offsets.put(sourceIndex);
   2568                         }
   2569                     } else {
   2570                         /* target overflow */
   2571                         charErrorBufferArray[0] = c;
   2572                         charErrorBufferLength = 1;
   2573                         cr[0] = CoderResult.OVERFLOW;
   2574                         break;
   2575                     }
   2576 
   2577                     ++sourceIndex;
   2578                     continue;
   2579                 } else if (action == MBCS_STATE_FALLBACK_DIRECT_16) {
   2580                     if (isFallbackUsed()) {
   2581                         /* output BMP code point */
   2582                         target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));
   2583                         if (offsets != null) {
   2584                             offsets.put(sourceIndex);
   2585                         }
   2586 
   2587                         ++sourceIndex;
   2588                         continue;
   2589                     }
   2590                 } else if (action == MBCS_STATE_UNASSIGNED) {
   2591                     /* just fall through */
   2592                 } else if (action == MBCS_STATE_ILLEGAL) {
   2593                     /* callback(illegal) */
   2594                     cr[0] = CoderResult.malformedForLength(1);
   2595                 } else {
   2596                     /* reserved, must never occur */
   2597                     ++sourceIndex;
   2598                     continue;
   2599                 }
   2600 
   2601                 if (cr[0].isError()) {
   2602                     /* callback(illegal) */
   2603                     break;
   2604                 } else /* unassigned sequences indicated with byteIndex>0 */{
   2605                     /* try an extension mapping */
   2606                     int sourceBeginIndex = sourceArrayIndex;
   2607                     toUBytesArray[0] = source.get(sourceArrayIndex - 1);
   2608                     source.position(sourceArrayIndex);
   2609                     toULength = toU((byte) 1, source, target, offsets, sourceIndex, flush, cr);
   2610                     sourceArrayIndex = source.position();
   2611                     sourceIndex += 1 + (sourceArrayIndex - sourceBeginIndex);
   2612 
   2613                     if (cr[0].isError()) {
   2614                         /* not mappable or buffer overflow */
   2615                         break;
   2616                     }
   2617                 }
   2618             }
   2619 
   2620             /* write back the updated pointers */
   2621             source.position(sourceArrayIndex);
   2622 
   2623             return cr[0];
   2624         }
   2625 
   2626         private int getFallback(UConverterMBCSTable mbcsTable, int offset) {
   2627             MBCSToUFallback[] toUFallbacks;
   2628             int i, start, limit;
   2629 
   2630             limit = mbcsTable.countToUFallbacks;
   2631             if (limit > 0) {
   2632                 /* do a binary search for the fallback mapping */
   2633                 toUFallbacks = mbcsTable.toUFallbacks;
   2634                 start = 0;
   2635                 while (start < limit - 1) {
   2636                     i = (start + limit) >>> 1;
   2637                     if (offset < toUFallbacks[i].offset) {
   2638                         limit = i;
   2639                     } else {
   2640                         start = i;
   2641                     }
   2642                 }
   2643 
   2644                 /* did we really find it? */
   2645                 if (offset == toUFallbacks[start].offset) {
   2646                     return toUFallbacks[start].codePoint;
   2647                 }
   2648             }
   2649 
   2650             return 0xfffe;
   2651         }
   2652 
   2653         /**
   2654          * This is a simple version of _MBCSGetNextUChar() that is used by other converter implementations. It only
   2655          * returns an "assigned" result if it consumes the entire input. It does not use state from the converter, nor
   2656          * error codes. It does not handle the EBCDIC swaplfnl option (set in UConverter). It handles conversion
   2657          * extensions but not GB 18030.
   2658          *
   2659          * @return U+fffe unassigned U+ffff illegal otherwise the Unicode code point
   2660          */
   2661         int simpleGetNextUChar(ByteBuffer source, boolean useFallback) {
   2662 
   2663             // #if 0
   2664             // /*
   2665             // * Code disabled 2002dec09 (ICU 2.4) because it is not currently used in ICU. markus
   2666             // * TODO In future releases, verify that this function is never called for SBCS
   2667             // * conversions, i.e., that sharedData->mbcs.countStates==1 is still true.
   2668             // * Removal improves code coverage.
   2669             // */
   2670             // /* use optimized function if possible */
   2671             // if(sharedData->mbcs.countStates==1) {
   2672             // if(length==1) {
   2673             // return ucnv_MBCSSingleSimpleGetNextUChar(sharedData, (uint8_t)*source, useFallback);
   2674             // } else {
   2675             // return 0xffff; /* illegal: more than a single byte for an SBCS converter */
   2676             // }
   2677             // }
   2678             // #endif
   2679 
   2680             /* set up the local pointers */
   2681             int[][] stateTable = sharedData.mbcs.stateTable;
   2682             char[] unicodeCodeUnits = sharedData.mbcs.unicodeCodeUnits;
   2683 
   2684             /* converter state */
   2685             int offset = 0;
   2686             int state = sharedData.mbcs.dbcsOnlyState;
   2687 
   2688             int action;
   2689             int entry;
   2690             int c;
   2691             int i = source.position();
   2692             int length = source.limit() - i;
   2693 
   2694             /* conversion loop */
   2695             while (true) {
   2696                 // entry=stateTable[state][(uint8_t)source[i++]];
   2697                 entry = stateTable[state][source.get(i++) & UConverterConstants.UNSIGNED_BYTE_MASK];
   2698 
   2699                 if (MBCS_ENTRY_IS_TRANSITION(entry)) {
   2700                     state = MBCS_ENTRY_TRANSITION_STATE(entry);
   2701                     offset += MBCS_ENTRY_TRANSITION_OFFSET(entry);
   2702 
   2703                     if (i == source.limit()) {
   2704                         return 0xffff; /* truncated character */
   2705                     }
   2706                 } else {
   2707                     /*
   2708                      * An if-else-if chain provides more reliable performance for the most common cases compared to a
   2709                      * switch.
   2710                      */
   2711                     action = MBCS_ENTRY_FINAL_ACTION(entry);
   2712                     if (action == MBCS_STATE_VALID_16) {
   2713                         offset += MBCS_ENTRY_FINAL_VALUE_16(entry);
   2714                         c = unicodeCodeUnits[offset];
   2715                         if (c != 0xfffe) {
   2716                             /* done */
   2717                         } else if (isToUUseFallback()) {
   2718                             c = getFallback(sharedData.mbcs, offset);
   2719                         }
   2720                         /* else done with 0xfffe */
   2721                     } else if (action == MBCS_STATE_VALID_DIRECT_16) {
   2722                         // /* output BMP code point */
   2723                         c = MBCS_ENTRY_FINAL_VALUE_16(entry);
   2724                     } else if (action == MBCS_STATE_VALID_16_PAIR) {
   2725                         offset += MBCS_ENTRY_FINAL_VALUE_16(entry);
   2726                         c = unicodeCodeUnits[offset++];
   2727                         if (c < 0xd800) {
   2728                             /* output BMP code point below 0xd800 */
   2729                         } else if (isToUUseFallback() ? c <= 0xdfff : c <= 0xdbff) {
   2730                             /* output roundtrip or fallback supplementary code point */
   2731                             c = (((c & 0x3ff) << 10) + unicodeCodeUnits[offset] + (0x10000 - 0xdc00));
   2732                         } else if (isToUUseFallback() ? (c & 0xfffe) == 0xe000 : c == 0xe000) {
   2733                             /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
   2734                             c = unicodeCodeUnits[offset];
   2735                         } else if (c == 0xffff) {
   2736                             return 0xffff;
   2737                         } else {
   2738                             c = 0xfffe;
   2739                         }
   2740                     } else if (action == MBCS_STATE_VALID_DIRECT_20) {
   2741                         /* output supplementary code point */
   2742                         c = 0x10000 + MBCS_ENTRY_FINAL_VALUE(entry);
   2743                     } else if (action == MBCS_STATE_FALLBACK_DIRECT_16) {
   2744                         if (!isToUUseFallback(useFallback)) {
   2745                             c = 0xfffe;
   2746                         } else {
   2747                             /* output BMP code point */
   2748                             c = MBCS_ENTRY_FINAL_VALUE_16(entry);
   2749                         }
   2750                     } else if (action == MBCS_STATE_FALLBACK_DIRECT_20) {
   2751                         if (!isToUUseFallback(useFallback)) {
   2752                             c = 0xfffe;
   2753                         } else {
   2754                             /* output supplementary code point */
   2755                             c = 0x10000 + MBCS_ENTRY_FINAL_VALUE(entry);
   2756                         }
   2757                     } else if (action == MBCS_STATE_UNASSIGNED) {
   2758                         c = 0xfffe;
   2759                     } else {
   2760                         /*
   2761                          * forbid MBCS_STATE_CHANGE_ONLY for this function, and MBCS_STATE_ILLEGAL and reserved action
   2762                          * codes
   2763                          */
   2764                         return 0xffff;
   2765                     }
   2766                     break;
   2767                 }
   2768             }
   2769 
   2770             if (i != source.limit()) {
   2771                 /* illegal for this function: not all input consumed */
   2772                 return 0xffff;
   2773             }
   2774 
   2775             if (c == 0xfffe) {
   2776                 /* try an extension mapping */
   2777                 if (sharedData.mbcs.extIndexes != null) {
   2778                     /* Increase the limit for proper handling. Used in LMBCS. */
   2779                     if (source.limit() > i + length) {
   2780                         source.limit(i + length);
   2781                     }
   2782                     return simpleMatchToU(source, useFallback);
   2783                 }
   2784             }
   2785 
   2786             return c;
   2787         }
   2788         private boolean hasValidTrailBytes(int[][] stateTable, short state) {
   2789             int[] row = stateTable[state];
   2790             int b, entry;
   2791             /* First test for final entries in this state for some commonly valid byte values. */
   2792             entry = row[0xa1];
   2793             if (!MBCS_ENTRY_IS_TRANSITION(entry) && MBCS_ENTRY_FINAL_ACTION(entry) != MBCS_STATE_ILLEGAL) {
   2794                 return true;
   2795             }
   2796             entry = row[0x41];
   2797             if (!MBCS_ENTRY_IS_TRANSITION(entry) && MBCS_ENTRY_FINAL_ACTION(entry) != MBCS_STATE_ILLEGAL) {
   2798                 return true;
   2799             }
   2800             /* Then test for final entries in this state. */
   2801             for (b = 0; b <= 0xff; b++) {
   2802                 entry = row[b];
   2803                 if (!MBCS_ENTRY_IS_TRANSITION(entry) && MBCS_ENTRY_FINAL_ACTION(entry) != MBCS_STATE_ILLEGAL) {
   2804                     return true;
   2805                 }
   2806             }
   2807             /* Then recurse for transition entries. */
   2808             for (b = 0; b <= 0xff; b++) {
   2809                 entry = row[b];
   2810                 if (MBCS_ENTRY_IS_TRANSITION(entry) &&
   2811                         hasValidTrailBytes(stateTable, (short)MBCS_ENTRY_TRANSITION_STATE(entry))) {
   2812                     return true;
   2813                 }
   2814             }
   2815             return false;
   2816         }
   2817 
   2818         private boolean isSingleOrLead(int[][] stateTable, int state, boolean isDBCSOnly, int b) {
   2819             int[] row = stateTable[state];
   2820             int entry = row[b];
   2821             if (MBCS_ENTRY_IS_TRANSITION(entry)) { /* lead byte */
   2822                 return hasValidTrailBytes(stateTable, (short)MBCS_ENTRY_TRANSITION_STATE(entry));
   2823             } else {
   2824                 int action = MBCS_ENTRY_FINAL_ACTION(entry);
   2825                 if (action == MBCS_STATE_CHANGE_ONLY && isDBCSOnly) {
   2826                     return false;   /* SI/SO are illegal for DBCS-only conversion */
   2827                 } else {
   2828                     return (action != MBCS_STATE_ILLEGAL);
   2829                 }
   2830             }
   2831         }
   2832 
   2833 
   2834     }
   2835 
   2836     class CharsetEncoderMBCS extends CharsetEncoderICU {
   2837         private boolean allowReplacementChanges = false;
   2838 
        /**
         * Creates an encoder for the given charset. The enclosing CharsetMBCS's fromUSubstitution bytes are
         * handed to the superclass constructor, then replacement changes are enabled and the encoder is reset.
         *
         * @param cs the charset passed on to the superclass constructor
         */
        CharsetEncoderMBCS(CharsetICU cs) {
            super(cs, fromUSubstitution);
            /* the flag is still false while the superclass constructor runs; it is enabled only afterwards */
            allowReplacementChanges = true; // allow changes in implReplaceWith
            implReset();
        }
   2844 
        /**
         * Resets this encoder: runs the superclass reset and then sets preFromUFirstCP to
         * UConverterConstants.U_SENTINEL.
         */
        protected void implReset() {
            super.implReset();
            /*
             * encodeLoop() only resumes a pending match when preFromUFirstCP >= 0; presumably U_SENTINEL is
             * negative and marks "nothing pending" - confirm against UConverterConstants.
             */
            preFromUFirstCP = UConverterConstants.U_SENTINEL;
        }
   2849 
   2850         @SuppressWarnings("fallthrough")
   2851         protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
   2852             CoderResult[] cr = { CoderResult.UNDERFLOW };
   2853             // if (!source.hasRemaining() && fromUChar32 == 0)
   2854             // return cr[0];
   2855 
   2856             int sourceArrayIndex;
   2857             char[] table;
   2858             byte[] pArray, bytes;
   2859             char[] chars;
   2860             int[] ints;
   2861             int pArrayIndex, outputType, c;
   2862             int prevSourceIndex, sourceIndex, nextSourceIndex;
   2863             int stage2Entry = 0, value = 0, length = 0, prevLength;
   2864             short uniMask;
   2865             // long asciiRoundtrips;
   2866 
   2867             byte[] si_value = new byte[2];
   2868             byte[] so_value = new byte[2];
   2869             int si_value_length = 0, so_value_length = 0;
   2870 
   2871             boolean gotoUnassigned = false;
   2872 
   2873             try {
   2874 
   2875                 if (!flush && preFromUFirstCP >= 0) {
   2876                     /*
   2877                      * pass sourceIndex=-1 because we continue from an earlier buffer in the future, this may change
   2878                      * with continuous offsets
   2879                      */
   2880                     cr[0] = continueMatchFromU(source, target, offsets, flush, -1);
   2881 
   2882                     if (cr[0].isError() || preFromULength < 0) {
   2883                         return cr[0];
   2884                     }
   2885                 }
   2886 
   2887                 /* use optimized function if possible */
   2888                 outputType = sharedData.mbcs.outputType;
   2889                 uniMask = sharedData.mbcs.unicodeMask;
   2890                 if (outputType == MBCS_OUTPUT_1 && (uniMask & UConverterConstants.HAS_SURROGATES) == 0) {
   2891                     if ((uniMask & UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
   2892                         cr[0] = cnvMBCSSingleFromBMPWithOffsets(source, target, offsets, flush);
   2893                     } else {
   2894                         cr[0] = cnvMBCSSingleFromUnicodeWithOffsets(source, target, offsets, flush);
   2895                     }
   2896                     return cr[0];
   2897                 } else if (outputType == MBCS_OUTPUT_2) {
   2898                     cr[0] = cnvMBCSDoubleFromUnicodeWithOffsets(source, target, offsets, flush);
   2899                     return cr[0];
   2900                 }
   2901 
   2902                 table = sharedData.mbcs.fromUnicodeTable;
   2903                 int[] tableInts = sharedData.mbcs.fromUnicodeTableInts;
   2904                 sourceArrayIndex = source.position();
   2905 
   2906                 bytes = sharedData.mbcs.fromUnicodeBytes;
   2907                 ints = sharedData.mbcs.fromUnicodeInts;
   2908                 if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
   2909                     chars = sharedData.mbcs.swapLFNLFromUnicodeChars;
   2910                 } else {
   2911                     chars = sharedData.mbcs.fromUnicodeChars;
   2912                 }
   2913 
   2914                 // asciiRoundtrips = sharedData.mbcs.asciiRoundtrips;
   2915 
   2916                 /* get the converter state from UConverter */
   2917                 c = fromUChar32;
   2918 
   2919                 if (outputType == MBCS_OUTPUT_2_SISO) {
   2920                     prevLength = fromUnicodeStatus;
   2921                     if (prevLength == 0) {
   2922                         /* set the real value */
   2923                         prevLength = 1;
   2924                     }
   2925                 } else {
   2926                     /* prevent fromUnicodeStatus from being set to something non-0 */
   2927                     prevLength = 0;
   2928                 }
   2929 
   2930                 /* sourceIndex=-1 if the current character began in the previous buffer */
   2931                 prevSourceIndex = -1;
   2932                 sourceIndex = c == 0 ? 0 : -1;
   2933                 nextSourceIndex = 0;
   2934 
   2935                 /* Get the SI/SO character for the converter */
   2936                 si_value_length = getSISOBytes(SISO_Option.SI, options, si_value);
   2937                 so_value_length = getSISOBytes(SISO_Option.SO, options, so_value);
   2938 
   2939                 /* conversion loop */
   2940                 /*
   2941                  * This is another piece of ugly code: A goto into the loop if the converter state contains a first
   2942                  * surrogate from the previous function call. It saves me to check in each loop iteration a check of
   2943                  * if(c==0) and duplicating the trail-surrogate-handling code in the else branch of that check. I could
   2944                  * not find any other way to get around this other than using a function call for the conversion and
   2945                  * callback, which would be even more inefficient.
   2946                  *
   2947                  * Markus Scherer 2000-jul-19
   2948                  */
   2949                 boolean doloop = true;
   2950                 boolean doread = true;
   2951                 if (c != 0 && target.hasRemaining()) {
   2952                     if (UTF16.isLeadSurrogate((char) c) && (uniMask & UConverterConstants.HAS_SURROGATES) == 0) {
   2953                         // c is a lead surrogate, read another input
   2954                         SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex, nextSourceIndex,
   2955                                 prevSourceIndex, prevLength);
   2956                         doloop = getTrail(source, target, uniMask, x, flush, cr);
   2957                         doread = x.doread;
   2958                         c = x.c;
   2959                         sourceArrayIndex = x.sourceArrayIndex;
   2960                         sourceIndex = x.sourceIndex;
   2961                         nextSourceIndex = x.nextSourceIndex;
   2962                         prevSourceIndex = x.prevSourceIndex;
   2963                         prevLength = x.prevLength;
   2964                     } else {
   2965                         // c is not a lead surrogate, do not read another input
   2966                         doread = false;
   2967                     }
   2968                 }
   2969 
   2970                 if (doloop) {
   2971                     while (!doread || sourceArrayIndex < source.limit()) {
   2972                         /*
   2973                          * This following test is to see if available input would overflow the output. It does not catch
   2974                          * output of more than one byte that overflows as a result of a multi-byte character or callback
   2975                          * output from the last source character. Therefore, those situations also test for overflows
   2976                          * and will then break the loop, too.
   2977                          */
   2978                         if (target.hasRemaining()) {
   2979                             /*
   2980                              * Get a correct Unicode code point: a single UChar for a BMP code point or a matched
   2981                              * surrogate pair for a "supplementary code point".
   2982                              */
   2983 
   2984                             if (doread) {
   2985                                 // doread might be false only on the first looping
   2986 
   2987                                 c = source.get(sourceArrayIndex++);
   2988                                 ++nextSourceIndex;
   2989 
   2990                                 /*
   2991                                  * This also tests if the codepage maps single surrogates. If it does, then surrogates
   2992                                  * are not paired but mapped separately. Note that in this case unmatched surrogates are
   2993                                  * not detected.
   2994                                  */
   2995                                 if (UTF16.isSurrogate((char) c)
   2996                                         && (uniMask & UConverterConstants.HAS_SURROGATES) == 0) {
   2997                                     if (UTF16.isLeadSurrogate((char) c)) {
   2998                                         // getTrail:
   2999                                         SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex,
   3000                                                 nextSourceIndex, prevSourceIndex, prevLength);
   3001                                         doloop = getTrail(source, target, uniMask, x, flush, cr);
   3002                                         c = x.c;
   3003                                         sourceArrayIndex = x.sourceArrayIndex;
   3004                                         sourceIndex = x.sourceIndex;
   3005                                         nextSourceIndex = x.nextSourceIndex;
   3006                                         prevSourceIndex = x.prevSourceIndex;
   3007 
   3008                                         if (x.doread) {
   3009                                             if (doloop)
   3010                                                 continue;
   3011                                             else
   3012                                                 break;
   3013                                         }
   3014                                     } else {
   3015                                         /* this is an unmatched trail code unit (2nd surrogate) */
   3016                                         /* callback(illegal) */
   3017                                         cr[0] = CoderResult.malformedForLength(1);
   3018                                         break;
   3019                                     }
   3020                                 }
   3021                             } else {
   3022                                 doread = true;
   3023                             }
   3024                             /* convert the Unicode code point in c into codepage bytes */
   3025 
   3026                             /*
   3027                              * The basic lookup is a triple-stage compact array (trie) lookup. For details see the
   3028                              * beginning of this file.
   3029                              *
   3030                              * Single-byte codepages are handled with a different data structure by _MBCSSingle...
   3031                              * functions.
   3032                              *
   3033                              * The result consists of a 32-bit value from stage 2 and a pointer to as many bytes as are
   3034                              * stored per character. The pointer points to the character's bytes in stage 3. Bits 15..0
   3035                              * of the stage 2 entry contain the stage 3 index for that pointer, while bits 31..16 are
   3036                              * flags for which of the 16 characters in the block are roundtrip-assigned.
   3037                              *
   3038                              * For 2-byte and 4-byte codepages, the bytes are stored as uint16_t respectively as
   3039                              * uint32_t, in the platform encoding. For 3-byte codepages, the bytes are always stored in
   3040                              * big-endian order.
   3041                              *
   3042                              * For EUC encodings that use only either 0x8e or 0x8f as the first byte of their longest
   3043                              * byte sequences, the first two bytes in this third stage indicate with their 7th bits
   3044                              * whether these bytes are to be written directly or actually need to be preceeded by one of
   3045                              * the two Single-Shift codes. With this, the third stage stores one byte fewer per
   3046                              * character than the actual maximum length of EUC byte sequences.
   3047                              *
   3048                              * Other than that, leading zero bytes are removed and the other bytes output. A single zero
   3049                              * byte may be output if the "assigned" bit in stage 2 was on. The data structure does not
   3050                              * support zero byte output as a fallback, and also does not allow output of leading zeros.
   3051                              */
   3052                             stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, c);
   3053 
   3054                             /* get the bytes and the length for the output */
   3055                             switch (outputType) {
   3056                             /* This is handled above with the method cnvMBCSDoubleFromUnicodeWithOffsets() */
   3057                             /* case MBCS_OUTPUT_2:
   3058                                 value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
   3059                                 if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
   3060                                     length = 1;
   3061                                 } else {
   3062                                     length = 2;
   3063                                 }
   3064                                 break; */
   3065                             case MBCS_OUTPUT_2_SISO:
   3066                                 /* 1/2-byte stateful with Shift-In/Shift-Out */
   3067                                 /*
   3068                                  * Save the old state in the converter object right here, then change the local
   3069                                  * prevLength state variable if necessary. Then, if this character turns out to be
   3070                                  * unassigned or a fallback that is not taken, the callback code must not save the new
   3071                                  * state in the converter because the new state is for a character that is not output.
   3072                                  * However, the callback must still restore the state from the converter in case the
   3073                                  * callback function changed it for its output.
   3074                                  */
   3075                                 fromUnicodeStatus = prevLength; /* save the old state */
   3076                                 value = MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, c);
   3077                                 if (value <= 0xff) {
   3078                                     if (value == 0 && MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) == false) {
   3079                                         /* no mapping, leave value==0 */
   3080                                         length = 0;
   3081                                     } else if (prevLength <= 1) {
   3082                                         length = 1;
   3083                                     } else {
   3084                                         /* change from double-byte mode to single-byte */
   3085                                         if (si_value_length == 1) {
   3086                                             value|=si_value[0]<<8;
   3087                                             length = 2;
   3088                                         } else if (si_value_length == 2) {
   3089                                             value|=si_value[1]<<8;
   3090                                             value|=si_value[0]<<16;
   3091                                             length = 3;
   3092                                         }
   3093                                         prevLength = 1;
   3094                                     }
   3095                                 } else {
   3096                                     if (prevLength == 2) {
   3097                                         length = 2;
   3098                                     } else {
   3099                                         /* change from single-byte mode to double-byte */
   3100                                         if (so_value_length == 1) {
   3101                                             value|=so_value[0]<<16;
   3102                                             length = 3;
   3103                                         } else if (so_value_length == 2) {
   3104                                             value|=so_value[1]<<16;
   3105                                             value|=so_value[0]<<24;
   3106                                             length = 4;
   3107                                         }
   3108                                         prevLength = 2;
   3109                                     }
   3110                                 }
   3111                                 break;
   3112                             case MBCS_OUTPUT_DBCS_ONLY:
   3113                                 /* table with single-byte results, but only DBCS mappings used */
   3114                                 value = MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, c);
   3115                                 if (value <= 0xff) {
   3116                                     /* no mapping or SBCS result, not taken for DBCS-only */
   3117                                     value = stage2Entry = 0; /* stage2Entry=0 to reset roundtrip flags */
   3118                                     length = 0;
   3119                                 } else {
   3120                                     length = 2;
   3121                                 }
   3122                                 break;
   3123                             case MBCS_OUTPUT_3:
   3124                                 pArray = bytes;
   3125                                 pArrayIndex = MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
   3126                                 value = ((pArray[pArrayIndex] & UConverterConstants.UNSIGNED_BYTE_MASK) << 16)
   3127                                         | ((pArray[pArrayIndex + 1] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8)
   3128                                         | (pArray[pArrayIndex + 2] & UConverterConstants.UNSIGNED_BYTE_MASK);
   3129                                 if (value <= 0xff) {
   3130                                     length = 1;
   3131                                 } else if (value <= 0xffff) {
   3132                                     length = 2;
   3133                                 } else {
   3134                                     length = 3;
   3135                                 }
   3136                                 break;
   3137                             case MBCS_OUTPUT_4:
   3138                                 value = MBCS_VALUE_4_FROM_STAGE_2(ints, stage2Entry, c);
   3139                                 if (value < 0) {
   3140                                     // Half of the 4-byte values look negative in a signed int.
   3141                                     length = 4;
   3142                                 } else if (value <= 0xff) {
   3143                                     length = 1;
   3144                                 } else if (value <= 0xffff) {
   3145                                     length = 2;
   3146                                 } else if (value <= 0xffffff) {
   3147                                     length = 3;
   3148                                 } else {
   3149                                     length = 4;
   3150                                 }
   3151                                 break;
   3152                             case MBCS_OUTPUT_3_EUC:
   3153                                 value = MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, c);
   3154                                 /* EUC 16-bit fixed-length representation */
   3155                                 if (value <= 0xff) {
   3156                                     length = 1;
   3157                                 } else if ((value & 0x8000) == 0) {
   3158                                     value |= 0x8e8000;
   3159                                     length = 3;
   3160                                 } else if ((value & 0x80) == 0) {
   3161                                     value |= 0x8f0080;
   3162                                     length = 3;
   3163                                 } else {
   3164                                     length = 2;
   3165                                 }
   3166                                 break;
   3167                             case MBCS_OUTPUT_4_EUC:
   3168                                 pArray = bytes;
   3169                                 pArrayIndex = MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
   3170                                 value = ((pArray[pArrayIndex] & UConverterConstants.UNSIGNED_BYTE_MASK) << 16)
   3171                                         | ((pArray[pArrayIndex + 1] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8)
   3172                                         | (pArray[pArrayIndex + 2] & UConverterConstants.UNSIGNED_BYTE_MASK);
   3173                                 /* EUC 16-bit fixed-length representation applied to the first two bytes */
   3174                                 if (value <= 0xff) {
   3175                                     length = 1;
   3176                                 } else if (value <= 0xffff) {
   3177                                     length = 2;
   3178                                 } else if ((value & 0x800000) == 0) {
   3179                                     value |= 0x8e800000;
   3180                                     length = 4;
   3181                                 } else if ((value & 0x8000) == 0) {
   3182                                     value |= 0x8f008000;
   3183                                     length = 4;
   3184                                 } else {
   3185                                     length = 3;
   3186                                 }
   3187                                 break;
   3188                             default:
   3189                                 /* must not occur */
   3190                                 /*
   3191                                  * To avoid compiler warnings that value & length may be used without having been
   3192                                  * initialized, we set them here. In reality, this is unreachable code. Not having a
   3193                                  * default branch also causes warnings with some compilers.
   3194                                  */
   3195                                 value = stage2Entry = 0; /* stage2Entry=0 to reset roundtrip flags */
   3196                                 length = 0;
   3197                                 break;
   3198                             }
   3199 
   3200                             /* is this code point assigned, or do we use fallbacks? */
   3201                             if (gotoUnassigned || (!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) || (isFromUUseFallback(c) && value != 0)))) {
   3202                                 gotoUnassigned = false;
   3203                                 /*
   3204                                  * We allow a 0 byte output if the "assigned" bit is set for this entry. There is no way
   3205                                  * with this data structure for fallback output to be a zero byte.
   3206                                  */
   3207 
   3208                                 // unassigned:
   3209                                 SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex, nextSourceIndex,
   3210                                         prevSourceIndex, prevLength);
   3211                                 doloop = unassigned(source, target, offsets, x, flush, cr);
   3212                                 c = x.c;
   3213                                 sourceArrayIndex = x.sourceArrayIndex;
   3214                                 sourceIndex = x.sourceIndex;
   3215                                 nextSourceIndex = x.nextSourceIndex;
   3216                                 prevSourceIndex = x.prevSourceIndex;
   3217                                 prevLength = x.prevLength;
   3218                                 if (doloop)
   3219                                     continue;
   3220                                 else
   3221                                     break;
   3222                             }
   3223 
   3224                             /* write the output character bytes from value and length */
   3225                             /* from the first if in the loop we know that targetCapacity>0 */
   3226                             if (length <= target.remaining()) {
   3227                                 switch (length) {
   3228                                 /* each branch falls through to the next one */
   3229                                 case 4:
   3230                                     target.put((byte) (value >>> 24));
   3231                                     if (offsets != null) {
   3232                                         offsets.put(sourceIndex);
   3233                                     }
   3234                                 case 3:
   3235                                     target.put((byte) (value >>> 16));
   3236                                     if (offsets != null) {
   3237                                         offsets.put(sourceIndex);
   3238                                     }
   3239                                 case 2:
   3240                                     target.put((byte) (value >>> 8));
   3241                                     if (offsets != null) {
   3242                                         offsets.put(sourceIndex);
   3243                                     }
   3244                                 case 1:
   3245                                     target.put((byte) value);
   3246                                     if (offsets != null) {
   3247                                         offsets.put(sourceIndex);
   3248                                     }
   3249                                 default:
   3250                                     /* will never occur */
   3251                                     break;
   3252                                 }
   3253                             } else {
   3254                                 int errorBufferArrayIndex;
   3255 
   3256                                 /*
   3257                                  * We actually do this backwards here: In order to save an intermediate variable, we
   3258                                  * output first to the overflow buffer what does not fit into the regular target.
   3259                                  */
   3260                                 /* we know that 1<=targetCapacity<length<=4 */
   3261                                 length -= target.remaining();
   3262 
   3263                                 errorBufferArrayIndex = 0;
   3264                                 switch (length) {
   3265                                 /* each branch falls through to the next one */
   3266                                 case 3:
   3267                                     errorBuffer[errorBufferArrayIndex++] = (byte) (value >>> 16);
   3268                                 case 2:
   3269                                     errorBuffer[errorBufferArrayIndex++] = (byte) (value >>> 8);
   3270                                 case 1:
   3271                                     errorBuffer[errorBufferArrayIndex] = (byte) value;
   3272                                 default:
   3273                                     /* will never occur */
   3274                                     break;
   3275                                 }
   3276                                 errorBufferLength = (byte) length;
   3277 
   3278                                 /* now output what fits into the regular target */
   3279                                 value >>>= 8 * length; /* length was reduced by targetCapacity */
   3280                                 switch (target.remaining()) {
   3281                                 /* each branch falls through to the next one */
   3282                                 case 3:
   3283                                     target.put((byte) (value >>> 16));
   3284                                     if (offsets != null) {
   3285                                         offsets.put(sourceIndex);
   3286                                     }
   3287                                 case 2:
   3288                                     target.put((byte) (value >>> 8));
   3289                                     if (offsets != null) {
   3290                                         offsets.put(sourceIndex);
   3291                                     }
   3292                                 case 1:
   3293                                     target.put((byte) value);
   3294                                     if (offsets != null) {
   3295                                         offsets.put(sourceIndex);
   3296                                     }
   3297                                 default:
   3298                                     /* will never occur */
   3299                                     break;
   3300                                 }
   3301 
   3302                                 /* target overflow */
   3303                                 cr[0] = CoderResult.OVERFLOW;
   3304                                 c = 0;
   3305                                 break;
   3306                             }
   3307 
   3308                             /* normal end of conversion: prepare for a new character */
   3309                             c = 0;
   3310                             if (offsets != null) {
   3311                                 prevSourceIndex = sourceIndex;
   3312                                 sourceIndex = nextSourceIndex;
   3313                             }
   3314                             continue;
   3315                         } else {
   3316                             /* target is full */
   3317                             cr[0] = CoderResult.OVERFLOW;
   3318                             break;
   3319                         }
   3320                     }
   3321                 }
   3322 
   3323                 /*
   3324                  * the end of the input stream and detection of truncated input are handled by the framework, but for
   3325                  * EBCDIC_STATEFUL conversion we need to emit an SI at the very end
   3326                  *
   3327                  * conditions: successful EBCDIC_STATEFUL in DBCS mode end of input and no truncated input
   3328                  */
   3329                 if (outputType == MBCS_OUTPUT_2_SISO && prevLength == 2 && flush && sourceArrayIndex >= source.limit()
   3330                         && c == 0) {
   3331 
   3332                     /* EBCDIC_STATEFUL ending with DBCS: emit an SI to return the output stream to SBCS */
   3333                     if (target.hasRemaining()) {
   3334                         target.put(si_value[0]);
   3335                         if (si_value_length == 2) {
   3336                             if (target.remaining() > 0) {
   3337                                 target.put(si_value[1]);
   3338                             } else {
   3339                                 errorBuffer[0] = si_value[1];
   3340                                 errorBufferLength = 1;
   3341                                 cr[0] = CoderResult.OVERFLOW;
   3342                             }
   3343                         }
   3344                         if (offsets != null) {
   3345                             /* set the last source character's index (sourceIndex points at sourceLimit now) */
   3346                             offsets.put(prevSourceIndex);
   3347                         }
   3348                     } else {
   3349                         /* target is full */
   3350                         errorBuffer[0] = si_value[0];
   3351                         if (si_value_length == 2) {
   3352                             errorBuffer[1] = si_value[1];
   3353                         }
   3354                         errorBufferLength = si_value_length;
   3355                         cr[0] = CoderResult.OVERFLOW;
   3356                     }
   3357                     prevLength = 1; /* we switched into SBCS */
   3358                 }
   3359 
   3360                 /* set the converter state back into UConverter */
   3361                 fromUChar32 = c;
   3362                 fromUnicodeStatus = prevLength;
   3363 
   3364                 source.position(sourceArrayIndex);
   3365             } catch (BufferOverflowException ex) {
   3366                 cr[0] = CoderResult.OVERFLOW;
   3367             }
   3368 
   3369             return cr[0];
   3370         }
   3371 
        /*
         * This is another simple conversion function for internal use by other conversion implementations. It does not
         * use the converter state nor call callbacks. It does not handle the EBCDIC swaplfnl option (set in
         * UConverter). It handles conversion extensions but not GB 18030.
         *
         * It converts one single Unicode code point into codepage bytes, encoded as one 32-bit value that is stored
         * in pValue[0]. The function returns:
         *   1..4  the number of bytes in pValue[0]
         *   0     unassigned (pValue[0] undefined)
         *   -1    illegal (currently not used, pValue[0] undefined)
         *
         * pValue[0] will contain the resulting bytes with the last byte in bits 7..0, the second to last byte in bits
         * 15..8, etc. Currently, the function assumes but does not check that 0<=c<=0x10ffff.
         */
   3384         int fromUChar32(int c, int[] pValue, boolean isUseFallback) {
   3385             // #if 0
   3386             // /* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */
   3387             // const uint8_t *p;
   3388             // #endif
   3389 
   3390             char[] table;
   3391             int stage2Entry;
   3392             int value;
   3393             int length;
   3394             int p;
   3395 
   3396             /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
   3397             if (c <= 0xffff || sharedData.mbcs.hasSupplementary()) {
   3398                 table = sharedData.mbcs.fromUnicodeTable;
   3399 
   3400                 /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
   3401                 if (sharedData.mbcs.outputType == MBCS_OUTPUT_1) {
   3402                     value = MBCS_SINGLE_RESULT_FROM_U(table, sharedData.mbcs.fromUnicodeChars, c);
   3403                     /* is this code point assigned, or do we use fallbacks? */
   3404                     if (isUseFallback ? value >= 0x800 : value >= 0xc00) {
   3405                         pValue[0] = value & 0xff;
   3406                         return 1;
   3407                     }
   3408                 } else /* outputType!=MBCS_OUTPUT_1 */{
   3409                     int[] tableInts = sharedData.mbcs.fromUnicodeTableInts;
   3410                     stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, c);
   3411 
   3412                     /* get the bytes and the length for the output */
   3413                     switch (sharedData.mbcs.outputType) {
   3414                     case MBCS_OUTPUT_2:
   3415                         value = MBCS_VALUE_2_FROM_STAGE_2(sharedData.mbcs.fromUnicodeChars, stage2Entry, c);
   3416                         if (value <= 0xff) {
   3417                             length = 1;
   3418                         } else {
   3419                             length = 2;
   3420                         }
   3421                         break;
   3422                     // #if 0
   3423                     // /* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */
   3424                     // case MBCS_OUTPUT_DBCS_ONLY:
   3425                     // /* table with single-byte results, but only DBCS mappings used */
   3426                     // value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
   3427                     // if(value<=0xff) {
   3428                     // /* no mapping or SBCS result, not taken for DBCS-only */
   3429                     // value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
   3430                     // length=0;
   3431                     // } else {
   3432                     // length=2;
   3433                     // }
   3434                     // break;
   3435                     case MBCS_OUTPUT_3:
   3436                         byte[] bytes = sharedData.mbcs.fromUnicodeBytes;
   3437                         p = CharsetMBCS.MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
   3438                         value = ((bytes[p] & UConverterConstants.UNSIGNED_BYTE_MASK)<<16) |
   3439                             ((bytes[p+1] & UConverterConstants.UNSIGNED_BYTE_MASK)<<8) |
   3440                             (bytes[p+2] & UConverterConstants.UNSIGNED_BYTE_MASK);
   3441                         if (value <= 0xff) {
   3442                             length = 1;
   3443                         } else if (value <= 0xffff) {
   3444                             length = 2;
   3445                         } else {
   3446                             length = 3;
   3447                         }
   3448                         break;
   3449                     // case MBCS_OUTPUT_4:
   3450                     // value=MBCS_VALUE_4_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
   3451                     // if(value<=0xff) {
   3452                     // length=1;
   3453                     // } else if(value<=0xffff) {
   3454                     // length=2;
   3455                     // } else if(value<=0xffffff) {
   3456                     // length=3;
   3457                     // } else {
   3458                     // length=4;
   3459                     // }
   3460                     // break;
   3461                     // case MBCS_OUTPUT_3_EUC:
   3462                     // value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
   3463                     // /* EUC 16-bit fixed-length representation */
   3464                     // if(value<=0xff) {
   3465                     // length=1;
   3466                     // } else if((value&0x8000)==0) {
   3467                     // value|=0x8e8000;
   3468                     // length=3;
   3469                     // } else if((value&0x80)==0) {
   3470                     // value|=0x8f0080;
   3471                     // length=3;
   3472                     // } else {
   3473                     // length=2;
   3474                     // }
   3475                     // break;
   3476                     // case MBCS_OUTPUT_4_EUC:
   3477                     // p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
   3478                     // value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
   3479                     // /* EUC 16-bit fixed-length representation applied to the first two bytes */
   3480                     // if(value<=0xff) {
   3481                     // length=1;
   3482                     // } else if(value<=0xffff) {
   3483                     // length=2;
   3484                     // } else if((value&0x800000)==0) {
   3485                     // value|=0x8e800000;
   3486                     // length=4;
   3487                     // } else if((value&0x8000)==0) {
   3488                     // value|=0x8f008000;
   3489                     // length=4;
   3490                     // } else {
   3491                     // length=3;
   3492                     // }
   3493                     // break;
   3494                     // #endif
   3495                     default:
   3496                         /* must not occur */
   3497                         return -1;
   3498                     }
   3499 
   3500                     /* is this code point assigned, or do we use fallbacks? */
   3501                     if (MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)
   3502                             || (CharsetEncoderICU.isFromUUseFallback(isUseFallback, c) && value != 0)) {
   3503                         /*
   3504                          * We allow a 0 byte output if the "assigned" bit is set for this entry. There is no way with
   3505                          * this data structure for fallback output to be a zero byte.
   3506                          */
   3507                         /* assigned */
   3508                         pValue[0] = value;
   3509                         return length;
   3510                     }
   3511                 }
   3512             }
   3513 
   3514             if (sharedData.mbcs.extIndexes != null) {
   3515                 length = simpleMatchFromU(c, pValue, isUseFallback);
   3516                 return length >= 0 ? length : -length; /* return abs(length); */
   3517             }
   3518 
   3519             /* unassigned */
   3520             return 0;
   3521         }
   3522 
        /*
         * Continues a partial match with new input. Requires preFromUFirstCP >= 0. Never called for simple,
         * single-character conversion.
         */
   3527         private CoderResult continueMatchFromU(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush,
   3528                 int srcIndex) {
   3529             CoderResult cr = CoderResult.UNDERFLOW;
   3530             int[] value = new int[1];
   3531             int match;
   3532 
   3533             match = matchFromU(preFromUFirstCP, preFromUArray, preFromUBegin, preFromULength, source, value, useFallback, flush);
   3534             if (match >= 2) {
   3535                 match -= 2; /* remove 2 for the initial code point */
   3536 
   3537                 if (match >= preFromULength) {
   3538                     /* advance src pointer for the consumed input */
   3539                     source.position(source.position() + match - preFromULength);
   3540                     preFromULength = 0;
   3541                 } else {
   3542                     /* the match did not use all of preFromU[] - keep the rest for replay */
   3543                     int length = preFromULength - match;
   3544                     System.arraycopy(preFromUArray, preFromUBegin + match, preFromUArray, preFromUBegin, length);
   3545                     preFromULength = (byte) -length;
   3546                 }
   3547 
   3548                 /* finish the partial match */
   3549                 preFromUFirstCP = UConverterConstants.U_SENTINEL;
   3550 
   3551                 /* write result */
   3552                 writeFromU(value[0], target, offsets, srcIndex);
   3553             } else if (match < 0) {
   3554                 /* save state for partial match */
   3555                 int sArrayIndex;
   3556                 int j;
   3557 
   3558                 /* just _append_ the newly consumed input to preFromU[] */
   3559                 sArrayIndex = source.position();
   3560                 match = -match - 2; /* remove 2 for the initial code point */
   3561                 for (j = preFromULength; j < match; ++j) {
   3562                     preFromUArray[j] = source.get(sArrayIndex++);
   3563                 }
   3564                 source.position(sArrayIndex); /* same as *src=srcLimit; because we reached the end of input */
   3565                 preFromULength = (byte) match;
   3566             } else { /* match==0 or 1 */
   3567                 /*
   3568                  * no match
   3569                  *
   3570                  * We need to split the previous input into two parts:
   3571                  *
   3572                  * 1. The first code point is unmappable - that's how we got into trying the extension data in the first
   3573                  * place. We need to move it from the preFromU buffer to the error buffer, set an error code, and
   3574                  * prepare the rest of the previous input for 2.
   3575                  *
   3576                  * 2. The rest of the previous input must be converted once we come back from the callback for the first
   3577                  * code point. At that time, we have to try again from scratch to convert these input characters. The
   3578                  * replay will be handled by the ucnv.c conversion code.
   3579                  */
   3580 
   3581                 if (match == 1) {
   3582                     /* matched, no mapping but request for <subchar1> */
   3583                     useSubChar1 = true;
   3584                 }
   3585 
   3586                 /* move the first code point to the error field */
   3587                 fromUChar32 = preFromUFirstCP;
   3588                 preFromUFirstCP = UConverterConstants.U_SENTINEL;
   3589 
   3590                 /* mark preFromU for replay */
   3591                 preFromULength = (byte) -preFromULength;
   3592 
   3593                 /* set the error code for unassigned */
   3594                 // TODO: figure out what the unmappable length really should be
   3595                 cr = CoderResult.unmappableForLength(1);
   3596             }
   3597             return cr;
   3598         }
   3599 
   3600         /**
   3601          * @param cx
   3602          *            pointer to extension data; if NULL, returns 0
   3603          * @param firstCP
   3604          *            the first code point before all the other UChars
   3605          * @param pre
   3606          *            UChars that must match; !initialMatch: partial match with them
   3607          * @param preLength
   3608          *            length of pre, >=0
   3609          * @param src
   3610          *            UChars that can be used to complete a match
   3611          * @param srcLength
   3612          *            length of src, >=0
   3613          * @param pMatchValue
   3614          *            [out] output result value for the match from the data structure
   3615          * @param useFallback
   3616          *            "use fallback" flag, usually from cnv->useFallback
   3617          * @param flush
   3618          *            TRUE if the end of the input stream is reached
         * @return one of the following:
         *         {@code >1}: matched, return value=total match length (number of input units matched);
         *         {@code 1}: matched, no mapping but request for {@code <subchar1>} (only for the first code point);
         *         {@code 0}: no match;
         *         {@code <0}: partial match, return value=negative total match length (partial matches are never
         *         returned for flush==TRUE, and are never returned as being longer than UCNV_EXT_MAX_UCHARS).
         *         The matchLength is 2 if only firstCP matched, and {@code >2} if firstCP and further code units
         *         matched.
   3624          */
   3625         // static int32_t ucnv_extMatchFromU(const int32_t *cx, UChar32 firstCP, const UChar *pre, int32_t preLength,
   3626         // const UChar *src, int32_t srcLength, uint32_t *pMatchValue, UBool useFallback, UBool flush)
   3627         private int matchFromU(int firstCP, char[] preArray, int preArrayBegin, int preLength, CharBuffer source,
   3628                 int[] pMatchValue, boolean isUseFallback, boolean flush) {
   3629             ByteBuffer cx = sharedData.mbcs.extIndexes;
   3630 
   3631             CharBuffer stage12, stage3;
   3632             IntBuffer stage3b;
   3633 
   3634             CharBuffer fromUTableUChars, fromUSectionUChars;
   3635             IntBuffer fromUTableValues, fromUSectionValues;
   3636 
   3637             int value, matchValue;
   3638             int i, j, index, length, matchLength;
   3639             char c;
   3640 
   3641             if (cx == null) {
   3642                 return 0; /* no extension data, no match */
   3643             }
   3644 
   3645             /* trie lookup of firstCP */
   3646             index = firstCP >>> 10; /* stage 1 index */
   3647             if (index >= cx.asIntBuffer().get(EXT_FROM_U_STAGE_1_LENGTH)) {
   3648                 return 0; /* the first code point is outside the trie */
   3649             }
   3650 
   3651             stage12 = (CharBuffer) ARRAY(cx, EXT_FROM_U_STAGE_12_INDEX, char.class);
   3652             stage3 = (CharBuffer) ARRAY(cx, EXT_FROM_U_STAGE_3_INDEX, char.class);
   3653             index = FROM_U(stage12, stage3, index, firstCP);
   3654 
   3655             stage3b = (IntBuffer) ARRAY(cx, EXT_FROM_U_STAGE_3B_INDEX, int.class);
   3656             value = stage3b.get(stage3b.position() + index);
   3657             if (value == 0) {
   3658                 return 0;
   3659             }
   3660 
   3661             if (TO_U_IS_PARTIAL(value)) {
   3662                 /* partial match, enter the loop below */
   3663                 index = FROM_U_GET_PARTIAL_INDEX(value);
   3664 
   3665                 /* initialize */
   3666                 fromUTableUChars = (CharBuffer) ARRAY(cx, EXT_FROM_U_UCHARS_INDEX, char.class);
   3667                 fromUTableValues = (IntBuffer) ARRAY(cx, EXT_FROM_U_VALUES_INDEX, int.class);
   3668 
   3669                 matchValue = 0;
   3670                 i = j = matchLength = 0;
   3671 
   3672                 /* we must not remember fallback matches when not using fallbacks */
   3673 
   3674                 /* match input units until there is a full match or the input is consumed */
   3675                 for (;;) {
   3676                     /* go to the next section */
   3677                     int oldpos = fromUTableUChars.position();
   3678                     fromUSectionUChars = ((CharBuffer) fromUTableUChars.position(index)).slice();
   3679                     fromUTableUChars.position(oldpos);
   3680                     oldpos = fromUTableValues.position();
   3681                     fromUSectionValues = ((IntBuffer) fromUTableValues.position(index)).slice();
   3682                     fromUTableValues.position(oldpos);
   3683 
   3684                     /* read first pair of the section */
   3685                     length = fromUSectionUChars.get();
   3686                     value = fromUSectionValues.get();
   3687                     if (value != 0 && (FROM_U_IS_ROUNDTRIP(value) || isFromUUseFallback(isUseFallback, firstCP))) {
   3688                         /* remember longest match so far */
   3689                         matchValue = value;
   3690                         matchLength = 2 + i + j;
   3691                     }
   3692 
   3693                     /* match pre[] then src[] */
   3694                     if (i < preLength) {
   3695                         c = preArray[preArrayBegin + i++];
   3696                     } else if (source != null && j < source.remaining()) {
   3697                         c = source.get(source.position() + j++);
   3698                     } else {
   3699                         /* all input consumed, partial match */
   3700                         if (flush || (length = (i + j)) > MAX_UCHARS) {
   3701                             /*
   3702                              * end of the entire input stream, stop with the longest match so far or: partial match must
   3703                              * not be longer than UCNV_EXT_MAX_UCHARS because it must fit into state buffers
   3704                              */
   3705                             break;
   3706                         } else {
   3707                             /* continue with more input next time */
   3708                             return -(2 + length);
   3709                         }
   3710                     }
   3711 
   3712                     /* search for the current UChar */
   3713                     index = findFromU(fromUSectionUChars, length, c);
   3714                     if (index < 0) {
   3715                         /* no match here, stop with the longest match so far */
   3716                         break;
   3717                     } else {
   3718                         value = fromUSectionValues.get(fromUSectionValues.position() + index);
   3719                         if (FROM_U_IS_PARTIAL(value)) {
   3720                             /* partial match, continue */
   3721                             index = FROM_U_GET_PARTIAL_INDEX(value);
   3722                         } else {
   3723                             if (FROM_U_IS_ROUNDTRIP(value) || isFromUUseFallback(isUseFallback, firstCP)) {
   3724                                 /* full match, stop with result */
   3725                                 matchValue = value;
   3726                                 matchLength = 2 + i + j;
   3727                             } else {
   3728                                 /* full match on fallback not taken, stop with the longest match so far */
   3729                             }
   3730                             break;
   3731                         }
   3732                     }
   3733                 }
   3734 
   3735                 if (matchLength == 0) {
   3736                     /* no match at all */
   3737                     return 0;
   3738                 }
   3739             } else /* result from firstCP trie lookup */{
   3740                 if (FROM_U_IS_ROUNDTRIP(value) || isFromUUseFallback(isUseFallback, firstCP)) {
   3741                     /* full match, stop with result */
   3742                     matchValue = value;
   3743                     matchLength = 2;
   3744                 } else {
   3745                     /* fallback not taken */
   3746                     return 0;
   3747                 }
   3748             }
   3749 
   3750             if ((matchValue & FROM_U_RESERVED_MASK) != 0) {
   3751                 /* do not interpret values with reserved bits used, for forward compatibility */
   3752                 return 0;
   3753             }
   3754 
   3755             /* return result */
   3756             if (matchValue == FROM_U_SUBCHAR1) {
   3757                 return 1; /* assert matchLength==2 */
   3758             }
   3759 
   3760             pMatchValue[0] = FROM_U_MASK_ROUNDTRIP(matchValue);
   3761             return matchLength;
   3762         }
   3763 
   3764         private int simpleMatchFromU(int cp, int[] pValue, boolean isUseFallback) {
   3765             int[] value = new int[1];
   3766             int match; // signed
   3767 
   3768             /* try to match */
   3769             match = matchFromU(cp, null, 0, 0, null, value, isUseFallback, true);
   3770             if (match >= 2) {
   3771                 /* write result for simple, single-character conversion */
   3772                 int length;
   3773                 boolean isRoundtrip;
   3774 
   3775                 isRoundtrip = FROM_U_IS_ROUNDTRIP(value[0]);
   3776                 length = FROM_U_GET_LENGTH(value[0]);
   3777                 value[0] = FROM_U_GET_DATA(value[0]);
   3778 
   3779                 if (length <= EXT_FROM_U_MAX_DIRECT_LENGTH) {
   3780                     pValue[0] = value[0];
   3781                     return isRoundtrip ? length : -length;
   3782                     // #if 0 /* not currently used */
   3783                     // } else if(length==4) {
   3784                     // /* de-serialize a 4-byte result */
   3785                     // const uint8_t *result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value;
   3786                     // *pValue=
   3787                     // ((uint32_t)result[0]<<24)|
   3788                     // ((uint32_t)result[1]<<16)|
   3789                     // ((uint32_t)result[2]<<8)|
   3790                     // result[3];
   3791                     // return isRoundtrip ? 4 : -4;
   3792                     // #endif
   3793                 }
   3794             }
   3795 
   3796             /*
   3797              * return no match because - match>1 && resultLength>4: result too long for simple conversion - match==1: no
   3798              * match found, <subchar1> preferred - match==0: no match found in the first place - match<0: partial
   3799              * match, not supported for simple conversion (and flush==TRUE)
   3800              */
   3801             return 0;
   3802         }
   3803 
   3804         @SuppressWarnings("fallthrough")
   3805         private CoderResult writeFromU(int value, ByteBuffer target, IntBuffer offsets, int srcIndex) {
   3806             ByteBuffer cx = sharedData.mbcs.extIndexes;
   3807 
   3808             byte bufferArray[] = new byte[1 + MAX_BYTES];
   3809             int bufferArrayIndex = 0;
   3810             byte[] resultArray;
   3811             int resultArrayIndex;
   3812             int length, prevLength;
   3813 
   3814             length = FROM_U_GET_LENGTH(value);
   3815             value = FROM_U_GET_DATA(value);
   3816 
   3817             /* output the result */
   3818             if (length <= FROM_U_MAX_DIRECT_LENGTH) {
   3819                 /*
   3820                  * Generate a byte array and then write it below. This is not the fastest possible way, but it should be
   3821                  * ok for extension mappings, and it is much simpler. Offset and overflow handling are only done once
   3822                  * this way.
   3823                  */
   3824                 int p = bufferArrayIndex + 1; /* reserve buffer[0] for shiftByte below */
   3825                 switch (length) {
   3826                 case 3:
   3827                     bufferArray[p++] = (byte) (value >>> 16);
   3828                 case 2:
   3829                     bufferArray[p++] = (byte) (value >>> 8);
   3830                 case 1:
   3831                     bufferArray[p++] = (byte) value;
   3832                 default:
   3833                     break; /* will never occur */
   3834                 }
   3835                 resultArray = bufferArray;
   3836                 resultArrayIndex = bufferArrayIndex + 1;
   3837             } else {
   3838                 byte[] slice = new byte[length];
   3839 
   3840                 ByteBuffer bb = ((ByteBuffer) ARRAY(cx, EXT_FROM_U_BYTES_INDEX, byte.class));
   3841                 bb.position(value);
   3842                 bb.get(slice, 0, slice.length);
   3843 
   3844                 resultArray = slice;
   3845                 resultArrayIndex = 0;
   3846             }
   3847 
   3848             /* with correct data we have length>0 */
   3849 
   3850             if ((prevLength = fromUnicodeStatus) != 0) {
   3851                 /* handle SI/SO stateful output */
   3852                 byte shiftByte;
   3853 
   3854                 if (prevLength > 1 && length == 1) {
   3855                     /* change from double-byte mode to single-byte */
   3856                     shiftByte = (byte) UConverterConstants.SI;
   3857                     fromUnicodeStatus = 1;
   3858                 } else if (prevLength == 1 && length > 1) {
   3859                     /* change from single-byte mode to double-byte */
   3860                     shiftByte = (byte) UConverterConstants.SO;
   3861                     fromUnicodeStatus = 2;
   3862                 } else {
   3863                     shiftByte = 0;
   3864                 }
   3865 
   3866                 if (shiftByte != 0) {
   3867                     /* prepend the shift byte to the result bytes */
   3868                     bufferArray[0] = shiftByte;
   3869                     if (resultArray != bufferArray || resultArrayIndex != bufferArrayIndex + 1) {
   3870                         System.arraycopy(resultArray, resultArrayIndex, bufferArray, bufferArrayIndex + 1, length);
   3871                     }
   3872                     resultArray = bufferArray;
   3873                     resultArrayIndex = bufferArrayIndex;
   3874                     ++length;
   3875                 }
   3876             }
   3877 
   3878             return fromUWriteBytes(this, resultArray, resultArrayIndex, length, target, offsets, srcIndex);
   3879         }
   3880 
   3881         /*
   3882          * @return if(U_FAILURE) return the code point for cnv->fromUChar32 else return 0 after output has been written
   3883          * to the target
   3884          */
   3885         private int fromU(int cp, CharBuffer source, ByteBuffer target, IntBuffer offsets, int sourceIndex,
   3886                 int length, boolean flush, CoderResult[] cr) {
   3887             // ByteBuffer cx;
   3888 
   3889             useSubChar1 = false;
   3890 
   3891             if (sharedData.mbcs.extIndexes != null
   3892                     && initialMatchFromU(cp, source, target, offsets, sourceIndex, flush, cr)) {
   3893                 return 0; /* an extension mapping handled the input */
   3894             }
   3895 
   3896             /* GB 18030 */
   3897             if ((options & MBCS_OPTION_GB18030) != 0) {
   3898                 int[] range;
   3899                 int i;
   3900 
   3901                 for (i = 0; i < gb18030Ranges.length; ++i) {
   3902                     range = gb18030Ranges[i];
   3903                     if (range[0] <= cp && cp <= range[1]) {
   3904                         /* found the Unicode code point, output the four-byte sequence for it */
   3905                         int linear;
   3906                         byte bytes[] = new byte[4];
   3907 
   3908                         /* get the linear value of the first GB 18030 code in this range */
   3909                         linear = range[2] - LINEAR_18030_BASE;
   3910 
   3911                         /* add the offset from the beginning of the range */
   3912                         linear += (cp - range[0]);
   3913 
   3914                         bytes[3] = (byte) (0x30 + linear % 10);
   3915                         linear /= 10;
   3916                         bytes[2] = (byte) (0x81 + linear % 126);
   3917                         linear /= 126;
   3918                         bytes[1] = (byte) (0x30 + linear % 10);
   3919                         linear /= 10;
   3920                         bytes[0] = (byte) (0x81 + linear);
   3921 
   3922                         /* output this sequence */
   3923                         cr[0] = fromUWriteBytes(this, bytes, 0, 4, target, offsets, sourceIndex);
   3924                         return 0;
   3925                     }
   3926                 }
   3927             }
   3928 
   3929             /* no mapping */
   3930             cr[0] = CoderResult.unmappableForLength(length);
   3931             return cp;
   3932         }
   3933 
   3934         /*
   3935          * target<targetLimit; set error code for overflow
   3936          */
   3937         private boolean initialMatchFromU(int cp, CharBuffer source, ByteBuffer target, IntBuffer offsets,
   3938                 int srcIndex, boolean flush, CoderResult[] cr) {
   3939             int[] value = new int[1];
   3940             int match;
   3941 
   3942             /* try to match */
   3943             match = matchFromU(cp, null, 0, 0, source, value, useFallback, flush);
   3944 
   3945             /* reject a match if the result is a single byte for DBCS-only */
   3946             if (match >= 2
   3947                     && !(FROM_U_GET_LENGTH(value[0]) == 1 && sharedData.mbcs.outputType == MBCS_OUTPUT_DBCS_ONLY)) {
   3948                 /* advance src pointer for the consumed input */
   3949                 source.position(source.position() + match - 2); /* remove 2 for the initial code point */
   3950 
   3951                 /* write result to target */
   3952                 cr[0] = writeFromU(value[0], target, offsets, srcIndex);
   3953                 return true;
   3954             } else if (match < 0) {
   3955                 /* save state for partial match */
   3956                 int sArrayIndex;
   3957                 int j;
   3958 
   3959                 /* copy the first code point */
   3960                 preFromUFirstCP = cp;
   3961 
   3962                 /* now copy the newly consumed input */
   3963                 sArrayIndex = source.position();
   3964                 match = -match - 2; /* remove 2 for the initial code point */
   3965                 for (j = 0; j < match; ++j) {
   3966                     preFromUArray[j] = source.get(sArrayIndex++);
   3967                 }
   3968                 source.position(sArrayIndex); /* same as *src=srcLimit; because we reached the end of input */
   3969                 preFromULength = (byte) match;
   3970                 return true;
   3971             } else if (match == 1) {
   3972                 /* matched, no mapping but request for <subchar1> */
   3973                 useSubChar1 = true;
   3974                 return false;
   3975             } else /* match==0 no match */{
   3976                 return false;
   3977             }
   3978         }
   3979 
   3980         CoderResult cnvMBCSFromUnicodeWithOffsets(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
                    /*
                     * Thin entry point: all four arguments are passed to encodeLoop() unchanged and its CoderResult is
                     * returned as is.
                     */
   3981             // Just call encodeLoop to remove duplicate code.
   3982             return encodeLoop(source, target, offsets, flush);
   3983         }
   3984 
   3985         /*
   3986          * This version of ucnv_MBCSFromUnicode() is optimized for single-byte codepages that map only to and from the
   3987          * BMP. In addition to single-byte/state optimizations, the offset calculations become much easier.
                 *
                 * Input is read from source by absolute index, starting at source.position(); the position is written
                 * back once at the end. Every UTF-16 unit with a usable table result produces exactly one byte in
                 * target. A unit without a usable result is handed to fromU() so that an extension mapping can be
                 * tried. The carried-over code point in fromUChar32 is read on entry and stored again on exit.
                 *
                 * offsets may be null. When it is not, source indexes are appended in batches (before each fromU()
                 * call and once at the end) rather than one at a time per written byte.
                 *
                 * The returned CoderResult is cr[0]: it starts as UNDERFLOW, may be set by getTrailSingleBMP() or
                 * fromU(), becomes malformedForLength(1) for an unmatched trail surrogate, and becomes OVERFLOW when
                 * input remains while target has no room left.
   3988          */
   3989         private CoderResult cnvMBCSSingleFromBMPWithOffsets(CharBuffer source, ByteBuffer target, IntBuffer offsets,
   3990                 boolean flush) {
   3991 
   3992             CoderResult[] cr = { CoderResult.UNDERFLOW };
   3993 
   3994             int sourceArrayIndex, lastSource;
   3995             int targetCapacity, length;
   3996             char[] table;
   3997             char[] results;
   3998 
   3999             int c, sourceIndex;
   4000             char value, minValue;
   4001 
   4002             /* set up the local pointers */
   4003             sourceArrayIndex = source.position();
   4004             targetCapacity = target.remaining();
   4005             table = sharedData.mbcs.fromUnicodeTable;
   4006 
                    /* the LF/NL-swapped result table is used when OPTION_SWAP_LFNL is set */
   4007             if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
   4008                 results = sharedData.mbcs.swapLFNLFromUnicodeChars;
   4009             } else {
   4010                 results = sharedData.mbcs.fromUnicodeChars;
   4011             }
   4012 
   4013             if (useFallback) {
   4014                 /* use all roundtrip and fallback results */
   4015                 minValue = 0x800;
   4016             } else {
   4017                 /* use only roundtrips and fallbacks from private-use characters */
   4018                 minValue = 0xc00;
   4019             }
   4020 
   4021             /* get the converter state from UConverter */
   4022             c = fromUChar32;
   4023 
   4024             /* sourceIndex=-1 if the current character began in the previous buffer */
   4025             sourceIndex = c == 0 ? 0 : -1;
   4026             lastSource = sourceArrayIndex;
   4027 
   4028             /*
   4029              * since the conversion here is 1:1 UChar:uint8_t, we need only one counter for the minimum of the
   4030              * sourceLength and targetCapacity
   4031              */
   4032             length = source.limit() - sourceArrayIndex;
   4033             if (length < targetCapacity) {
   4034                 targetCapacity = length;
   4035             }
   4036 
                    /*
                     * a code point carried over from the previous call is first handed to getTrailSingleBMP(); if that
                     * returns false the main loop below is skipped
                     */
   4037             boolean doloop = true;
   4038             if (c != 0 && targetCapacity > 0) {
   4039                 SideEffectsSingleBMP x = new SideEffectsSingleBMP(c, sourceArrayIndex);
   4040                 doloop = getTrailSingleBMP(source, x, cr);
   4041                 c = x.c;
   4042                 sourceArrayIndex = x.sourceArrayIndex;
   4043             }
   4044 
   4045             if (doloop) {
   4046                 while (targetCapacity > 0) {
   4047                     /*
   4048                      * Get a correct Unicode code point: a single UChar for a BMP code point or a matched surrogate pair
   4049                      * for a "supplementary code point".
   4050                      */
   4051                     c = source.get(sourceArrayIndex++);
   4052                     /*
   4053                      * Do not immediately check for single surrogates: Assume that they are unassigned and check for
   4054                      * them in that case. This speeds up the conversion of assigned characters.
   4055                      */
   4056                     /* convert the Unicode code point in c into codepage bytes */
   4057                     value = MBCS_SINGLE_RESULT_FROM_U(table, results, c);
   4058 
   4059                     /* is this code point assigned, or do we use fallbacks? */
   4060                     if (value >= minValue) {
   4061                         /* assigned, write the output character bytes from value and length */
   4062                         /* length==1 */
   4063                         /* this is easy because we know that there is enough space */
   4064                         target.put((byte) value);
   4065                         --targetCapacity;
   4066 
   4067                         /* normal end of conversion: prepare for a new character */
   4068                         c = 0;
   4069                         continue;
   4070                     } else if (!UTF16.isSurrogate((char) c)) {
   4071                         /* normal, unassigned BMP character */
   4072                     } else if (UTF16.isLeadSurrogate((char) c)) {
   4073                         // getTrail:
                                /* the helper updates c and the read index through x; a false result ends the loop */
   4074                         SideEffectsSingleBMP x = new SideEffectsSingleBMP(c, sourceArrayIndex);
   4075                         doloop = getTrailSingleBMP(source, x, cr);
   4076                         c = x.c;
   4077                         sourceArrayIndex = x.sourceArrayIndex;
   4078                         if (!doloop)
   4079                             break;
   4080                     } else {
   4081                         /* this is an unmatched trail code unit (2nd surrogate) */
   4082                         /* callback(illegal) */
   4083                         cr[0] = CoderResult.malformedForLength(1);
   4084                         break;
   4085                     }
   4086 
   4087                     /* c does not have a mapping */
   4088 
   4089                     /* get the number of code units for c to correctly advance sourceIndex */
   4090                     length = UTF16.getCharCount(c);
   4091 
   4092                     /* set offsets since the start or the last extension */
   4093                     if (offsets != null) {
   4094                         int count = sourceArrayIndex - lastSource;
   4095 
   4096                         /* do not set the offset for this character */
   4097                         count -= length;
   4098 
   4099                         while (count > 0) {
   4100                             offsets.put(sourceIndex++);
   4101                             --count;
   4102                         }
   4103                         /* offsets and sourceIndex are now set for the current character */
   4104                     }
   4105 
   4106                     /* try an extension mapping */
                            /* fromU() works on source's own position, so it is synchronized before and re-read after the call */
   4107                     lastSource = sourceArrayIndex;
   4108                     source.position(sourceArrayIndex);
   4109                     c = fromU(c, source, target, offsets, sourceIndex, length, flush, cr);
   4110                     sourceArrayIndex = source.position();
                            /* move sourceIndex past this character and past any further units fromU() consumed from source */
   4111                     sourceIndex += length + (sourceArrayIndex - lastSource);
   4112                     lastSource = sourceArrayIndex;
   4113 
   4114                     if (cr[0].isError()) {
   4115                         /* not mappable or buffer overflow */
   4116                         break;
   4117                     } else {
   4118                         /* a mapping was written to the target, continue */
   4119 
   4120                         /* recalculate the targetCapacity after an extension mapping */
   4121                         targetCapacity = target.remaining();
   4122                         length = source.limit() - sourceArrayIndex;
   4123                         if (length < targetCapacity) {
   4124                             targetCapacity = length;
   4125                         }
   4126                     }
   4127                 }
   4128             }
   4129 
   4130             if (sourceArrayIndex < source.limit() && !target.hasRemaining()) {
   4131                 /* target is full */
   4132                 cr[0] = CoderResult.OVERFLOW;
   4133             }
   4134 
   4135             /* set offsets since the start or the last callback */
   4136             if (offsets != null) {
   4137                 int count = sourceArrayIndex - lastSource;
   4138                 while (count > 0) {
   4139                     offsets.put(sourceIndex++);
   4140                     --count;
   4141                 }
   4142             }
   4143 
   4144             /* set the converter state back into UConverter */
   4145             fromUChar32 = c;
   4146 
   4147             /* write back the updated pointers */
   4148             source.position(sourceArrayIndex);
   4149 
   4150             return cr[0];
   4151         }
   4152 
   4153         /* This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for single-byte codepages. */
   4154         private CoderResult cnvMBCSSingleFromUnicodeWithOffsets(CharBuffer source, ByteBuffer target,
   4155                 IntBuffer offsets, boolean flush) {
   4156 
   4157             CoderResult[] cr = { CoderResult.UNDERFLOW };
   4158 
   4159             int sourceArrayIndex;
   4160 
   4161             char[] table;
   4162             char[] results;
   4163 
   4164             int c;
   4165             int sourceIndex, nextSourceIndex;
   4166 
   4167             char value, minValue;
   4168 
   4169             /* set up the local pointers */
   4170             short uniMask;
   4171             sourceArrayIndex = source.position();
   4172 
   4173             table = sharedData.mbcs.fromUnicodeTable;
   4174 
   4175             if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
   4176                 results = sharedData.mbcs.swapLFNLFromUnicodeChars;
   4177             } else {
   4178                 results = sharedData.mbcs.fromUnicodeChars;
   4179             }
   4180 
   4181             if (useFallback) {
   4182                 /* use all roundtrip and fallback results */
   4183                 minValue = 0x800;
   4184             } else {
   4185                 /* use only roundtrips and fallbacks from private-use characters */
   4186                 minValue = 0xc00;
   4187             }
   4188             // agljport:comment hasSupplementary only used in getTrail block which now simply repeats the mask operation
   4189             uniMask = sharedData.mbcs.unicodeMask;
   4190 
   4191             /* get the converter state from UConverter */
   4192             c = fromUChar32;
   4193 
   4194             /* sourceIndex=-1 if the current character began in the previous buffer */
   4195             sourceIndex = c == 0 ? 0 : -1;
   4196             nextSourceIndex = 0;
   4197 
   4198             boolean doloop = true;
   4199             boolean doread = true;
   4200             if (c != 0 && target.hasRemaining()) {
   4201                 if (UTF16.isLeadSurrogate((char) c)) {
   4202                     SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex, nextSourceIndex);
   4203                     doloop = getTrailDouble(source, target, uniMask, x, flush, cr);
   4204                     doread = x.doread;
   4205                     c = x.c;
   4206                     sourceArrayIndex = x.sourceArrayIndex;
   4207                     sourceIndex = x.sourceIndex;
   4208                     nextSourceIndex = x.nextSourceIndex;
   4209                 } else {
   4210                     doread = false;
   4211                 }
   4212             }
   4213 
   4214             if (doloop) {
   4215                 while (!doread || sourceArrayIndex < source.limit()) {
   4216                     /*
   4217                      * This following test is to see if available input would overflow the output. It does not catch
   4218                      * output of more than one byte that overflows as a result of a multi-byte character or callback
   4219                      * output from the last source character. Therefore, those situations also test for overflows and
   4220                      * will then break the loop, too.
   4221                      */
   4222                     if (target.hasRemaining()) {
   4223                         /*
   4224                          * Get a correct Unicode code point: a single UChar for a BMP code point or a matched surrogate
   4225                          * pair for a "supplementary code point".
   4226                          */
   4227 
   4228                         if (doread) {
   4229                             c = source.get(sourceArrayIndex++);
   4230                             ++nextSourceIndex;
   4231                             if (UTF16.isSurrogate((char) c)) {
   4232                                 if (UTF16.isLeadSurrogate((char) c)) {
   4233                                     // getTrail:
   4234                                     SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex,
   4235                                             nextSourceIndex);
   4236                                     doloop = getTrailDouble(source, target, uniMask, x, flush, cr);
   4237                                     c = x.c;
   4238                                     sourceArrayIndex = x.sourceArrayIndex;
   4239                                     sourceIndex = x.sourceIndex;
   4240                                     nextSourceIndex = x.nextSourceIndex;
   4241                                     if (x.doread) {
   4242                                         if (doloop)
   4243                                             continue;
   4244                                         else
   4245                                             break;
   4246                                     }
   4247                                 } else {
   4248                                     /* this is an unmatched trail code unit (2nd surrogate) */
   4249                                     /* callback(illegal) */
   4250                                     cr[0] = CoderResult.malformedForLength(1);
   4251                                     break;
   4252                                 }
   4253                             }
   4254                         } else {
   4255                             doread = true;
   4256                         }
   4257 
   4258                         /* convert the Unicode code point in c into codepage bytes */
   4259                         value = MBCS_SINGLE_RESULT_FROM_U(table, results, c);
   4260 
   4261                         /* is this code point assigned, or do we use fallbacks? */
   4262                         if (value >= minValue) {
   4263                             /* assigned, write the output character bytes from value and length */
   4264                             /* length==1 */
   4265                             /* this is easy because we know that there is enough space */
   4266                             target.put((byte) value);
   4267                             if (offsets != null) {
   4268                                 offsets.put(sourceIndex);
   4269                             }
   4270 
   4271                             /* normal end of conversion: prepare for a new character */
   4272                             c = 0;
   4273                             sourceIndex = nextSourceIndex;
   4274                         } else { /* unassigned */
   4275                             /* try an extension mapping */
   4276                             SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex,
   4277                                     nextSourceIndex);
   4278                             doloop = unassignedDouble(source, target, x, flush, cr);
   4279                             c = x.c;
   4280                             sourceArrayIndex = x.sourceArrayIndex;
   4281                             sourceIndex = x.sourceIndex;
   4282                             nextSourceIndex = x.nextSourceIndex;
   4283                             if (!doloop)
   4284                                 break;
   4285                         }
   4286                     } else {
   4287                         /* target is full */
   4288                         cr[0] = CoderResult.OVERFLOW;
   4289                         break;
   4290                     }
   4291                 }
   4292             }
   4293 
   4294             /* set the converter state back into UConverter */
   4295             fromUChar32 = c;
   4296 
   4297             /* write back the updated pointers */
   4298             source.position(sourceArrayIndex);
   4299 
   4300             return cr[0];
   4301         }
   4302 
   4303         /* This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for double-byte codepages. */
   4304         private CoderResult cnvMBCSDoubleFromUnicodeWithOffsets(CharBuffer source, ByteBuffer target,
   4305                 IntBuffer offsets, boolean flush) {
   4306             CoderResult[] cr = { CoderResult.UNDERFLOW };
   4307 
   4308             int sourceArrayIndex;
   4309 
   4310             char[] table;
   4311             char[] chars;
   4312 
   4313             int c, sourceIndex, nextSourceIndex;
   4314 
   4315             int stage2Entry;
   4316             int value;
   4317             int length;
   4318             short uniMask;
   4319 
   4320             /* use optimized function if possible */
   4321             uniMask = sharedData.mbcs.unicodeMask;
   4322 
   4323             /* set up the local pointers */
   4324             sourceArrayIndex = source.position();
   4325 
   4326             table = sharedData.mbcs.fromUnicodeTable;
   4327             int[] tableInts = sharedData.mbcs.fromUnicodeTableInts;
   4328 
   4329             if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
   4330                 chars = sharedData.mbcs.swapLFNLFromUnicodeChars;
   4331             } else {
   4332                 chars = sharedData.mbcs.fromUnicodeChars;
   4333             }
   4334 
   4335             /* get the converter state from UConverter */
   4336             c = fromUChar32;
   4337 
   4338             /* sourceIndex=-1 if the current character began in the previous buffer */
   4339             sourceIndex = c == 0 ? 0 : -1;
   4340             nextSourceIndex = 0;
   4341 
   4342             /* conversion loop */
   4343             boolean doloop = true;
   4344             boolean doread = true;
   4345             if (c != 0 && target.hasRemaining()) {
   4346                 if (UTF16.isLeadSurrogate((char) c)) {
   4347                     SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex, nextSourceIndex);
   4348                     doloop = getTrailDouble(source, target, uniMask, x, flush, cr);
   4349                     doread = x.doread;
   4350                     c = x.c;
   4351                     sourceArrayIndex = x.sourceArrayIndex;
   4352                     sourceIndex = x.sourceIndex;
   4353                     nextSourceIndex = x.nextSourceIndex;
   4354                 } else {
   4355                     doread = false;
   4356                 }
   4357             }
   4358 
   4359             if (doloop) {
   4360                 while (!doread || sourceArrayIndex < source.limit()) {
   4361                     /*
   4362                      * This following test is to see if available input would overflow the output. It does not catch
   4363                      * output of more than one byte that overflows as a result of a multi-byte character or callback
   4364                      * output from the last source character. Therefore, those situations also test for overflows and
   4365                      * will then break the loop, too.
   4366                      */
   4367                     if (target.hasRemaining()) {
   4368                         if (doread) {
   4369                             /*
   4370                              * Get a correct Unicode code point: a single UChar for a BMP code point or a matched
   4371                              * surrogate pair for a "supplementary code point".
   4372                              */
   4373                             c = source.get(sourceArrayIndex++);
   4374                             ++nextSourceIndex;
   4375                             /*
   4376                              * This also tests if the codepage maps single surrogates. If it does, then surrogates are
   4377                              * not paired but mapped separately. Note that in this case unmatched surrogates are not
   4378                              * detected.
   4379                              */
   4380                             if (UTF16.isSurrogate((char) c) && (uniMask & UConverterConstants.HAS_SURROGATES) == 0) {
   4381                                 if (UTF16.isLeadSurrogate((char) c)) {
   4382                                     // getTrail:
   4383                                     SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex,
   4384                                             nextSourceIndex);
   4385                                     doloop = getTrailDouble(source, target, uniMask, x, flush, cr);
   4386                                     c = x.c;
   4387                                     sourceArrayIndex = x.sourceArrayIndex;
   4388                                     sourceIndex = x.sourceIndex;
   4389                                     nextSourceIndex = x.nextSourceIndex;
   4390 
   4391                                     if (x.doread) {
   4392                                         if (doloop)
   4393                                             continue;
   4394                                         else
   4395                                             break;
   4396                                     }
   4397                                 } else {
   4398                                     /* this is an unmatched trail code unit (2nd surrogate) */
   4399                                     /* callback(illegal) */
   4400                                     cr[0] = CoderResult.malformedForLength(1);
   4401                                     break;
   4402                                 }
   4403                             }
   4404                         } else {
   4405                             doread = true;
   4406                         }
   4407 
   4408                         /* convert the Unicode code point in c into codepage bytes */
   4409                         stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, c);
   4410 
   4411                         /* get the bytes and the length for the output */
   4412                         /* MBCS_OUTPUT_2 */
   4413                         value = MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, c);
   4414                         if (value <= 0xff) {
   4415                             length = 1;
   4416                         } else {
   4417                             length = 2;
   4418                         }
   4419 
   4420                         /* is this code point assigned, or do we use fallbacks? */
   4421                         if (!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) || (isFromUUseFallback(c) && value != 0))) {
   4422                             /*
   4423                              * We allow a 0 byte output if the "assigned" bit is set for this entry. There is no way
   4424                              * with this data structure for fallback output to be a zero byte.
   4425                              */
   4426 
   4427                             // unassigned:
   4428                             SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex,
   4429                                     nextSourceIndex);
   4430 
   4431                             doloop = unassignedDouble(source, target, x, flush, cr);
   4432                             c = x.c;
   4433                             sourceArrayIndex = x.sourceArrayIndex;
   4434                             sourceIndex = x.sourceIndex;
   4435                             nextSourceIndex = x.nextSourceIndex;
   4436                             if (doloop)
   4437                                 continue;
   4438                             else
   4439                                 break;
   4440                         }
   4441 
   4442                         /* write the output character bytes from value and length */
   4443                         /* from the first if in the loop we know that targetCapacity>0 */
   4444                         if (length == 1) {
   4445                             /* this is easy because we know that there is enough space */
   4446                             target.put((byte) value);
   4447                             if (offsets != null) {
   4448                                 offsets.put(sourceIndex);
   4449                             }
   4450                         } else /* length==2 */{
   4451                             target.put((byte) (value >>> 8));
   4452                             if (2 <= target.remaining()) {
   4453                                 target.put((byte) value);
   4454                                 if (offsets != null) {
   4455                                     offsets.put(sourceIndex);
   4456                                     offsets.put(sourceIndex);
   4457                                 }
   4458                             } else {
   4459                                 if (offsets != null) {
   4460                                     offsets.put(sourceIndex);
   4461                                 }
   4462                                 errorBuffer[0] = (byte) value;
   4463                                 errorBufferLength = 1;
   4464 
   4465                                 /* target overflow */
   4466                                 cr[0] = CoderResult.OVERFLOW;
   4467                                 c = 0;
   4468                                 break;
   4469                             }
   4470                         }
   4471 
   4472                         /* normal end of conversion: prepare for a new character */
   4473                         c = 0;
   4474                         sourceIndex = nextSourceIndex;
   4475                         continue;
   4476                     } else {
   4477                         /* target is full */
   4478                         cr[0] = CoderResult.OVERFLOW;
   4479                         break;
   4480                     }
   4481                 }
   4482             }
   4483 
   4484             /* set the converter state back into UConverter */
   4485             fromUChar32 = c;
   4486 
   4487             /* write back the updated pointers */
   4488             source.position(sourceArrayIndex);
   4489 
   4490             return cr[0];
   4491         }
   4492 
   4493         private final class SideEffectsSingleBMP {
   4494             int c, sourceArrayIndex;
   4495 
   4496             SideEffectsSingleBMP(int c_, int sourceArrayIndex_) {
   4497                 c = c_;
   4498                 sourceArrayIndex = sourceArrayIndex_;
   4499             }
   4500         }
   4501 
   4502         // function made out of block labeled getTrail in ucnv_MBCSSingleFromUnicodeWithOffsets
   4503         // assumes input c is lead surrogate
   4504         private final boolean getTrailSingleBMP(CharBuffer source, SideEffectsSingleBMP x, CoderResult[] cr) {
   4505             if (x.sourceArrayIndex < source.limit()) {
   4506                 /* test the following code unit */
   4507                 char trail = source.get(x.sourceArrayIndex);
   4508                 if (UTF16.isTrailSurrogate(trail)) {
   4509                     ++x.sourceArrayIndex;
   4510                     x.c = UCharacter.getCodePoint((char) x.c, trail);
   4511                     /* this codepage does not map supplementary code points */
   4512                     /* callback(unassigned) */
   4513                     cr[0] = CoderResult.unmappableForLength(2);
   4514                     return false;
   4515                 } else {
   4516                     /* this is an unmatched lead code unit (1st surrogate) */
   4517                     /* callback(illegal) */
   4518                     cr[0] = CoderResult.malformedForLength(1);
   4519                     return false;
   4520                 }
   4521             } else {
   4522                 /* no more input */
   4523                 return false;
   4524             }
   4525             // return true;
   4526         }
   4527 
   4528         private final class SideEffects {
   4529             int c, sourceArrayIndex, sourceIndex, nextSourceIndex, prevSourceIndex, prevLength;
   4530             boolean doread = true;
   4531 
   4532             SideEffects(int c_, int sourceArrayIndex_, int sourceIndex_, int nextSourceIndex_, int prevSourceIndex_,
   4533                     int prevLength_) {
   4534                 c = c_;
   4535                 sourceArrayIndex = sourceArrayIndex_;
   4536                 sourceIndex = sourceIndex_;
   4537                 nextSourceIndex = nextSourceIndex_;
   4538                 prevSourceIndex = prevSourceIndex_;
   4539                 prevLength = prevLength_;
   4540             }
   4541         }
   4542 
   4543         // function made out of block labeled getTrail in ucnv_MBCSFromUnicodeWithOffsets
   4544         // assumes input c is lead surrogate
   4545         private final boolean getTrail(CharBuffer source, ByteBuffer target, int uniMask, SideEffects x,
   4546                 boolean flush, CoderResult[] cr) {
   4547             if (x.sourceArrayIndex < source.limit()) {
   4548                 /* test the following code unit */
   4549                 char trail = source.get(x.sourceArrayIndex);
   4550                 if (UTF16.isTrailSurrogate(trail)) {
   4551                     ++x.sourceArrayIndex;
   4552                     ++x.nextSourceIndex;
   4553                     /* convert this supplementary code point */
   4554                     x.c = UCharacter.getCodePoint((char) x.c, trail);
   4555                     if ((uniMask & UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
   4556                         /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
   4557                         fromUnicodeStatus = x.prevLength; /* save the old state */
   4558                         /* callback(unassigned) */
   4559                         x.doread = true;
   4560                         return unassigned(source, target, null, x, flush, cr);
   4561                     } else {
   4562                         x.doread = false;
   4563                         return true;
   4564                     }
   4565                 } else {
   4566                     /* this is an unmatched lead code unit (1st surrogate) */
   4567                     /* callback(illegal) */
   4568                     cr[0] = CoderResult.malformedForLength(1);
   4569                     return false;
   4570                 }
   4571             } else {
   4572                 /* no more input */
   4573                 return false;
   4574             }
   4575         }
   4576 
   4577         // function made out of block labeled unassigned in ucnv_MBCSFromUnicodeWithOffsets
   4578         private final boolean unassigned(CharBuffer source, ByteBuffer target, IntBuffer offsets, SideEffects x,
   4579                 boolean flush, CoderResult[] cr) {
   4580             /* try an extension mapping */
   4581             int sourceBegin = x.sourceArrayIndex;
   4582             source.position(x.sourceArrayIndex);
   4583             x.c = fromU(x.c, source, target, null, x.sourceIndex, x.nextSourceIndex, flush, cr);
   4584             x.sourceArrayIndex = source.position();
   4585             x.nextSourceIndex += x.sourceArrayIndex - sourceBegin;
   4586             x.prevLength = fromUnicodeStatus;
   4587 
   4588             if (cr[0].isError()) {
   4589                 /* not mappable or buffer overflow */
   4590                 return false;
   4591             } else {
   4592                 /* a mapping was written to the target, continue */
   4593 
   4594                 /* recalculate the targetCapacity after an extension mapping */
   4595                 // x.targetCapacity=pArgs.targetLimit-x.targetArrayIndex;
   4596                 /* normal end of conversion: prepare for a new character */
   4597                 if (offsets != null) {
   4598                     x.prevSourceIndex = x.sourceIndex;
   4599                     x.sourceIndex = x.nextSourceIndex;
   4600                 }
   4601                 return true;
   4602             }
   4603         }
   4604 
   4605         private final class SideEffectsDouble {
   4606             int c, sourceArrayIndex, sourceIndex, nextSourceIndex;
   4607             boolean doread = true;
   4608 
   4609             SideEffectsDouble(int c_, int sourceArrayIndex_, int sourceIndex_, int nextSourceIndex_) {
   4610                 c = c_;
   4611                 sourceArrayIndex = sourceArrayIndex_;
   4612                 sourceIndex = sourceIndex_;
   4613                 nextSourceIndex = nextSourceIndex_;
   4614             }
   4615         }
   4616 
   4617         // function made out of block labeled getTrail in ucnv_MBCSDoubleFromUnicodeWithOffsets
   4618         // assumes input c is lead surrogate
   4619         private final boolean getTrailDouble(CharBuffer source, ByteBuffer target, int uniMask,
   4620                 SideEffectsDouble x, boolean flush, CoderResult[] cr) {
   4621             if (x.sourceArrayIndex < source.limit()) {
   4622                 /* test the following code unit */
   4623                 char trail = source.get(x.sourceArrayIndex);
   4624                 if (UTF16.isTrailSurrogate(trail)) {
   4625                     ++x.sourceArrayIndex;
   4626                     ++x.nextSourceIndex;
   4627                     /* convert this supplementary code point */
   4628                     x.c = UCharacter.getCodePoint((char) x.c, trail);
   4629                     if ((uniMask & UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
   4630                         /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
   4631                         /* callback(unassigned) */
   4632                         x.doread = true;
   4633                         return unassignedDouble(source, target, x, flush, cr);
   4634                     } else {
   4635                         x.doread = false;
   4636                         return true;
   4637                     }
   4638                 } else {
   4639                     /* this is an unmatched lead code unit (1st surrogate) */
   4640                     /* callback(illegal) */
   4641                     cr[0] = CoderResult.malformedForLength(1);
   4642                     return false;
   4643                 }
   4644             } else {
   4645                 /* no more input */
   4646                 return false;
   4647             }
   4648         }
   4649 
   4650         // function made out of block labeled unassigned in ucnv_MBCSDoubleFromUnicodeWithOffsets
   4651         private final boolean unassignedDouble(CharBuffer source, ByteBuffer target, SideEffectsDouble x,
   4652                 boolean flush, CoderResult[] cr) {
   4653             /* try an extension mapping */
   4654             int sourceBegin = x.sourceArrayIndex;
   4655             source.position(x.sourceArrayIndex);
   4656             x.c = fromU(x.c, source, target, null, x.sourceIndex, x.nextSourceIndex, flush, cr);
   4657             x.sourceArrayIndex = source.position();
   4658             x.nextSourceIndex += x.sourceArrayIndex - sourceBegin;
   4659 
   4660             if (cr[0].isError()) {
   4661                 /* not mappable or buffer overflow */
   4662                 return false;
   4663             } else {
   4664                 /* a mapping was written to the target, continue */
   4665 
   4666                 /* recalculate the targetCapacity after an extension mapping */
   4667                 // x.targetCapacity=pArgs.targetLimit-x.targetArrayIndex;
   4668                 /* normal end of conversion: prepare for a new character */
   4669                 x.sourceIndex = x.nextSourceIndex;
   4670                 return true;
   4671             }
   4672         }
   4673 
   4674         /**
   4675          * Overrides super class method
   4676          *
   4677          * @param encoder
   4678          * @param source
   4679          * @param target
   4680          * @param offsets
   4681          * @return
   4682          */
   4683         protected CoderResult cbFromUWriteSub(CharsetEncoderICU encoder, CharBuffer source, ByteBuffer target,
   4684                 IntBuffer offsets) {
   4685             CharsetMBCS cs = (CharsetMBCS) encoder.charset();
   4686             byte[] subchar;
   4687             int length;
   4688 
   4689             if (cs.subChar1 != 0
   4690                     && (cs.sharedData.mbcs.extIndexes != null ? encoder.useSubChar1
   4691                             : (encoder.invalidUCharBuffer[0] <= 0xff))) {
   4692                 /*
   4693                  * select subChar1 if it is set (not 0) and the unmappable Unicode code point is up to U+00ff (IBM MBCS
   4694                  * behavior)
   4695                  */
   4696                 subchar = new byte[] { cs.subChar1 };
   4697                 length = 1;
   4698             } else {
   4699                 /* select subChar in all other cases */
   4700                 subchar = cs.subChar;
   4701                 length = cs.subCharLen;
   4702             }
   4703 
   4704             /* reset the selector for the next code point */
   4705             encoder.useSubChar1 = false;
   4706 
   4707             if (cs.sharedData.mbcs.outputType == MBCS_OUTPUT_2_SISO) {
   4708                 byte[] buffer = new byte[4];
   4709                 int i = 0;
   4710 
   4711                 /* fromUnicodeStatus contains prevLength */
   4712                 switch (length) {
   4713                 case 1:
   4714                     if (encoder.fromUnicodeStatus == 2) {
   4715                         /* DBCS mode and SBCS sub char: change to SBCS */
   4716                         encoder.fromUnicodeStatus = 1;
   4717                         buffer[i++] = UConverterConstants.SI;
   4718                     }
   4719                     buffer[i++] = subchar[0];
   4720                     break;
   4721                 case 2:
   4722                     if (encoder.fromUnicodeStatus <= 1) {
   4723                         /* SBCS mode and DBCS sub char: change to DBCS */
   4724                         encoder.fromUnicodeStatus = 2;
   4725                         buffer[i++] = UConverterConstants.SO;
   4726                     }
   4727                     buffer[i++] = subchar[0];
   4728                     buffer[i++] = subchar[1];
   4729                     break;
   4730                 default:
   4731                     throw new IllegalArgumentException();
   4732                 }
   4733 
   4734                 subchar = buffer;
   4735                 length = i;
   4736             }
   4737             return CharsetEncoderICU.fromUWriteBytes(encoder, subchar, 0, length, target, offsets, source.position());
   4738         }
   4739 
   4740         /**
   4741          * Gets called whenever CharsetEncoder.replaceWith gets called. allowReplacementChanges only allows subChar and
   4742          * subChar1 to be modified outside construction (since replaceWith is called once during construction).
   4743          *
   4744          * @param replacement
   4745          *            The replacement for subchar.
   4746          */
   4747         protected void implReplaceWith(byte[] replacement) {
   4748             if (allowReplacementChanges) {
   4749                 CharsetMBCS cs = (CharsetMBCS) this.charset();
   4750 
   4751                 System.arraycopy(replacement, 0, cs.subChar, 0, replacement.length);
   4752                 cs.subCharLen = (byte) replacement.length;
   4753                 cs.subChar1 = 0;
   4754             }
   4755         }
   4756     }
   4757 
   4758     public CharsetDecoder newDecoder() {
   4759         return new CharsetDecoderMBCS(this);
   4760     }
   4761 
   4762     public CharsetEncoder newEncoder() {
   4763         return new CharsetEncoderMBCS(this);
   4764     }
   4765 
   4766     @SuppressWarnings("fallthrough")
   4767     void MBCSGetFilteredUnicodeSetForUnicode(UConverterSharedData data, UnicodeSet setFillIn, int which, int filter){
   4768         UConverterMBCSTable mbcsTable;
   4769         char[] table;
   4770         char st1,maxStage1, st2;
   4771         int st3;
   4772         int c ;
   4773 
   4774         mbcsTable = data.mbcs;
   4775         table = mbcsTable.fromUnicodeTable;
   4776         if(mbcsTable.hasSupplementary()){
   4777             maxStage1 = 0x440;
   4778         }
   4779         else{
   4780             maxStage1 = 0x40;
   4781         }
   4782         c=0; /* keep track of current code point while enumerating */
   4783 
   4784         if(mbcsTable.outputType==MBCS_OUTPUT_1){
   4785             char stage2, stage3;
   4786             char minValue;
   4787             char[] results = mbcsTable.fromUnicodeChars;
   4788 
   4789             if(which==ROUNDTRIP_SET) {
   4790                 /* use only roundtrips */
   4791                 minValue=0xf00;
   4792             } else {
   4793                 /* use all roundtrip and fallback results */
   4794                 minValue=0x800;
   4795             }
   4796             for(st1=0;st1<maxStage1;++st1){
   4797                 st2 = table[st1];
   4798                 if(st2>maxStage1){
   4799                     stage2 = st2;
   4800                     for(st2=0; st2<64; ++st2){
   4801                         st3 = table[stage2 + st2];
   4802                         if(st3!=0){
   4803                             /*read the stage 3 block */
   4804                             stage3 = (char)st3;
   4805                             do {
   4806                                 if(results[stage3++]>=minValue){
   4807                                      setFillIn.add(c);
   4808                                 }
   4809                             }while((++c&0xf) !=0);
   4810                           } else {
   4811                             c+= 16; /*empty stage 2 block */
   4812                         }
   4813                     }
   4814                 } else {
   4815                     c+=1024; /* empty stage 2 block */
   4816                 }
   4817             }
   4818         } else {
   4819             int[] tableInts = mbcsTable.fromUnicodeTableInts;
   4820             int stage2,stage3;
   4821             byte[] bytes;
   4822             int st3Multiplier;
   4823             int value;
   4824             boolean useFallBack;
   4825             bytes = mbcsTable.fromUnicodeBytes;
   4826             char[] chars = mbcsTable.fromUnicodeChars;
   4827             int[] ints = mbcsTable.fromUnicodeInts;
   4828             useFallBack = (which == ROUNDTRIP_AND_FALLBACK_SET);
   4829             switch(mbcsTable.outputType) {
   4830             case MBCS_OUTPUT_3:
   4831             case MBCS_OUTPUT_4_EUC:
   4832                 st3Multiplier = 3;
   4833                 break;
   4834             case MBCS_OUTPUT_4:
   4835                 st3Multiplier =4;
   4836                 break;
   4837             default:
   4838                 st3Multiplier =2;
   4839                 break;
   4840             }
   4841 
   4842             for(st1=0;st1<maxStage1;++st1){
   4843                 st2 = table[st1];
   4844                 if(st2>(maxStage1>>1)){
   4845                     stage2 =  st2 ;
   4846                     for(st2=0;st2<64;++st2){
   4847                         /*read the stage 3 block */
   4848                         st3 = tableInts[stage2 + st2];
   4849                         if(st3!=0){
   4850                         //if((st3=table[stage2+st2])!=0){
   4851                             stage3 = st3Multiplier*16*(st3&UConverterConstants.UNSIGNED_SHORT_MASK);
   4852 
   4853                             /* get the roundtrip flags for the stage 3 block */
   4854                             st3>>>=16;
   4855                             switch(filter) {
   4856                             case UCNV_SET_FILTER_NONE:
   4857                                 do {
   4858                                    if((st3&1)!=0){
   4859                                         setFillIn.add(c);
   4860                                    }else if (useFallBack) {
   4861                                         int b =0;
   4862                                         switch(st3Multiplier) {
   4863                                         case 4:
   4864                                             b = ints[stage3 / 4];
   4865                                             break;
   4866                                         case 3:
   4867                                             b |= bytes[stage3] | bytes[stage3 + 1] | bytes[stage3 + 2];
   4868                                             break;
   4869                                         case 2:
   4870                                             b = chars[stage3 / 2];
   4871                                             break;
   4872                                         default:
   4873                                             break;
   4874                                         }
   4875                                         stage3+=st3Multiplier;
   4876                                         if(b!=0) {
   4877                                             setFillIn.add(c);
   4878                                         }
   4879                                     }
   4880                                     st3>>=1;
   4881                                 }while((++c&0xf)!=0);
   4882                                 break;
   4883                             case UCNV_SET_FILTER_DBCS_ONLY:
   4884                                 /* Ignore single bytes results (<0x100). */
   4885                                 do {
   4886                                     if(((st3&1) != 0 || useFallBack) && chars[stage3 / 2] >= 0x100){
   4887                                         setFillIn.add(c);
   4888                                     }
   4889                                     st3>>=1;
   4890                                     stage3+=2;
   4891                                 }while((++c&0xf) != 0);
   4892                                break;
   4893                             case UCNV_SET_FILTER_2022_CN :
   4894                                 /* only add code points that map to CNS 11643 planes 1&2 for non-EXT ISO-2022-CN. */
   4895                                 do {
   4896                                     if(((st3&1) != 0 || useFallBack) &&
   4897                                             ((value= (UConverterConstants.UNSIGNED_BYTE_MASK & bytes[stage3]))==0x81 || value==0x82) ){
   4898                                         setFillIn.add(c);
   4899                                     }
   4900                                     st3>>=1;
   4901                                     stage3+=3;
   4902                                 }while((++c&0xf)!=0);
   4903                                 break;
   4904                             case UCNV_SET_FILTER_SJIS:
   4905                                 /* only add code points that map to Shift-JIS codes corresponding to JIS X 0208. */
   4906                                 do{
   4907                                     if(((st3&1) != 0 || useFallBack) && (value=chars[stage3 / 2])>=0x8140 && value<=0xeffc){
   4908                                         setFillIn.add(c);
   4909                                     }
   4910                                     st3>>=1;
   4911                                     stage3+=2;
   4912                                 }while((++c&0xf)!=0);
   4913                                 break;
   4914                             case UCNV_SET_FILTER_GR94DBCS:
   4915                                 /* only add code points that maps to ISO 2022 GR 94 DBCS codes*/
   4916                                 do {
   4917                                     if(((st3&1) != 0 || useFallBack) &&
   4918                                             (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=chars[stage3 / 2])- 0xa1a1))<=(0xfefe - 0xa1a1) &&
   4919                                             (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1)) <= (0xfe - 0xa1)){
   4920                                         setFillIn.add(c);
   4921                                     }
   4922                                     st3>>=1;
   4923                                     stage3+=2;
   4924                                 }while((++c&0xf)!=0);
   4925                                 break;
   4926                             case UCNV_SET_FILTER_HZ:
   4927                                 /*Only add code points that are suitable for HZ DBCS*/
   4928                                 do {
   4929                                     if( ((st3&1) != 0 || useFallBack) &&
   4930                                             (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=chars[stage3 / 2])-0xa1a1))<=(0xfdfe - 0xa1a1) &&
   4931                                             (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1)) <= (0xfe - 0xa1)){
   4932                                         setFillIn.add(c);
   4933                                     }
   4934                                     st3>>=1;
   4935                                     stage3+=2;
   4936                                 }while((++c&0xf) != 0);
   4937                                 break;
   4938                             default:
   4939                                 return;
   4940                             }
   4941                         } else {
   4942                             c+=16; /* empty stage 3 block */
   4943                         }
   4944                     }
   4945                 } else {
   4946                     c+=1024; /*empty stage2 block */
   4947                 }
   4948             }
   4949         }
   4950         extGetUnicodeSet(setFillIn, which, filter, data);
   4951     }
   4952 
   4953     static void extGetUnicodeSetString(ByteBuffer cx,UnicodeSet setFillIn, boolean useFallback,
   4954         int minLength, int c, char s[],int length,int sectionIndex){
   4955         CharBuffer fromUSectionUChar;
   4956         IntBuffer fromUSectionValues;
   4957         fromUSectionUChar = (CharBuffer)ARRAY(cx, EXT_FROM_U_UCHARS_INDEX,char.class );
   4958         fromUSectionValues = (IntBuffer)ARRAY(cx, EXT_FROM_U_VALUES_INDEX,int.class );
   4959         int fromUSectionUCharIndex = fromUSectionUChar.position()+sectionIndex;
   4960         int fromUSectionValuesIndex = fromUSectionValues.position()+sectionIndex;
   4961         int value, i, count;
   4962 
   4963         /* read first pair of the section */
   4964        count = fromUSectionUChar.get(fromUSectionUCharIndex++);
   4965        value = fromUSectionValues.get(fromUSectionValuesIndex++);
   4966        if(value!=0 && (FROM_U_IS_ROUNDTRIP(value) || useFallback) && FROM_U_GET_LENGTH(value)>=minLength) {
   4967            if(c>=0){
   4968                setFillIn.add(c);
   4969            } else {
   4970                StringBuilder normalizedStringBuilder = new StringBuilder();
   4971                for(int j=0; j<length;j++){
   4972                    normalizedStringBuilder.append(s[j]);
   4973                }
   4974                String normalizedString = normalizedStringBuilder.toString();
   4975                for(int j=0;j<length;j++){
   4976                    setFillIn.add(normalizedString);
   4977                }
   4978              }
   4979        }
   4980 
   4981        for(i=0; i<count; ++i){
   4982            s[length] = fromUSectionUChar.get(fromUSectionUCharIndex + i);
   4983            value = fromUSectionValues.get(fromUSectionValuesIndex + i);
   4984 
   4985            if(value==0) {
   4986                /* no mapping, do nothing */
   4987            } else if (FROM_U_IS_PARTIAL(value)) {
   4988                extGetUnicodeSetString( cx, setFillIn, useFallback, minLength, UConverterConstants.U_SENTINEL, s, length+1,
   4989                        FROM_U_GET_PARTIAL_INDEX(value));
   4990            } else if ((useFallback ? (value&FROM_U_RESERVED_MASK)==0:((value&(FROM_U_ROUNDTRIP_FLAG|FROM_U_RESERVED_MASK))==FROM_U_ROUNDTRIP_FLAG))
   4991                    && FROM_U_GET_LENGTH(value)>=minLength) {
   4992                StringBuilder normalizedStringBuilder = new StringBuilder(); // String for composite characters
   4993                for(int j=0; j<(length+1);j++){
   4994                    normalizedStringBuilder.append(s[j]);
   4995                }
   4996              setFillIn.add(normalizedStringBuilder.toString());
   4997            }
   4998        }
   4999 
   5000     }
   5001 
   5002 
   5003     static void extGetUnicodeSet(UnicodeSet setFillIn, int which, int filter, UConverterSharedData Data){
   5004         int st1, stage1Length, st2, st3, minLength;
   5005         int ps2, ps3;
   5006 
   5007         CharBuffer stage12, stage3;
   5008         int value, length;
   5009         IntBuffer stage3b;
   5010         boolean useFallback;
   5011         char s[] = new char[MAX_UCHARS];
   5012         int c;
   5013         ByteBuffer cx = Data.mbcs.extIndexes;
   5014         if(cx == null){
   5015             return;
   5016         }
   5017         stage12 = (CharBuffer)ARRAY(cx, EXT_FROM_U_STAGE_12_INDEX,char.class );
   5018         stage3 = (CharBuffer)ARRAY(cx, EXT_FROM_U_STAGE_3_INDEX,char.class );
   5019         stage3b = (IntBuffer)ARRAY(cx, EXT_FROM_U_STAGE_3B_INDEX,int.class );
   5020 
   5021         stage1Length = cx.asIntBuffer().get(EXT_FROM_U_STAGE_1_LENGTH);
   5022         useFallback = (which==ROUNDTRIP_AND_FALLBACK_SET);
   5023 
   5024         c = 0;
   5025         if(filter == UCNV_SET_FILTER_2022_CN) {
   5026             minLength = 3;
   5027         } else if (Data.mbcs.outputType == MBCS_OUTPUT_DBCS_ONLY || filter != UCNV_SET_FILTER_NONE) {
   5028             /* DBCS-only, ignore single-byte results */
   5029             minLength = 2;
   5030         } else {
   5031             minLength = 1;
   5032         }
   5033 
   5034         for(st1=0; st1< stage1Length; ++st1){
   5035             st2 = stage12.get(st1);
   5036             if(st2>stage1Length) {
   5037                 ps2 = st2;
   5038                 for(st2=0;st2<64;++st2){
   5039                     st3=((int) stage12.get(ps2+st2))<<STAGE_2_LEFT_SHIFT;
   5040                     if(st3!= 0){
   5041                         ps3 = st3;
   5042                         do {
   5043                             value = stage3b.get(stage3.get(ps3++));
   5044                             if(value==0){
   5045                                 /* no mapping do nothing */
   5046                             }else if (FROM_U_IS_PARTIAL(value)){
   5047                                 length = 0;
   5048                                 length=UTF16.append(s, length, c);
   5049                                 extGetUnicodeSetString(cx,setFillIn,useFallback,minLength,c,s,length,FROM_U_GET_PARTIAL_INDEX(value));
   5050                             } else if ((useFallback ?  (value&FROM_U_RESERVED_MASK)==0 :((value&(FROM_U_ROUNDTRIP_FLAG|FROM_U_RESERVED_MASK))== FROM_U_ROUNDTRIP_FLAG)) &&
   5051                                     FROM_U_GET_LENGTH(value)>=minLength){
   5052 
   5053                                 switch(filter) {
   5054                                 case UCNV_SET_FILTER_2022_CN:
   5055                                     if(!(FROM_U_GET_LENGTH(value)==3 && FROM_U_GET_DATA(value)<=0x82ffff)){
   5056                                         continue;
   5057                                     }
   5058                                     break;
   5059                                 case UCNV_SET_FILTER_SJIS:
   5060                                     if(!(FROM_U_GET_LENGTH(value)==2 && (value=FROM_U_GET_DATA(value))>=0x8140 && value<=0xeffc)){
   5061                                         continue;
   5062                                     }
   5063                                     break;
   5064                                 case UCNV_SET_FILTER_GR94DBCS:
   5065                                     if(!(FROM_U_GET_LENGTH(value)==2 && ((value=FROM_U_GET_DATA(value)) - 0xa1a1)<=(0xfefe - 0xa1a1)
   5066                                             && (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1))<= (0xfe - 0xa1))){
   5067                                         continue;
   5068                                     }
   5069                                     break;
   5070                                 case UCNV_SET_FILTER_HZ:
   5071                                     if(!(FROM_U_GET_LENGTH(value)==2 && ((value=FROM_U_GET_DATA(value)) - 0xa1a1)<=(0xfdfe - 0xa1a1)
   5072                                             && (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1))<= (0xfe - 0xa1))){
   5073                                         continue;
   5074                                     }
   5075                                     break;
   5076                                 default:
   5077                                     /*
   5078                                      * UCNV_SET_FILTER_NONE,
   5079                                      * or UCNV_SET_FILTER_DBCS_ONLY which is handled via minLength
   5080                                      */
   5081                                     break;
   5082                                 }
   5083                                 setFillIn.add(c);
   5084 
   5085                             }
   5086                         }while((++c&0xf) != 0);
   5087 
   5088                     } else {
   5089                         c+=16;   /* emplty stage3 block */
   5090                     }
   5091                 }
   5092             } else {
   5093                 c+=1024;  /* empty stage 2 block*/
   5094             }
   5095         }
   5096     }
   5097 
   5098     void MBCSGetUnicodeSetForUnicode(UConverterSharedData data, UnicodeSet setFillIn, int which){
   5099         MBCSGetFilteredUnicodeSetForUnicode(data, setFillIn, which,
   5100                 this.sharedData.mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ? UCNV_SET_FILTER_DBCS_ONLY : UCNV_SET_FILTER_NONE );
   5101     }
   5102 
   5103     void getUnicodeSetImpl( UnicodeSet setFillIn, int which){
   5104         if((options & MBCS_OPTION_GB18030)!=0){
   5105             setFillIn.add(0, 0xd7ff);
   5106             setFillIn.add(0xe000, 0x10ffff);
   5107         }
   5108         else {
   5109             this.MBCSGetUnicodeSetForUnicode(sharedData, setFillIn, which);
   5110         }
   5111     }
   5112 
   5113 }
   5114