Home | History | Annotate | Download | only in charset
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /*
      4  *******************************************************************************
      5  * Copyright (C) 2006-2016, International Business Machines Corporation and
      6  * others. All Rights Reserved.
      7  *******************************************************************************
      8  */
      9 
     10 package com.ibm.icu.charset;
     11 
     12 import java.io.IOException;
     13 import java.io.InputStream;
     14 import java.nio.Buffer;
     15 import java.nio.BufferOverflowException;
     16 import java.nio.ByteBuffer;
     17 import java.nio.CharBuffer;
     18 import java.nio.IntBuffer;
     19 import java.nio.charset.CharsetDecoder;
     20 import java.nio.charset.CharsetEncoder;
     21 import java.nio.charset.CoderResult;
     22 import java.util.Locale;
     23 
     24 import com.ibm.icu.charset.UConverterSharedData.UConverterType;
     25 import com.ibm.icu.impl.ICUBinary;
     26 import com.ibm.icu.impl.ICUData;
     27 import com.ibm.icu.impl.InvalidFormatException;
     28 import com.ibm.icu.lang.UCharacter;
     29 import com.ibm.icu.text.UTF16;
     30 import com.ibm.icu.text.UnicodeSet;
     31 
     32 class CharsetMBCS extends CharsetICU {
     33 
    /** Substitution bytes; the constructor fills this with a private copy of staticData.subChar. */
    private byte[] fromUSubstitution = null;
    /** Converter data for this charset; assigned in the constructor from loadConverter(). */
    UConverterSharedData sharedData = null;
    /** Length of the MBCSHeader.version array. */
    private static final int MAX_VERSION_LENGTH = 4;

    // These filter constants are used in getUnicodeSet() and may be changed in future.
    // They mirror the C enum:
    // typedef enum UConverterSetFilter {
      static final int UCNV_SET_FILTER_NONE = 1;
      static final int UCNV_SET_FILTER_DBCS_ONLY = 2;
      static final int UCNV_SET_FILTER_2022_CN = 3;
      static final int UCNV_SET_FILTER_SJIS= 4 ;
      static final int UCNV_SET_FILTER_GR94DBCS = 5;
      static final int UCNV_SET_FILTER_HZ = 6;
      static final int UCNV_SET_FILTER_COUNT = 7;
   //  } UConverterSetFilter;
     48 
     49     /**
     50      * Fallbacks to Unicode are stored outside the normal state table and code point structures in a vector of items of
     51      * this type. They are sorted by offset.
     52      */
     53     final static class MBCSToUFallback {
     54         int offset;
     55         int codePoint;
     56 
     57         MBCSToUFallback(int off, int cp) {
     58             offset = off;
     59             codePoint = cp;
     60         }
     61     }
     62 
     63     /**
     64      * This is the MBCS part of the UConverterTable union (a runtime data structure). It keeps all the per-converter
     65      * data and points into the loaded mapping tables.
     66      */
     67     static final class UConverterMBCSTable {
     68         /* toUnicode */
     69         short countStates;
     70         byte dbcsOnlyState;
     71         boolean stateTableOwned;
     72         int countToUFallbacks;
     73 
     74         int stateTable[/* countStates */][/* 256 */];
     75         int swapLFNLStateTable[/* countStates */][/* 256 */]; /* for swaplfnl */
     76         char unicodeCodeUnits[/* countUnicodeResults */];
     77         MBCSToUFallback toUFallbacks[/* countToUFallbacks */];
     78 
     79         /* fromUnicode */
     80         char fromUnicodeTable[];  // stage1, and for MBCS_OUTPUT_1 also contains stage2
     81         int fromUnicodeTableInts[];  // stage1 and stage2 together as int[]
     82         // Exactly one of the fromUnicode(Type) tables is not null,
     83         // depending on the outputType.
     84         byte fromUnicodeBytes[];
     85         char fromUnicodeChars[];
     86         int fromUnicodeInts[];
     87         char swapLFNLFromUnicodeChars[]; /* for swaplfnl */
     88         int fromUBytesLength;
     89         short outputType, unicodeMask;
     90 
     91         /* converter name for swaplfnl */
     92         String swapLFNLName;
     93 
     94         /* extension data */
     95         UConverterSharedData baseSharedData;
     96         // int extIndexes[];
     97         ByteBuffer extIndexes; // create int[] view etc. as needed
     98 
     99         CharBuffer mbcsIndex;                     /* for fast conversion from most of BMP to MBCS (utf8Friendly data) */
    100         // char sbcsIndex[/* SBCS_FAST_LIMIT>>6 */]; /* for fast conversion from low BMP to SBCS (utf8Friendly data) */
    101         boolean utf8Friendly;                     /* for utf8Friendly data */
    102         char maxFastUChar;                        /* for utf8Friendly data */
    103 
    104         /* roundtrips */
    105         int asciiRoundtrips;
    106 
    107         UConverterMBCSTable() {
    108             utf8Friendly = false;
    109             mbcsIndex = null;
    110         }
    111 
    112         boolean hasSupplementary() {
    113             return (unicodeMask & UConverterConstants.HAS_SUPPLEMENTARY) != 0;
    114         }
    115 
    116         /*
    117          * UConverterMBCSTable(UConverterMBCSTable t) { countStates = t.countStates; dbcsOnlyState = t.dbcsOnlyState;
    118          * stateTableOwned = t.stateTableOwned; countToUFallbacks = t.countToUFallbacks; stateTable = t.stateTable;
    119          * swapLFNLStateTable = t.swapLFNLStateTable; unicodeCodeUnits = t.unicodeCodeUnits; toUFallbacks =
    120          * t.toUFallbacks; fromUnicodeTable = t.fromUnicodeTable; fromUnicodeBytes = t.fromUnicodeBytes;
    121          * swapLFNLFromUnicodeChars = t.swapLFNLFromUnicodeChars; fromUBytesLength = t.fromUBytesLength; outputType =
    122          * t.outputType; unicodeMask = t.unicodeMask; swapLFNLName = t.swapLFNLName; baseSharedData = t.baseSharedData;
    123          * extIndexes = t.extIndexes; }
    124          */
    125     }
    126 
    /* Constants used in the MBCS data header (bit masks applied to MBCSHeader.options). */
    // enum {
        static final int MBCS_OPT_LENGTH_MASK=0x3f;
        /* When set in the header options, loadConverter() reconstitutes the fromUnicode data after reading the table. */
        static final int MBCS_OPT_NO_FROM_U=0x40;
        /*
         * If any of the following options bits are set,
         * then the file must be rejected.
         */
        static final int MBCS_OPT_INCOMPATIBLE_MASK=0xffc0;
        /*
         * Remove bits from this mask as more options are recognized
         * by all implementations that use this constant.
         * (It currently equals MBCS_OPT_INCOMPATIBLE_MASK without the MBCS_OPT_NO_FROM_U bit.)
         */
        static final int MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK=0xff80;
    // };
    /* Constants for fast and UTF-8-friendly conversion. */
    // enum {
        static final int SBCS_FAST_MAX=0x0fff;               /* maximum code point with UTF-8-friendly SBCS runtime code, see makeconv SBCS_UTF8_MAX */
        static final int SBCS_FAST_LIMIT=SBCS_FAST_MAX+1;    /* =0x1000 */
        static final int MBCS_FAST_MAX=0xd7ff;               /* maximum code point with UTF-8-friendly MBCS runtime code, see makeconv MBCS_UTF8_MAX */
        static final int MBCS_FAST_LIMIT=MBCS_FAST_MAX+1;    /* =0xd800 */
    // };
    149     /**
    150      * MBCS data header. See data format description above.
    151      */
    152     final static class MBCSHeader {
    153         byte version[/* U_MAX_VERSION_LENGTH */];
    154         int countStates, countToUFallbacks, offsetToUCodeUnits, offsetFromUTable, offsetFromUBytes;
    155         int flags;
    156         int fromUBytesLength;
    157 
    158         /* new and required in version 5 */
    159         int options;
    160 
    161         /* new and optional in version 5; used if options&MBCS_OPT_NO_FROM_U */
    162         int fullStage2Length;  /* number of 32-bit units */
    163 
    164         MBCSHeader() {
    165             version = new byte[MAX_VERSION_LENGTH];
    166         }
    167     }
    168 
    169     public CharsetMBCS(String icuCanonicalName, String javaCanonicalName, String[] aliases, String classPath,
    170             ClassLoader loader) throws InvalidFormatException {
    171         super(icuCanonicalName, javaCanonicalName, aliases);
    172 
    173         /* See if the icuCanonicalName contains certain option information. */
    174         if (icuCanonicalName.indexOf(UConverterConstants.OPTION_SWAP_LFNL_STRING) > -1) {
    175             options = UConverterConstants.OPTION_SWAP_LFNL;
    176             icuCanonicalName = icuCanonicalName.substring(0, icuCanonicalName.indexOf(UConverterConstants.OPTION_SWAP_LFNL_STRING));
    177             super.icuCanonicalName = icuCanonicalName;
    178         }
    179 
    180         // now try to load the data
    181         sharedData = loadConverter(1, icuCanonicalName, classPath, loader);
    182 
    183         maxBytesPerChar = sharedData.staticData.maxBytesPerChar;
    184         minBytesPerChar = sharedData.staticData.minBytesPerChar;
    185         maxCharsPerByte = 1;
    186         fromUSubstitution = sharedData.staticData.subChar;
    187         subChar = sharedData.staticData.subChar;
    188         subCharLen = sharedData.staticData.subCharLen;
    189         subChar1 = sharedData.staticData.subChar1;
    190         fromUSubstitution = new byte[sharedData.staticData.subCharLen];
    191         System.arraycopy(sharedData.staticData.subChar, 0, fromUSubstitution, 0, sharedData.staticData.subCharLen);
    192 
    193         initializeConverter(options);
    194     }
    195 
    /**
     * Creates an MBCS charset by delegating to the five-argument constructor with
     * {@code ICUData.ICU_BUNDLE} as the class path and no class loader.
     *
     * @param icuCanonicalName  ICU converter name
     * @param javaCanonicalName Java canonical name
     * @param aliases           charset aliases
     * @throws InvalidFormatException if the five-argument constructor throws it
     */
    public CharsetMBCS(String icuCanonicalName, String javaCanonicalName, String[] aliases)
            throws InvalidFormatException {
        this(icuCanonicalName, javaCanonicalName, aliases, ICUData.ICU_BUNDLE, null);
    }
    200 
    201     private UConverterSharedData loadConverter(int nestedLoads, String myName, String classPath, ClassLoader loader)
    202             throws InvalidFormatException {
    203         boolean noFromU = false;
    204         // Read converter data from file
    205         UConverterStaticData staticData = new UConverterStaticData();
    206         UConverterDataReader reader = null;
    207         try {
    208             String itemName = myName + '.' + UConverterSharedData.DATA_TYPE;
    209             String resourceName = classPath + '/' + itemName;
    210             ByteBuffer b;
    211 
    212             if (loader != null) {
    213                 @SuppressWarnings("resource")  // Closed by getByteBufferFromInputStreamAndCloseStream().
    214                 InputStream i = ICUData.getRequiredStream(loader, resourceName);
    215                 b = ICUBinary.getByteBufferFromInputStreamAndCloseStream(i);
    216             } else if (!classPath.equals(ICUData.ICU_BUNDLE)) {
    217                 @SuppressWarnings("resource")  // Closed by getByteBufferFromInputStreamAndCloseStream().
    218                 InputStream i = ICUData.getRequiredStream(resourceName);
    219                 b = ICUBinary.getByteBufferFromInputStreamAndCloseStream(i);
    220             } else {
    221                 b = ICUBinary.getRequiredData(itemName);
    222             }
    223             reader = new UConverterDataReader(b);
    224             reader.readStaticData(staticData);
    225         } catch (IOException e) {
    226             throw new InvalidFormatException(e);
    227         } catch (Exception e) {
    228             throw new InvalidFormatException(e);
    229         }
    230 
    231         int type = staticData.conversionType;
    232 
    233         if (type != UConverterSharedData.UConverterType.MBCS
    234                 || staticData.structSize != UConverterStaticData.SIZE_OF_UCONVERTER_STATIC_DATA) {
    235             throw new InvalidFormatException();
    236         }
    237 
    238         UConverterSharedData data = new UConverterSharedData(staticData);
    239 
    240         // Load data
    241         UConverterMBCSTable mbcsTable = data.mbcs;
    242         MBCSHeader header = new MBCSHeader();
    243         try {
    244             reader.readMBCSHeader(header);
    245         } catch (IOException e) {
    246             throw new InvalidFormatException();
    247         }
    248 
    249         int offset;
    250         // int[] extIndexesArray = null;
    251         String baseNameString = null;
    252 
    253         if (header.version[0] == 5 && header.version[1] >= 3 && (header.options & MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK) == 0) {
    254             noFromU = ((header.options & MBCS_OPT_NO_FROM_U) != 0);
    255         } else if (header.version[0] != 4) {
    256             throw new InvalidFormatException();
    257         }
    258 
    259         mbcsTable.outputType = (byte) header.flags;
    260 
    261         /* extension data, header version 4.2 and higher */
    262         offset = header.flags >>> 8;
    263         // if(offset!=0 && mbcsTable.outputType == MBCS_OUTPUT_EXT_ONLY) {
    264         if (mbcsTable.outputType == MBCS_OUTPUT_EXT_ONLY) {
    265             try {
    266                 baseNameString = reader.readBaseTableName();
    267                 if (offset != 0) {
    268                     // agljport:commment subtract 32 for sizeof(_MBCSHeader) and length of baseNameString and 1 null
    269                     // terminator byte all already read;
    270                     mbcsTable.extIndexes = reader.readExtIndexes(offset - reader.bytesReadAfterStaticData());
    271                 }
    272             } catch (IOException e) {
    273                 throw new InvalidFormatException();
    274             }
    275         }
    276 
    277         // agljport:add this would be unnecessary if extIndexes were memory mapped
    278         /*
    279          * if(mbcsTable.extIndexes != null) {
    280          *
    281          * try { //int nbytes = mbcsTable.extIndexes[UConverterExt.UCNV_EXT_TO_U_LENGTH]*4 +
    282          * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_TO_U_UCHARS_LENGTH]*2 +
    283          * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_FROM_U_LENGTH]*6 +
    284          * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_FROM_U_BYTES_LENGTH] +
    285          * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_FROM_U_STAGE_12_LENGTH]*2 +
    286          * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_FROM_U_STAGE_3_LENGTH]*2 +
    287          * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_FROM_U_STAGE_3B_LENGTH]*4; //int nbytes =
    288          * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_SIZE] //byte[] extTables = dataReader.readExtTables(nbytes);
    289          * //mbcsTable.extTables = ByteBuffer.wrap(extTables); } catch(IOException e) { System.err.println("Caught
    290          * IOException: " + e.getMessage()); pErrorCode[0] = UErrorCode.U_INVALID_FORMAT_ERROR; return; } }
    291          */
    292         if (mbcsTable.outputType == MBCS_OUTPUT_EXT_ONLY) {
    293             UConverterSharedData baseSharedData = null;
    294             ByteBuffer extIndexes;
    295             String baseName;
    296 
    297             /* extension-only file, load the base table and set values appropriately */
    298             extIndexes = mbcsTable.extIndexes;
    299             if (extIndexes == null) {
    300                 /* extension-only file without extension */
    301                 throw new InvalidFormatException();
    302             }
    303 
    304             if (nestedLoads != 1) {
    305                 /* an extension table must not be loaded as a base table */
    306                 throw new InvalidFormatException();
    307             }
    308 
    309             /* load the base table */
    310             baseName = baseNameString;
    311             if (baseName.equals(staticData.name)) {
    312                 /* forbid loading this same extension-only file */
    313                 throw new InvalidFormatException();
    314             }
    315 
    316             // agljport:fix args.size=sizeof(UConverterLoadArgs);
    317             baseSharedData = loadConverter(2, baseName, classPath, loader);
    318 
    319             if (baseSharedData.staticData.conversionType != UConverterType.MBCS
    320                     || baseSharedData.mbcs.baseSharedData != null) {
    321                 // agljport:fix ucnv_unload(baseSharedData);
    322                 throw new InvalidFormatException();
    323             }
    324 
    325             /* copy the base table data */
    326             // agljport:comment deep copy in C changes mbcs through local reference mbcsTable; in java we probably don't
    327             // need the deep copy so can just make sure mbcs and its local reference both refer to the same new object
    328             mbcsTable = data.mbcs = baseSharedData.mbcs;
    329 
    330             /* overwrite values with relevant ones for the extension converter */
    331             mbcsTable.baseSharedData = baseSharedData;
    332             mbcsTable.extIndexes = extIndexes;
    333 
    334             /*
    335              * It would be possible to share the swapLFNL data with a base converter, but the generated name would have
    336              * to be different, and the memory would have to be free'd only once. It is easier to just create the data
    337              * for the extension converter separately when it is requested.
    338              */
    339             mbcsTable.swapLFNLStateTable = null;
    340             mbcsTable.swapLFNLFromUnicodeChars = null;
    341             mbcsTable.swapLFNLName = null;
    342 
    343             /*
    344              * Set a special, runtime-only outputType if the extension converter is a DBCS version of a base converter
    345              * that also maps single bytes.
    346              */
    347             if (staticData.conversionType == UConverterType.DBCS
    348                     || (staticData.conversionType == UConverterType.MBCS && staticData.minBytesPerChar >= 2)) {
    349 
    350                 if (baseSharedData.mbcs.outputType == MBCS_OUTPUT_2_SISO) {
    351                     /* the base converter is SI/SO-stateful */
    352                     int entry;
    353 
    354                     /* get the dbcs state from the state table entry for SO=0x0e */
    355                     entry = mbcsTable.stateTable[0][0xe];
    356                     if (MBCS_ENTRY_IS_FINAL(entry) && MBCS_ENTRY_FINAL_ACTION(entry) == MBCS_STATE_CHANGE_ONLY
    357                             && MBCS_ENTRY_FINAL_STATE(entry) != 0) {
    358                         mbcsTable.dbcsOnlyState = (byte) MBCS_ENTRY_FINAL_STATE(entry);
    359 
    360                         mbcsTable.outputType = MBCS_OUTPUT_DBCS_ONLY;
    361                     }
    362                 } else if (baseSharedData.staticData.conversionType == UConverterType.MBCS
    363                         && baseSharedData.staticData.minBytesPerChar == 1
    364                         && baseSharedData.staticData.maxBytesPerChar == 2 && mbcsTable.countStates <= 127) {
    365 
    366                     /* non-stateful base converter, need to modify the state table */
    367                     int newStateTable[][/* 256 */];
    368                     int state[]; // this works because java 2-D array is array of references and we can have state =
    369                     // newStateTable[i];
    370                     int i, count;
    371 
    372                     /* allocate a new state table and copy the base state table contents */
    373                     count = mbcsTable.countStates;
    374                     newStateTable = new int[(count + 1) * 1024][256];
    375 
    376                     for (i = 0; i < mbcsTable.stateTable.length; ++i)
    377                         System.arraycopy(mbcsTable.stateTable[i], 0, newStateTable[i], 0,
    378                                 mbcsTable.stateTable[i].length);
    379 
    380                     /* change all final single-byte entries to go to a new all-illegal state */
    381                     state = newStateTable[0];
    382                     for (i = 0; i < 256; ++i) {
    383                         if (MBCS_ENTRY_IS_FINAL(state[i])) {
    384                             state[i] = MBCS_ENTRY_TRANSITION(count, 0);
    385                         }
    386                     }
    387 
    388                     /* build the new all-illegal state */
    389                     state = newStateTable[count];
    390                     for (i = 0; i < 256; ++i) {
    391                         state[i] = MBCS_ENTRY_FINAL(0, MBCS_STATE_ILLEGAL, 0);
    392                     }
    393                     mbcsTable.stateTable = newStateTable;
    394                     mbcsTable.countStates = (byte) (count + 1);
    395                     mbcsTable.stateTableOwned = true;
    396 
    397                     mbcsTable.outputType = MBCS_OUTPUT_DBCS_ONLY;
    398                 }
    399             }
    400 
    401             /*
    402              * unlike below for files with base tables, do not get the unicodeMask from the sharedData; instead, use the
    403              * base table's unicodeMask, which we copied in the memcpy above; this is necessary because the static data
    404              * unicodeMask, especially the UCNV_HAS_SUPPLEMENTARY flag, is part of the base table data
    405              */
    406         } else {
    407             /* conversion file with a base table; an additional extension table is optional */
    408             /* make sure that the output type is known */
    409             switch (mbcsTable.outputType) {
    410             case MBCS_OUTPUT_1:
    411             case MBCS_OUTPUT_2:
    412             case MBCS_OUTPUT_3:
    413             case MBCS_OUTPUT_4:
    414             case MBCS_OUTPUT_3_EUC:
    415             case MBCS_OUTPUT_4_EUC:
    416             case MBCS_OUTPUT_2_SISO:
    417                 /* OK */
    418                 break;
    419             default:
    420                 throw new InvalidFormatException();
    421             }
    422 
    423             /*
    424              * converter versions 6.1 and up contain a unicodeMask that is used here to select the most efficient
    425              * function implementations
    426              */
    427             // agljport:fix info.size=sizeof(UDataInfo);
    428             // agljport:fix udata_getInfo((UDataMemory *)sharedData->dataMemory, &info);
    429             if (reader.dataFormatHasUnicodeMask()) {
    430                 /* mask off possible future extensions to be safe */
    431                 mbcsTable.unicodeMask = (short) (staticData.unicodeMask & 3);
    432             } else {
    433                 /* for older versions, assume worst case: contains anything possible (prevent over-optimizations) */
    434                 mbcsTable.unicodeMask = UConverterConstants.HAS_SUPPLEMENTARY | UConverterConstants.HAS_SURROGATES;
    435             }
    436             try {
    437                 reader.readMBCSTable(header, mbcsTable);
    438             } catch (IOException e) {
    439                 throw new InvalidFormatException();
    440             }
    441 
    442             if (offset != 0) {
    443                 try {
    444                     // agljport:commment subtract 32 for sizeof(_MBCSHeader) and length of baseNameString and 1 null
    445                     // terminator byte all already read;
    446                     // int namelen = baseNameString != null? baseNameString.length() + 1: 0;
    447                     mbcsTable.extIndexes = reader.readExtIndexes(offset - reader.bytesReadAfterStaticData());
    448                 } catch (IOException e) {
    449                     throw new InvalidFormatException();
    450                 }
    451             }
    452 
    453             if (header.version[1] >= 3 && (mbcsTable.unicodeMask & UConverterConstants.HAS_SURROGATES) == 0 &&
    454                     (mbcsTable.countStates == 1 ? ((char)header.version[2] >= (SBCS_FAST_MAX>>8)) : ((char)header.version[2] >= (MBCS_FAST_MAX>>8)))) {
    455                 mbcsTable.utf8Friendly = true;
    456 
    457                 if (mbcsTable.countStates == 1) {
    458                     /*
    459                      * SBCS: Stage 3 is allocated in 64-entry blocks for U+0000..SBCS_FAST_MAX or higher.
    460                      * Build a table with indexes to each block, to be used instead of
    461                      * the regular stage 1/2 table.
    462                      */
    463 //                    sbcsIndex = new char[SBCS_FAST_LIMIT>>6];
    464 //                    for (int i = 0; i < (SBCS_FAST_LIMIT>>6); ++i) {
    465 //                        mbcsTable.sbcsIndex[i] = mbcsTable.fromUnicodeTable[mbcsTable.fromUnicodeTable[i>>4]+((i<<2)&0x3c)];
    466 //                    }
    467                     /* set SBCS_FAST_MAX to reflect the reach of sbcsIndex[] even if header.version[2]>(SBCS_FAST_MAX>>8) */
    468                     mbcsTable.maxFastUChar = SBCS_FAST_MAX;
    469                 } else {
    470                     /*
    471                      * MBCS: Stage 3 is allocated in 64-entry blocks for U+0000..MBCS_FAST_MAX or higher.
    472                      * The .cnv file is prebuilt with an additional stage table with indexes to each block.
    473                      */
    474                     mbcsTable.maxFastUChar = (char)((header.version[2]<<8) | 0xff);
    475                 }
    476             }
    477             /* calculate a bit set of 4 ASCII characters per bit that round-trip to ASCII bytes */
    478             {
    479                 int asciiRoundtrips = 0xffffffff;
    480                 for (int i = 0; i < 0x80; ++i) {
    481                     if (mbcsTable.stateTable[0][i] != MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, i)) {
    482                         asciiRoundtrips &= ~(1 << (i >> 2));
    483                     }
    484                 }
    485                 mbcsTable.asciiRoundtrips = asciiRoundtrips;
    486             }
    487             // TODO: Use asciiRoundtrips to speed up conversion, like in ICU4C.
    488 
    489             if (noFromU) {
    490                 int stage1Length = (mbcsTable.unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) != 0 ? 0x440 : 0x40;
    491                 int stage2Length = (header.offsetFromUBytes - header.offsetFromUTable)/4 - stage1Length/2;
    492                 reconstituteData(mbcsTable, stage1Length, stage2Length, header.fullStage2Length);
    493             }
    494             if (mbcsTable.outputType == MBCS_OUTPUT_DBCS_ONLY || mbcsTable.outputType == MBCS_OUTPUT_2_SISO) {
    495                 /*
    496                  * MBCS_OUTPUT_DBCS_ONLY: No SBCS mappings, therefore ASCII does not roundtrip.
    497                  * MBCS_OUTPUT_2_SISO: Bypass the ASCII fastpath to handle prevLength correctly.
    498                  */
    499                 mbcsTable.asciiRoundtrips = 0;
    500             }
    501         }
    502         // TODO: Use mbcsIndex to speed up UTF-16 conversion, like in ICU4C.
    503         mbcsTable.mbcsIndex = null;
    504         return data;
    505     }
    506 
    507     private static boolean writeStage3Roundtrip(UConverterMBCSTable mbcsTable, long value, int codePoints[]) {
    508         char[] table;
    509         byte[] bytes;
    510         int stage2;
    511         int p;
    512         int c;
    513         int i, st3;
    514 
    515         table = mbcsTable.fromUnicodeTable;
    516         int[] tableInts = mbcsTable.fromUnicodeTableInts;
    517         bytes = mbcsTable.fromUnicodeBytes;
    518         char[] chars = mbcsTable.fromUnicodeChars;
    519         int[] ints = mbcsTable.fromUnicodeInts;
    520 
    521         /* for EUC outputTypes, modify the value like genmbcs.c's transformEUC() */
    522         switch(mbcsTable.outputType) {
    523         case MBCS_OUTPUT_3_EUC:
    524             if(value<=0xffff) {
    525                 /* short sequences are stored directly */
    526                 /* code set 0 or 1 */
    527             } else if(value<=0x8effff) {
    528                 /* code set 2 */
    529                 value&=0x7fff;
    530             } else /* first byte is 0x8f */ {
    531                 /* code set 3 */
    532                 value&=0xff7f;
    533             }
    534             break;
    535         case MBCS_OUTPUT_4_EUC:
    536             if(value<=0xffffff) {
    537                 /* short sequences are stored directly */
    538                 /* code set 0 or 1 */
    539             } else if(value<=0x8effffffL) {
    540                 /* code set 2 */
    541                 value&=0x7fffff;
    542             } else /* first byte is 0x8f */ {
    543                 /* code set 3 */
    544                 value&=0xff7fff;
    545             }
    546             break;
    547         default:
    548             break;
    549         }
    550 
    551         for(i=0; i<=0x1f; ++value, ++i) {
    552             c=codePoints[i];
    553             if(c<0) {
    554                 continue;
    555             }
    556 
    557             /* locate the stage 2 & 3 data */
    558             stage2 = table[c>>10] + ((c>>4)&0x3f);
    559             st3 = tableInts[stage2];
    560             st3 = (char)(st3 * 16 + (c&0xf));
    561 
    562             /* write the codepage bytes into stage 3 */
    563             switch(mbcsTable.outputType) {
    564             case MBCS_OUTPUT_3:
    565             case MBCS_OUTPUT_4_EUC:
    566                 p = st3*3;
    567                 bytes[p] = (byte)(value>>16);
    568                 bytes[p+1] = (byte)(value>>8);
    569                 bytes[p+2] = (byte)value;
    570                 break;
    571             case MBCS_OUTPUT_4:
    572                 ints[st3] = (int)value;
    573                 break;
    574             default:
    575                 /* 2 bytes per character */
    576                 chars[st3] = (char)value;
    577                 break;
    578             }
    579 
    580             // Set the roundtrip flag.
    581             int shift = 16 + (c & 0x0F);
    582             tableInts[stage2] |= (1L << shift);
    583         }
    584         return true;
    585      }
    586 
    587     private static void reconstituteData(UConverterMBCSTable mbcsTable,
    588             int stage1Length, int stage2Length, int fullStage2Length) {
    589         char[] stage1 = mbcsTable.fromUnicodeTable;
    590 
    591         // stage2 starts with unused stage1 space.
    592         // Indexes into stage 2 count from the bottom of the fromUnicodeTable.
    593         int numStage1Ints = stage1Length / 2;  // 2 chars = 1 int
    594         int[] stage2 = new int[numStage1Ints + fullStage2Length];
    595         System.arraycopy(mbcsTable.fromUnicodeTableInts, numStage1Ints,
    596                 stage2, (fullStage2Length - stage2Length) + numStage1Ints,
    597                 stage2Length);
    598         mbcsTable.fromUnicodeTableInts = stage2;
    599 
    600         /* reconstitute the initial part of stage 2 from the mbcsIndex */
    601         {
    602             int stageUTF8Length=(mbcsTable.maxFastUChar+1)>>6;
    603             int stageUTF8Index=0;
    604             int st1, st2, st3, i;
    605 
    606             for (st1 = 0; stageUTF8Index < stageUTF8Length; ++st1) {
    607                 st2 = stage1[st1];
    608                 if (st2 != stage1Length/2) {
    609                     /* each stage 2 block has 64 entries corresponding to 16 entries in the mbcsIndex */
    610                     for (i = 0; i < 16; ++i) {
    611                         st3 = mbcsTable.mbcsIndex.get(stageUTF8Index++);
    612                         if (st3 != 0) {
    613                             /* a stage 2 entry's index is per stage 3 16-block, not per stage 3 entry */
    614                             st3>>=4;
    615                             /*
    616                              * 4 stage 2 entries point to 4 consecutive stage 3 16-blocks which are
    617                              * allocated together as a single 64-block for access from the mbcsIndex
    618                              */
    619                             stage2[st2++] = st3++;
    620                             stage2[st2++] = st3++;
    621                             stage2[st2++] = st3++;
    622                             stage2[st2++] = st3;
    623                         } else {
    624                             /* no stage 3 block, skip */
    625                             st2+=4;
    626                         }
    627                     }
    628                 } else {
    629                     /* no stage 2 block, skip */
    630                     stageUTF8Index+=16;
    631                 }
    632             }
    633         }
    634 
    635         switch (mbcsTable.outputType) {
    636         case CharsetMBCS.MBCS_OUTPUT_2:
    637         case CharsetMBCS.MBCS_OUTPUT_2_SISO:
    638         case CharsetMBCS.MBCS_OUTPUT_3_EUC:
    639             mbcsTable.fromUnicodeChars = new char[mbcsTable.fromUBytesLength / 2];
    640             break;
    641         case CharsetMBCS.MBCS_OUTPUT_3:
    642         case CharsetMBCS.MBCS_OUTPUT_4_EUC:
    643             mbcsTable.fromUnicodeBytes = new byte[mbcsTable.fromUBytesLength];
    644             break;
    645         case CharsetMBCS.MBCS_OUTPUT_4:
    646             mbcsTable.fromUnicodeInts = new int[mbcsTable.fromUBytesLength / 4];
    647             break;
    648         default:
    649             // Cannot occur, caller checked already.
    650             assert false;
    651         }
    652 
    653         /* reconstitute fromUnicodeBytes with roundtrips from toUnicode data */
    654         MBCSEnumToUnicode(mbcsTable);
    655     }
    656 
    657     /*
    658      * Internal function enumerating the toUnicode data of an MBCS converter.
    659      * Currently only used for reconstituting data for a MBCS_OPT_NO_FROM_U
    660      * table, but could also be used for a future getUnicodeSet() option
    661      * that includes reverse fallbacks (after updating this function's implementation).
    662      * Currently only handles roundtrip mappings.
    663      * Does not currently handle extensions.
    664      */
    665     private static void MBCSEnumToUnicode(UConverterMBCSTable mbcsTable) {
    666         /*
    667          * Properties for each state, to speed up the enumeration.
    668          * Ignorable actions are unassigned/illegal/state-change-only:
    669          * They do not lead to mappings.
    670          *
    671          * Bits 7..6
    672          * 1 direct/initial state (stateful converters have mulitple)
    673          * 0 non-initial state with transitions or with nonignorable result actions
    674          * -1 final state with only ignorable actions
    675          *
    676          * Bits 5..3
    677          * The lowest byte value with non-ignorable actions is
    678          * value<<5 (rounded down).
    679          *
    680          * Bits 2..0:
    681          * The highest byte value with non-ignorable actions is
    682          * (value<<5)&0x1f (rounded up).
    683          */
    684         byte stateProps[] = new byte[MBCS_MAX_STATE_COUNT];
    685         int state;
    686 
    687         /* recurse from state 0 and set all stateProps */
    688         getStateProp(mbcsTable.stateTable, stateProps, 0);
    689 
    690         for (state = 0; state < mbcsTable.countStates; ++state) {
    691             if (stateProps[state] >= 0x40) {
    692                 /* start from each direct state */
    693                 enumToU(mbcsTable, stateProps, state, 0, 0);
    694             }
    695         }
    696 
    697 
    698     }
    699 
    700     private static boolean enumToU(UConverterMBCSTable mbcsTable, byte stateProps[], int state, int offset, int value) {
    701         int[] codePoints = new int[32];
    702         int[] row;
    703         char[] unicodeCodeUnits;
    704         int anyCodePoints;
    705         int b, limit;
    706 
    707         row = mbcsTable.stateTable[state];
    708         unicodeCodeUnits = mbcsTable.unicodeCodeUnits;
    709 
    710         value<<=8;
    711         anyCodePoints = -1; /* becomes non-negative if there is a mapping */
    712 
    713         b = (stateProps[state]&0x38)<<2;
    714         if (b == 0 && stateProps[state] >= 0x40) {
    715             /* skip byte sequences with leading zeros because they are note stored in the fromUnicode table */
    716             codePoints[0] = UConverterConstants.U_SENTINEL;
    717             b = 1;
    718         }
    719         limit = ((stateProps[state]&7)+1)<<5;
    720         while (b < limit) {
    721             int entry = row[b];
    722             if (MBCS_ENTRY_IS_TRANSITION(entry)) {
    723                 int nextState = MBCS_ENTRY_TRANSITION_STATE(entry);
    724                 if (stateProps[nextState] >= 0) {
    725                     /* recurse to a state with non-ignorable actions */
    726                     if (!enumToU(mbcsTable, stateProps, nextState, offset+MBCS_ENTRY_TRANSITION_OFFSET(entry), value|b)) {
    727                         return false;
    728                     }
    729                 }
    730                 codePoints[b&0x1f] = UConverterConstants.U_SENTINEL;
    731             } else {
    732                 int c;
    733                 int action;
    734 
    735                 /*
    736                  * An if-else-if chain provides more reliable performance for
    737                  * the most common cases compared to a switch.
    738                  */
    739                 action = MBCS_ENTRY_FINAL_ACTION(entry);
    740                 if (action == MBCS_STATE_VALID_DIRECT_16) {
    741                     /* output BMP code point */
    742                     c = MBCS_ENTRY_FINAL_VALUE_16(entry);
    743                 } else if (action == MBCS_STATE_VALID_16) {
    744                     int finalOffset = offset+MBCS_ENTRY_FINAL_VALUE_16(entry);
    745                     c = unicodeCodeUnits[finalOffset];
    746                     if (c < 0xfffe) {
    747                         /* output BMP code point */
    748                     } else {
    749                         c = UConverterConstants.U_SENTINEL;
    750                     }
    751                 } else if (action == MBCS_STATE_VALID_16_PAIR) {
    752                     int finalOffset = offset+MBCS_ENTRY_FINAL_VALUE_16(entry);
    753                     c = unicodeCodeUnits[finalOffset++];
    754                     if (c < 0xd800) {
    755                         /* output BMP code point below 0xd800 */
    756                     } else if (c <= 0xdbff) {
    757                         /* output roundtrip or fallback supplementary code point */
    758                         c = ((c&0x3ff)<<10)+unicodeCodeUnits[finalOffset]+(0x10000-0xdc00);
    759                     } else if (c == 0xe000) {
    760                         /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
    761                         c = unicodeCodeUnits[finalOffset];
    762                     } else {
    763                         c = UConverterConstants.U_SENTINEL;
    764                     }
    765                 } else if (action == MBCS_STATE_VALID_DIRECT_20) {
    766                     /* output supplementary code point */
    767                     c = MBCS_ENTRY_FINAL_VALUE(entry)+0x10000;
    768                 } else {
    769                     c = UConverterConstants.U_SENTINEL;
    770                 }
    771 
    772                 codePoints[b&0x1f] = c;
    773                 anyCodePoints&=c;
    774             }
    775             if (((++b)&0x1f) == 0) {
    776                 if(anyCodePoints>=0) {
    777                     if(!writeStage3Roundtrip(mbcsTable, value|(b-0x20), codePoints)) {
    778                         return false;
    779                     }
    780                     anyCodePoints=-1;
    781                 }
    782             }
    783         }
    784 
    785         return true;
    786     }
    787 
    788     /*
    789      * Only called if stateProps[state]==-1.
    790      * A recursive call may do stateProps[state]|=0x40 if this state is the target of an
    791      * MBCS_STATE_CHANGE_ONLY.
    792      */
    793     private static byte getStateProp(int stateTable[][], byte stateProps[], int state) {
    794         int[] row;
    795         int min, max, entry, nextState;
    796 
    797         row = stateTable[state];
    798         stateProps[state] = 0;
    799 
    800         /* find first non-ignorable state */
    801         for (min = 0;;++min) {
    802             entry = row[min];
    803             nextState = MBCS_ENTRY_STATE(entry);
    804             if (stateProps[nextState] == -1) {
    805                 getStateProp(stateTable, stateProps, nextState);
    806             }
    807             if (MBCS_ENTRY_IS_TRANSITION(entry)) {
    808                 if (stateProps[nextState] >- 0) {
    809                     break;
    810                 }
    811             } else if (MBCS_ENTRY_FINAL_ACTION(entry) < MBCS_STATE_UNASSIGNED) {
    812                 break;
    813             }
    814             if (min == 0xff) {
    815                 stateProps[state] = -0x40;  /* (byte)0xc0 */
    816                 return stateProps[state];
    817             }
    818         }
    819         stateProps[state]|=(byte)((min>>5)<<3);
    820 
    821         /* find last non-ignorable state */
    822         for (max = 0xff; min < max; --max) {
    823             entry = row[max];
    824             nextState = MBCS_ENTRY_STATE(entry);
    825             if (stateProps[nextState] == -1) {
    826                 getStateProp(stateTable, stateProps, nextState);
    827             }
    828             if (MBCS_ENTRY_IS_TRANSITION(entry)) {
    829                 if (stateProps[nextState] >- 0) {
    830                     break;
    831                 }
    832             } else if (MBCS_ENTRY_FINAL_ACTION(entry) < MBCS_STATE_UNASSIGNED) {
    833                 break;
    834             }
    835         }
    836         stateProps[state]|=(byte)(max>>5);
    837 
    838         /* recurse further and collect direct-state information */
    839         while (min <= max) {
    840             entry = row[min];
    841             nextState = MBCS_ENTRY_STATE(entry);
    842             if (stateProps[nextState] == -1) {
    843                 getStateProp(stateTable, stateProps, nextState);
    844             }
    845             if (MBCS_ENTRY_IS_TRANSITION(entry)) {
    846                 stateProps[nextState]|=0x40;
    847                 if (MBCS_ENTRY_FINAL_ACTION(entry) <= MBCS_STATE_FALLBACK_DIRECT_20) {
    848                     stateProps[state]|=0x40;
    849                 }
    850             }
    851             ++min;
    852         }
    853         return stateProps[state];
    854     }
    855 
    856     protected void initializeConverter(int myOptions) {
    857         UConverterMBCSTable mbcsTable;
    858         ByteBuffer extIndexes;
    859         short outputType;
    860         byte maxBytesPerUChar;
    861 
    862         mbcsTable = sharedData.mbcs;
    863         outputType = mbcsTable.outputType;
    864 
    865         if (outputType == MBCS_OUTPUT_DBCS_ONLY) {
    866             /* the swaplfnl option does not apply, remove it */
    867             this.options = myOptions &= ~UConverterConstants.OPTION_SWAP_LFNL;
    868         }
    869 
    870         if ((myOptions & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
    871             /* do this because double-checked locking is broken */
    872             boolean isCached;
    873 
    874             // agljport:todo umtx_lock(NULL);
    875             isCached = mbcsTable.swapLFNLStateTable != null;
    876             // agljport:todo umtx_unlock(NULL);
    877 
    878             if (!isCached) {
    879                 try {
    880                     if (!EBCDICSwapLFNL()) {
    881                         /* this option does not apply, remove it */
    882                         this.options = myOptions & ~UConverterConstants.OPTION_SWAP_LFNL;
    883                     }
    884                 } catch (Exception e) {
    885                     /* something went wrong. */
    886                     return;
    887                 }
    888             }
    889         }
    890 
    891         String lowerCaseName = icuCanonicalName.toLowerCase(Locale.ENGLISH);
    892         if (lowerCaseName.indexOf("gb18030") >= 0) {
    893             /* set a flag for GB 18030 mode, which changes the callback behavior */
    894             this.options |= MBCS_OPTION_GB18030;
    895         } else if (lowerCaseName.indexOf("keis") >= 0) {
    896             this.options |= MBCS_OPTION_KEIS;
    897         } else if (lowerCaseName.indexOf("jef") >= 0) {
    898             this.options |= MBCS_OPTION_JEF;
    899         } else if (lowerCaseName.indexOf("jips") >= 0) {
    900             this.options |= MBCS_OPTION_JIPS;
    901         }
    902 
    903         /* fix maxBytesPerUChar depending on outputType and options etc. */
    904         if (outputType == MBCS_OUTPUT_2_SISO) {
    905             /* changed from 3 to 4 in ICU4J only. #9205 */
    906             maxBytesPerChar = 4; /* SO+DBCS+SI*/
    907         }
    908 
    909         extIndexes = mbcsTable.extIndexes;
    910         if (extIndexes != null) {
    911             maxBytesPerUChar = (byte) GET_MAX_BYTES_PER_UCHAR(extIndexes);
    912             if (outputType == MBCS_OUTPUT_2_SISO) {
    913                 ++maxBytesPerUChar; /* SO + multiple DBCS */
    914             }
    915 
    916             if (maxBytesPerUChar > maxBytesPerChar) {
    917                 maxBytesPerChar = maxBytesPerUChar;
    918             }
    919         }
    920     }
    921      /* EBCDIC swap LF<->NL--------------------------------------------------------------------------------*/
    922      /*
    923       * This code modifies a standard EBCDIC<->Unicode mappling table for
    924       * OS/390 (z/OS) Unix System Services (Open Edition).
    925       * The difference is in the mapping of Line Feed and New Line control codes:
    926       * Standard EBDIC maps
    927       *
    928       * <U000A> \x25 |0
    929       * <U0085> \x15 |0
    930       *
    931       * but OS/390 USS EBCDIC swaps the control codes for LF and NL,
    932       * mapping
    933       *
    934       * <U000A> \x15 |0
    935       * <U0085> \x25 |0
    936       *
    937       * This code modifies a loaded standard EBCDIC<->Unicode mapping table
    938       * by copying it into allocated memory and swapping the LF and NL values.
    939       * It allows to support the same EBCDIC charset in both version without
    940       * duplicating the entire installed table.
    941       */
    942     /* standard EBCDIC codes */
    943     private static final short EBCDIC_LF = 0x0025;
    944     private static final short EBCDIC_NL = 0x0015;
    945 
    946     /* standard EBCDIC codes with roundtrip flag as stored in Unicode-to-single-byte tables */
    947     private static final short EBCDIC_RT_LF = 0x0f25;
    948     private static final short EBCDIC_RT_NL = 0x0f15;
    949 
    950     /* Unicode code points */
    951     private static final short U_LF = 0x000A;
    952     private static final short U_NL = 0x0085;
    953 
    954     private boolean EBCDICSwapLFNL() throws Exception {
    955         UConverterMBCSTable mbcsTable;
    956 
    957         char[] table;
    958 
    959         int[][] newStateTable;
    960         String newName;
    961 
    962         int stage2Entry;
    963 
    964         mbcsTable = sharedData.mbcs;
    965 
    966         table = mbcsTable.fromUnicodeTable;
    967         int[] tableInts = sharedData.mbcs.fromUnicodeTableInts;
    968         char[] chars = mbcsTable.fromUnicodeChars;
    969         char[] results = chars;
    970 
    971         /*
    972          * Check that this is an EBCDIC table with SBCS portion -
    973          * SBCS or EBCDIC with standard EBCDIC LF and NL mappings.
    974          *
    975          * If not, ignore the option. Options are always ignored if they do not apply.
    976          */
    977         if (!((mbcsTable.outputType == MBCS_OUTPUT_1 || mbcsTable.outputType == MBCS_OUTPUT_2_SISO) &&
    978               mbcsTable.stateTable[0][EBCDIC_LF] == MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF) &&
    979               mbcsTable.stateTable[0][EBCDIC_NL] == MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL))) {
    980             return false;
    981         }
    982 
    983         if (mbcsTable.outputType == MBCS_OUTPUT_1) {
    984             if (!(EBCDIC_RT_LF == MBCS_SINGLE_RESULT_FROM_U(table, results, U_LF) &&
    985                   EBCDIC_RT_NL == MBCS_SINGLE_RESULT_FROM_U(table, results, U_NL))) {
    986                 return false;
    987             }
    988         } else /* MBCS_OUTPUT_2_SISO */ {
    989             stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, U_LF);
    990             if (!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_LF) &&
    991                   EBCDIC_LF == MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, U_LF))) {
    992                 return false;
    993             }
    994 
    995             stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, U_NL);
    996             if (!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_NL) &&
    997                   EBCDIC_NL == MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, U_NL))) {
    998                 return false;
    999             }
   1000         }
   1001 
   1002         if (mbcsTable.fromUBytesLength > 0) {
   1003             /*
   1004              * We _know_ the number of bytes in the fromUnicodeBytes array
   1005              * starting with header.version 4.1.
   1006              */
   1007             // sizeofFromUBytes = mbcsTable.fromUBytesLength;
   1008         } else {
   1009             /*
   1010              * Otherwise:
   1011              * There used to be code to enumerate the fromUnicode
   1012              * trie and find the highest entry, but it was removed in ICU 3.2
   1013              * because it was not tested and caused a low code coverage number.
   1014              */
   1015             throw new Exception("U_INVALID_FORMAT_ERROR");
   1016         }
   1017 
   1018         /*
   1019          * The table has an appropriate format.
   1020          * Allocate and build
   1021          * - a modified to-Unicode state table
   1022          * - a modified from-Unicode output array
   1023          * - a converter name string with the swap option appended
   1024          */
   1025 //        size = mbcsTable.countStates * 1024 + sizeofFromUBytes + UConverterConstants.MAX_CONVERTER_NAME_LENGTH + 20;
   1026 
   1027         /* copy and modify the to-Unicode state table */
   1028         newStateTable = new int[mbcsTable.stateTable.length][mbcsTable.stateTable[0].length];
   1029         for (int i = 0; i < newStateTable.length; i++) {
   1030             System.arraycopy(mbcsTable.stateTable[i], 0, newStateTable[i], 0, newStateTable[i].length);
   1031         }
   1032 
   1033         newStateTable[0][EBCDIC_LF] = MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL);
   1034         newStateTable[0][EBCDIC_NL] = MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF);
   1035 
   1036         /* copy and modify the from-Unicode result table */
   1037         char[] newResults = new char[chars.length];
   1038         System.arraycopy(chars, 0, newResults, 0, chars.length);
   1039         /* conveniently, the table access macros work on the left side of expressions */
   1040         if (mbcsTable.outputType == MBCS_OUTPUT_1) {
   1041             MBCS_SINGLE_RESULT_FROM_U_SET(table, newResults, U_LF, EBCDIC_RT_NL);
   1042             MBCS_SINGLE_RESULT_FROM_U_SET(table, newResults, U_NL, EBCDIC_RT_LF);
   1043         } else /* MBCS_OUTPUT_2_SISO */ {
   1044             stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, U_LF);
   1045             MBCS_VALUE_2_FROM_STAGE_2_SET(newResults, stage2Entry, U_LF, EBCDIC_NL);
   1046 
   1047             stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, U_NL);
   1048             MBCS_VALUE_2_FROM_STAGE_2_SET(newResults, stage2Entry, U_NL, EBCDIC_LF);
   1049         }
   1050 
   1051         /* set the canonical converter name */
   1052         newName = icuCanonicalName.concat(UConverterConstants.OPTION_SWAP_LFNL_STRING);
   1053 
   1054         if (mbcsTable.swapLFNLStateTable == null) {
   1055             mbcsTable.swapLFNLStateTable = newStateTable;
   1056             mbcsTable.swapLFNLFromUnicodeChars = newResults;
   1057             mbcsTable.swapLFNLName = newName;
   1058         }
   1059         return true;
   1060     }
   1061 
   1062     /**
   1063      * MBCS output types for conversions from Unicode. These per-converter types determine the storage method in stage 3
   1064      * of the lookup table, mostly how many bytes are stored per entry.
   1065      */
   1066     static final int MBCS_OUTPUT_1 = 0; /* 0 */
   1067     static final int MBCS_OUTPUT_2 = MBCS_OUTPUT_1 + 1; /* 1 */
   1068     static final int MBCS_OUTPUT_3 = MBCS_OUTPUT_2 + 1; /* 2 */
   1069     static final int MBCS_OUTPUT_4 = MBCS_OUTPUT_3 + 1; /* 3 */
   1070     static final int MBCS_OUTPUT_3_EUC = 8; /* 8 */
   1071     static final int MBCS_OUTPUT_4_EUC = MBCS_OUTPUT_3_EUC + 1; /* 9 */
   1072     static final int MBCS_OUTPUT_2_SISO = 12; /* c */
   1073     static final int MBCS_OUTPUT_2_HZ = MBCS_OUTPUT_2_SISO + 1; /* d */
   1074     static final int MBCS_OUTPUT_EXT_ONLY = MBCS_OUTPUT_2_HZ + 1; /* e */
   1075     // static final int MBCS_OUTPUT_COUNT = MBCS_OUTPUT_EXT_ONLY + 1;
   1076     static final int MBCS_OUTPUT_DBCS_ONLY = 0xdb; /* runtime-only type for DBCS-only handling of SISO tables */
   1077 
   1078     /* GB 18030 data ------------------------------------------------------------ */
   1079 
   1080     /* helper macros for linear values for GB 18030 four-byte sequences */
   1081     private static int LINEAR_18030(int a, int b, int c, int d) {
   1082         return ((((a & 0xff) * 10 + (b & 0xff)) * 126 + (c & 0xff)) * 10 + (d & 0xff));
   1083     }
   1084 
   1085     private static int LINEAR_18030_BASE = LINEAR_18030(0x81, 0x30, 0x81, 0x30);
   1086 
   1087     private static int LINEAR(int x) {
   1088         return LINEAR_18030(x >>> 24, (x >>> 16) & 0xff, (x >>> 8) & 0xff, x & 0xff);
   1089     }
   1090 
   1091     /*
   1092      * Some ranges of GB 18030 where both the Unicode code points and the GB four-byte sequences are contiguous and are
   1093      * handled algorithmically by the special callback functions below. The values are start & end of Unicode & GB
   1094      * codes.
   1095      *
   1096      * Note that single surrogates are not mapped by GB 18030 as of the re-released mapping tables from 2000-nov-30.
   1097      */
   1098     private static final int gb18030Ranges[][] = new int[/* 14 */][/* 4 */] {
   1099             { 0x10000, 0x10FFFF, LINEAR(0x90308130), LINEAR(0xE3329A35) },
   1100             { 0x9FA6, 0xD7FF, LINEAR(0x82358F33), LINEAR(0x8336C738) },
   1101             { 0x0452, 0x1E3E, LINEAR(0x8130D330), LINEAR(0x8135F436) },
   1102             { 0x1E40, 0x200F, LINEAR(0x8135F438), LINEAR(0x8136A531) },
   1103             { 0xE865, 0xF92B, LINEAR(0x8336D030), LINEAR(0x84308534) },
   1104             { 0x2643, 0x2E80, LINEAR(0x8137A839), LINEAR(0x8138FD38) },
   1105             { 0xFA2A, 0xFE2F, LINEAR(0x84309C38), LINEAR(0x84318537) },
   1106             { 0x3CE1, 0x4055, LINEAR(0x8231D438), LINEAR(0x8232AF32) },
   1107             { 0x361B, 0x3917, LINEAR(0x8230A633), LINEAR(0x8230F237) },
   1108             { 0x49B8, 0x4C76, LINEAR(0x8234A131), LINEAR(0x8234E733) },
   1109             { 0x4160, 0x4336, LINEAR(0x8232C937), LINEAR(0x8232F837) },
   1110             { 0x478E, 0x4946, LINEAR(0x8233E838), LINEAR(0x82349638) },
   1111             { 0x44D7, 0x464B, LINEAR(0x8233A339), LINEAR(0x8233C931) },
   1112             { 0xFFE6, 0xFFFF, LINEAR(0x8431A234), LINEAR(0x8431A439) } };
   1113 
   1114     /* bit flag for UConverter.options indicating GB 18030 special handling */
   1115     private static final int MBCS_OPTION_GB18030 = 0x8000;
   1116 
   1117     /* bit flag for UConverter.options indicating KEIS,JEF,JIF special handling */
   1118     private static final int MBCS_OPTION_KEIS = 0x01000;
   1119     private static final int MBCS_OPTION_JEF = 0x02000;
   1120     private static final int MBCS_OPTION_JIPS = 0x04000;
   1121 
   1122     private static enum SISO_Option {
   1123         SI,
   1124         SO
   1125     }
   1126 
   1127     private static final byte[] KEIS_SO_CHAR = { 0x0A, 0x42 };
   1128     private static final byte[] KEIS_SI_CHAR = { 0x0A, 0x41 };
   1129     private static final byte JEF_SO_CHAR = 0x28;
   1130     private static final byte JEF_SI_CHAR = 0x29;
   1131     private static final byte[] JIPS_SO_CHAR = { 0x1A, 0x70 };
   1132     private static final byte[] JIPS_SI_CHAR = { 0x1A, 0x71 };
   1133 
   1134     private static int getSISOBytes(SISO_Option option, int cnvOption, byte[] value) {
   1135         int SISOLength = 0;
   1136 
   1137         switch (option) {
   1138             case SI:
   1139                 if ((cnvOption&MBCS_OPTION_KEIS)!=0) {
   1140                     value[0] = KEIS_SI_CHAR[0];
   1141                     value[1] = KEIS_SI_CHAR[1];
   1142                     SISOLength = 2;
   1143                 } else if ((cnvOption&MBCS_OPTION_JEF)!=0) {
   1144                     value[0] = JEF_SI_CHAR;
   1145                     SISOLength = 1;
   1146                 } else if ((cnvOption&MBCS_OPTION_JIPS)!=0) {
   1147                     value[0] = JIPS_SI_CHAR[0];
   1148                     value[1] = JIPS_SI_CHAR[1];
   1149                     SISOLength = 2;
   1150                 } else {
   1151                     value[0] = UConverterConstants.SI;
   1152                     SISOLength = 1;
   1153                 }
   1154                 break;
   1155             case SO:
   1156                 if ((cnvOption&MBCS_OPTION_KEIS)!=0) {
   1157                     value[0] = KEIS_SO_CHAR[0];
   1158                     value[1] = KEIS_SO_CHAR[1];
   1159                     SISOLength = 2;
   1160                 } else if ((cnvOption&MBCS_OPTION_JEF)!=0) {
   1161                     value[0] = JEF_SO_CHAR;
   1162                     SISOLength = 1;
   1163                 } else if ((cnvOption&MBCS_OPTION_JIPS)!=0) {
   1164                     value[0] = JIPS_SO_CHAR[0];
   1165                     value[1] = JIPS_SO_CHAR[1];
   1166                     SISOLength = 2;
   1167                 } else {
   1168                     value[0] = UConverterConstants.SO;
   1169                     SISOLength = 1;
   1170                 }
   1171                 break;
   1172             default:
   1173                 /* Should never happen. */
   1174                 break;
   1175         }
   1176 
   1177         return SISOLength;
   1178     }
   1179     // enum {
   1180         static final int MBCS_MAX_STATE_COUNT = 128;
   1181     // };
   1182     /**
   1183      * MBCS action codes for conversions to Unicode. These values are in bits 23..20 of the state table entries.
   1184      */
   1185     static final int MBCS_STATE_VALID_DIRECT_16 = 0;
   1186     static final int MBCS_STATE_VALID_DIRECT_20 = MBCS_STATE_VALID_DIRECT_16 + 1;
   1187     static final int MBCS_STATE_FALLBACK_DIRECT_16 = MBCS_STATE_VALID_DIRECT_20 + 1;
   1188     static final int MBCS_STATE_FALLBACK_DIRECT_20 = MBCS_STATE_FALLBACK_DIRECT_16 + 1;
   1189     static final int MBCS_STATE_VALID_16 = MBCS_STATE_FALLBACK_DIRECT_20 + 1;
   1190     static final int MBCS_STATE_VALID_16_PAIR = MBCS_STATE_VALID_16 + 1;
   1191     static final int MBCS_STATE_UNASSIGNED = MBCS_STATE_VALID_16_PAIR + 1;
   1192     static final int MBCS_STATE_ILLEGAL = MBCS_STATE_UNASSIGNED + 1;
   1193     static final int MBCS_STATE_CHANGE_ONLY = MBCS_STATE_ILLEGAL + 1;
   1194 
   1195     static int MBCS_ENTRY_SET_STATE(int entry, int state) {
   1196         return (entry&0x80ffffff)|(state<<24L);
   1197     }
   1198 
   1199     static int MBCS_ENTRY_STATE(int entry) {
   1200         return (((entry)>>24)&0x7f);
   1201     }
   1202 
   1203     /* Methods for state table entries */
   1204     static int MBCS_ENTRY_TRANSITION(int state, int offset) {
   1205         return (state << 24L) | offset;
   1206     }
   1207 
   1208     static int MBCS_ENTRY_FINAL(int state, int action, int value) {
   1209         return 0x80000000 | (state << 24L) | (action << 20L) | value;
   1210     }
   1211 
   1212     static boolean MBCS_ENTRY_IS_TRANSITION(int entry) {
   1213         return (entry) >= 0;
   1214     }
   1215 
   1216     static boolean MBCS_ENTRY_IS_FINAL(int entry) {
   1217         return (entry) < 0;
   1218     }
   1219 
   1220     static int MBCS_ENTRY_TRANSITION_STATE(int entry) {
   1221         return ((entry) >>> 24);
   1222     }
   1223 
   1224     static int MBCS_ENTRY_TRANSITION_OFFSET(int entry) {
   1225         return ((entry) & 0xffffff);
   1226     }
   1227 
   1228     static int MBCS_ENTRY_FINAL_STATE(int entry) {
   1229         return ((entry) >>> 24) & 0x7f;
   1230     }
   1231 
   1232     static boolean MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(int entry) {
   1233         return ((entry) < 0x80100000);
   1234     }
   1235 
   1236     static int MBCS_ENTRY_FINAL_ACTION(int entry) {
   1237         return ((entry) >>> 20) & 0xf;
   1238     }
   1239 
   1240     static int MBCS_ENTRY_FINAL_VALUE(int entry) {
   1241         return ((entry) & 0xfffff);
   1242     }
   1243 
   1244     static char MBCS_ENTRY_FINAL_VALUE_16(int entry) {
   1245         return (char) (entry);
   1246     }
   1247 
   1248     static boolean MBCS_IS_ASCII_ROUNDTRIP(int b, long asciiRoundtrips) {
   1249         return (((asciiRoundtrips) & (1<<((b)>>2)))!=0);
   1250     }
   1251 
   1252     /**
   1253      * This macro version of _MBCSSingleSimpleGetNextUChar() gets a code point from a byte. It works for single-byte,
   1254      * single-state codepages that only map to and from BMP code points, and it always returns fallback values.
   1255      */
   1256     static char MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(UConverterMBCSTable mbcs, final int b) {
   1257         assert 0 <= b && b <= 0xff;
   1258         return MBCS_ENTRY_FINAL_VALUE_16(mbcs.stateTable[0][b]);
   1259     }
   1260 
   1261     /* single-byte fromUnicode: get the 16-bit result word */
   1262     static char MBCS_SINGLE_RESULT_FROM_U(char[] table, char[] results, int c) {
   1263         int i1 = table[c >>> 10] + ((c >>> 4) & 0x3f);
   1264         int i = table[i1] + (c & 0xf);
   1265         return results[i];
   1266     }
   1267 
   1268     /* single-byte fromUnicode: set the 16-bit result word with newValue*/
   1269     static void MBCS_SINGLE_RESULT_FROM_U_SET(char[] table, char[] results, int c, int newValue) {
   1270         int i1 = table[c >>> 10] + ((c >>> 4) & 0x3f);
   1271         int i = table[i1] + (c & 0xf);
   1272         results[i] = (char) newValue;
   1273     }
   1274 
   1275     /* multi-byte fromUnicode: get the 32-bit stage 2 entry */
   1276     static int MBCS_STAGE_2_FROM_U(char[] table, int[] tableInts, int c) {
   1277         int i = table[(c) >>> 10] + ((c >>> 4) & 0x3f);
   1278         return tableInts[i];
   1279     }
   1280 
   1281     private static boolean MBCS_FROM_U_IS_ROUNDTRIP(int stage2Entry, int c) {
   1282         return (((stage2Entry) & (1 << (16 + ((c) & 0xf)))) != 0);
   1283     }
   1284 
   1285     static char MBCS_VALUE_2_FROM_STAGE_2(char[] chars, int stage2Entry, int c) {
   1286         int i = 16 * (stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf);
   1287         return chars[i];
   1288     }
   1289 
   1290     static void MBCS_VALUE_2_FROM_STAGE_2_SET(char[] chars, int stage2Entry, int c, int newValue) {
   1291         int i = 16 * (stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf);
   1292         chars[i] = (char) newValue;
   1293     }
   1294 
   1295     private static int MBCS_VALUE_4_FROM_STAGE_2(int[] ints, int stage2Entry, int c) {
   1296         int i = 16 * (stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf);
   1297         return ints[i];
   1298     }
   1299 
   1300     static int MBCS_POINTER_3_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c) {
   1301         return ((16 * (stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + ((c) & 0xf)) * 3);
   1302     }
   1303 
   1304     // ------------UConverterExt-------------------------------------------------------
            //
            // Slot numbers within the extension table's indexes[] header. Slots are 32-bit ints:
            // ARRAY() reads slot n at byte offset (n << 2) of the ByteBuffer.
            // NOTE(review): the names come in INDEX/LENGTH pairs; presumably INDEX is where an
            // array starts and LENGTH is its size - confirm against the extension table format.
   1305 
   1306     static final int EXT_INDEXES_LENGTH = 0; /* 0 */
   1307 
   1308     static final int EXT_TO_U_INDEX = EXT_INDEXES_LENGTH + 1; /* 1 */
   1309     static final int EXT_TO_U_LENGTH = EXT_TO_U_INDEX + 1; /* 2 */
   1310     static final int EXT_TO_U_UCHARS_INDEX = EXT_TO_U_LENGTH + 1; /* 3 */
   1311     static final int EXT_TO_U_UCHARS_LENGTH = EXT_TO_U_UCHARS_INDEX + 1; /* 4 */
   1312 
   1313     static final int EXT_FROM_U_UCHARS_INDEX = EXT_TO_U_UCHARS_LENGTH + 1; /* 5 */
   1314     static final int EXT_FROM_U_VALUES_INDEX = EXT_FROM_U_UCHARS_INDEX + 1; /* 6 */
   1315     static final int EXT_FROM_U_LENGTH = EXT_FROM_U_VALUES_INDEX + 1; /* 7 */
   1316     static final int EXT_FROM_U_BYTES_INDEX = EXT_FROM_U_LENGTH + 1; /* 8 */
   1317     static final int EXT_FROM_U_BYTES_LENGTH = EXT_FROM_U_BYTES_INDEX + 1; /* 9 */
   1318 
   1319     static final int EXT_FROM_U_STAGE_12_INDEX = EXT_FROM_U_BYTES_LENGTH + 1; /* 10 */
   1320     static final int EXT_FROM_U_STAGE_1_LENGTH = EXT_FROM_U_STAGE_12_INDEX + 1; /* 11 */
   1321     static final int EXT_FROM_U_STAGE_12_LENGTH = EXT_FROM_U_STAGE_1_LENGTH + 1; /* 12 */
   1322     static final int EXT_FROM_U_STAGE_3_INDEX = EXT_FROM_U_STAGE_12_LENGTH + 1; /* 13 */
   1323     static final int EXT_FROM_U_STAGE_3_LENGTH = EXT_FROM_U_STAGE_3_INDEX + 1; /* 14 */
   1324     static final int EXT_FROM_U_STAGE_3B_INDEX = EXT_FROM_U_STAGE_3_LENGTH + 1; /* 15 */
   1325     static final int EXT_FROM_U_STAGE_3B_LENGTH = EXT_FROM_U_STAGE_3B_INDEX + 1; /* 16 */
   1326 
   1327     private static final int EXT_COUNT_BYTES = EXT_FROM_U_STAGE_3B_LENGTH + 1; /* 17 */
            // The remaining slots are not used by this class and are kept for reference only.
   1328     // private static final int EXT_COUNT_UCHARS = EXT_COUNT_BYTES + 1;
   1329     // private static final int EXT_FLAGS = EXT_COUNT_UCHARS + 1;
   1330     //
   1331     // private static final int EXT_RESERVED_INDEX = EXT_FLAGS + 1; /* 20, moves with additional indexes */
   1332     //
   1333     // private static final int EXT_SIZE=31;
   1334     // private static final int EXT_INDEXES_MIN_LENGTH=32;
   1335 
            /* same value as the private FROM_U_MAX_DIRECT_LENGTH declared with the fromUnicode helpers */
   1336     static final int EXT_FROM_U_MAX_DIRECT_LENGTH = 3;
   1337 
   1338     /* toUnicode helpers -------------------------------------------------------- */
            /*
             * Bit layout used by the TO_U_* helpers below.
             *
             * Section word: bits 31..24 hold a byte (TO_U_GET_BYTE), bits 23..0 hold a value (TO_U_GET_VALUE).
             * Value: bit 23 is the roundtrip flag. Non-negative values below TO_U_MIN_CODE_POINT are treated
             * as partial-match indexes (TO_U_IS_PARTIAL). After the flag is masked off, values up to
             * TO_U_MAX_CODE_POINT encode a code point biased by TO_U_MIN_CODE_POINT; larger values pack an
             * index in bits 17..0 and (length + TO_U_LENGTH_OFFSET) in the bits above TO_U_LENGTH_SHIFT.
             */
   1339 
   1340     private static final int TO_U_BYTE_SHIFT = 24;
   1341     private static final int TO_U_VALUE_MASK = 0xffffff;
   1342     private static final int TO_U_MIN_CODE_POINT = 0x1f0000;
   1343     private static final int TO_U_MAX_CODE_POINT = 0x2fffff;
   1344     private static final int TO_U_ROUNDTRIP_FLAG = (1 << 23);
   1345     private static final int TO_U_INDEX_MASK = 0x3ffff;
   1346     private static final int TO_U_LENGTH_SHIFT = 18;
   1347     private static final int TO_U_LENGTH_OFFSET = 12;
   1348 
   1349     /* maximum number of indexed UChars */
   1350     static final int MAX_UCHARS = 19;
   1351 
   1352     static int TO_U_GET_BYTE(int word) {
   1353         return word >>> TO_U_BYTE_SHIFT;
   1354     }
   1355 
   1356     static int TO_U_GET_VALUE(int word) {
   1357         return word & TO_U_VALUE_MASK;
   1358     }
   1359 
   1360     static boolean TO_U_IS_ROUNDTRIP(int value) {
   1361         return (value & TO_U_ROUNDTRIP_FLAG) != 0;
   1362     }
   1363 
   1364     static boolean TO_U_IS_PARTIAL(int value) {
   1365         return 0 <= value && value < TO_U_MIN_CODE_POINT;
   1366     }
   1367 
   1368     static int TO_U_GET_PARTIAL_INDEX(int value) {
   1369         return value;
   1370     }
   1371 
   1372     static int TO_U_MASK_ROUNDTRIP(int value) {
   1373         return value & ~TO_U_ROUNDTRIP_FLAG;
   1374     }
   1375 
   1376     private static int TO_U_MAKE_WORD(byte b, int value) {
   1377         // TO_U_BYTE_SHIFT == 24: safe to just shift the signed byte-as-int.
   1378         return (b << TO_U_BYTE_SHIFT) | value;
   1379     }
   1380 
   1381     /* use after masking off the roundtrip flag */
   1382     static boolean TO_U_IS_CODE_POINT(int value) {
   1383         assert value >= 0;
   1384         return value <= TO_U_MAX_CODE_POINT;
   1385     }
   1386 
   1387     static int TO_U_GET_CODE_POINT(int value) {
   1388         assert value >= 0;
   1389         return value - TO_U_MIN_CODE_POINT;
   1390     }
   1391 
   1392     private static int TO_U_GET_INDEX(int value) {
   1393         return value & TO_U_INDEX_MASK;
   1394     }
   1395 
   1396     private static int TO_U_GET_LENGTH(int value) {
   1397         return (value >>> TO_U_LENGTH_SHIFT) - TO_U_LENGTH_OFFSET;
   1398     }
   1399 
   1400     /* fromUnicode helpers ------------------------------------------------------ */
   1401 
   1402     /* most trie constants are shared with ucnvmbcs.h */
            /* FROM_U() shifts each stage 2 value left by this amount before adding the low 4 bits of c */
   1403     private static final int STAGE_2_LEFT_SHIFT = 2;
   1404 
   1405     // private static final int STAGE_3_GRANULARITY = 4;
   1406 
   1407     /* trie access, returns the stage 3 value=index to stage 3b; s1Index=c>>10 */
   1408     static int FROM_U(CharBuffer stage12, CharBuffer stage3, int s1Index, int c) {
   1409         return stage3.get((stage12.get((stage12.get(s1Index) + ((c >>> 4) & 0x3f))) << STAGE_2_LEFT_SHIFT)
   1410                 + (c & 0xf));
   1411     }
   1412 
            /*
             * Bit layout used by the FROM_U_* helpers below: bit 31 is the roundtrip flag, the length is read
             * from the bits at FROM_U_LENGTH_SHIFT and above (masked with MAX_BYTES), and the low 24 bits hold
             * the data. A value whose bits 31..24 are all zero is a partial match (FROM_U_IS_PARTIAL).
             */
   1413     private static final int FROM_U_LENGTH_SHIFT = 24;
   1414     private static final int FROM_U_ROUNDTRIP_FLAG = 1 << 31;
   1415     static final int FROM_U_RESERVED_MASK = 0x60000000;
   1416     private static final int FROM_U_DATA_MASK = 0xffffff;
   1417 
   1418     /* special value for "no mapping" to <subchar1> (impossible roundtrip to 0 bytes, value 01) */
   1419     static final int FROM_U_SUBCHAR1 = 0x80000001;
   1420 
   1421     /* at most 3 bytes in the lower part of the value */
   1422     private static final int FROM_U_MAX_DIRECT_LENGTH = 3;
   1423 
   1424     /* maximum number of indexed bytes */
   1425     static final int MAX_BYTES = 0x1f;
   1426 
   1427     static boolean FROM_U_IS_PARTIAL(int value) {
   1428         return (value >>> FROM_U_LENGTH_SHIFT) == 0;
   1429     }
   1430 
   1431     static int FROM_U_GET_PARTIAL_INDEX(int value) {
   1432         return value;
   1433     }
   1434 
   1435     static boolean FROM_U_IS_ROUNDTRIP(int value) {
   1436         return (value & FROM_U_ROUNDTRIP_FLAG) != 0;
   1437     }
   1438 
   1439     private static int FROM_U_MASK_ROUNDTRIP(int value) {
   1440         return value & ~FROM_U_ROUNDTRIP_FLAG;
   1441     }
   1442 
   1443     /* use after masking off the roundtrip flag */
   1444     static int FROM_U_GET_LENGTH(int value) {
   1445         return (value >>> FROM_U_LENGTH_SHIFT) & MAX_BYTES;
   1446     }
   1447 
   1448     /* get bytes or bytes index */
   1449     static int FROM_U_GET_DATA(int value) {
   1450         return value & FROM_U_DATA_MASK;
   1451     }
   1452 
   1453     /* get the pointer to an extension array from indexes[index] */
   1454     static Buffer ARRAY(ByteBuffer indexes, int index, Class<?> itemType) {
   1455         int oldpos = indexes.position();
   1456         Buffer b;
   1457 
   1458         // TODO: It is very inefficient to create Buffer objects for each array access.
   1459         // We should create an inner class Extensions (or sibling class CharsetMBCSExtensions)
   1460         // which has buffers for the arrays, together with the code that works with them.
   1461         indexes.position(indexes.getInt(index << 2));
   1462         if (itemType == int.class)
   1463             b = indexes.asIntBuffer();
   1464         else if (itemType == char.class)
   1465             b = indexes.asCharBuffer();
   1466         else if (itemType == short.class)
   1467             b = indexes.asShortBuffer();
   1468         else
   1469             // default or (itemType == byte.class)
   1470             b = indexes.slice();
   1471         indexes.position(oldpos);
   1472         return b;
   1473     }
   1474 
   1475     private static int GET_MAX_BYTES_PER_UCHAR(ByteBuffer indexes) {
   1476         indexes.position(0);
   1477         return indexes.getInt(EXT_COUNT_BYTES) & 0xff;
   1478     }
   1479 
   1480     /*
   1481      * @return index of the UChar, if found; else <0
   1482      */
   1483     static int findFromU(CharBuffer fromUSection, int length, char u) {
   1484         int i, start, limit;
   1485 
   1486         /* binary search */
   1487         start = 0;
   1488         limit = length;
   1489         for (;;) {
   1490             i = limit - start;
   1491             if (i <= 1) {
   1492                 break; /* done */
   1493             }
   1494             /* start<limit-1 */
   1495 
   1496             if (i <= 4) {
   1497                 /* linear search for the last part */
   1498                 if (u <= fromUSection.get(fromUSection.position() + start)) {
   1499                     break;
   1500                 }
   1501                 if (++start < limit && u <= fromUSection.get(fromUSection.position() + start)) {
   1502                     break;
   1503                 }
   1504                 if (++start < limit && u <= fromUSection.get(fromUSection.position() + start)) {
   1505                     break;
   1506                 }
   1507                 /* always break at start==limit-1 */
   1508                 ++start;
   1509                 break;
   1510             }
   1511 
   1512             i = (start + limit) / 2;
   1513             if (u < fromUSection.get(fromUSection.position() + i)) {
   1514                 limit = i;
   1515             } else {
   1516                 start = i;
   1517             }
   1518         }
   1519 
   1520         /* did we really find it? */
   1521         if (start < limit && u == fromUSection.get(fromUSection.position() + start)) {
   1522             return start;
   1523         } else {
   1524             return -1; /* not found */
   1525         }
   1526     }
   1527 
   1528     /*
   1529      * @return lookup value for the byte, if found; else 0
   1530      */
   1531     static int findToU(IntBuffer toUSection, int length, short byt) {
   1532         long word0, word;
   1533         int i, start, limit;
   1534 
   1535         /* check the input byte against the lowest and highest section bytes */
   1536         // agljport:comment instead of receiving a start position parameter for toUSection we'll rely on its position
   1537         // property
   1538         start = TO_U_GET_BYTE(toUSection.get(toUSection.position()));
   1539         limit = TO_U_GET_BYTE(toUSection.get(toUSection.position() + length - 1));
   1540         if (byt < start || limit < byt) {
   1541             return 0; /* the byte is out of range */
   1542         }
   1543 
   1544         if (length == ((limit - start) + 1)) {
   1545             /* direct access on a linear array */
   1546             return TO_U_GET_VALUE(toUSection.get(toUSection.position() + byt - start)); /* could be 0 */
   1547         }
   1548 
   1549         /* word0 is suitable for <=toUSection[] comparison, word for <toUSection[] */
   1550         word0 = TO_U_MAKE_WORD((byte) byt, 0) & UConverterConstants.UNSIGNED_INT_MASK;
   1551 
   1552         /*
   1553          * Shift byte once instead of each section word and add 0xffffff. We will compare the shifted/added byte
   1554          * (bbffffff) against section words which have byte values in the same bit position. If and only if byte bb <
   1555          * section byte ss then bbffffff<ssvvvvvv for all v=0..f so we need not mask off the lower 24 bits of each
   1556          * section word.
   1557          */
   1558         word = word0 | TO_U_VALUE_MASK;
   1559 
   1560         /* binary search */
   1561         start = 0;
   1562         limit = length;
   1563         for (;;) {
   1564             i = limit - start;
   1565             if (i <= 1) {
   1566                 break; /* done */
   1567             }
   1568             /* start<limit-1 */
   1569 
   1570             if (i <= 4) {
   1571                 /* linear search for the last part */
   1572                 if (word0 <= (toUSection.get(toUSection.position() + start) & UConverterConstants.UNSIGNED_INT_MASK)) {
   1573                     break;
   1574                 }
   1575                 if (++start < limit
   1576                         && word0 <= (toUSection.get(toUSection.position() + start) & UConverterConstants.UNSIGNED_INT_MASK)) {
   1577                     break;
   1578                 }
   1579                 if (++start < limit
   1580                         && word0 <= (toUSection.get(toUSection.position() + start) & UConverterConstants.UNSIGNED_INT_MASK)) {
   1581                     break;
   1582                 }
   1583                 /* always break at start==limit-1 */
   1584                 ++start;
   1585                 break;
   1586             }
   1587 
   1588             i = (start + limit) / 2;
   1589             if (word < (toUSection.get(toUSection.position() + i) & UConverterConstants.UNSIGNED_INT_MASK)) {
   1590                 limit = i;
   1591             } else {
   1592                 start = i;
   1593             }
   1594         }
   1595 
   1596         /* did we really find it? */
   1597         if (start < limit) {
   1598             word = (toUSection.get(toUSection.position() + start) & UConverterConstants.UNSIGNED_INT_MASK);
   1599             if (byt == TO_U_GET_BYTE((int)word)) {
   1600                 return TO_U_GET_VALUE((int) word); /* never 0 */
   1601             }
   1602         }
   1603         return 0; /* not found */
   1604     }
   1605 
   1606     /*
   1607      * TRUE if not an SI/SO stateful converter, or if the match length fits with the current converter state
   1608      */
   1609     static boolean TO_U_VERIFY_SISO_MATCH(byte sisoState, int match) {
   1610         return sisoState < 0 || (sisoState == 0) == (match == 1);
   1611     }
   1612 
   1613     /*
   1614      * get the SI/SO toU state (state 0 is for SBCS, 1 for DBCS), or 1 for DBCS-only, or -1 if the converter is not
   1615      * SI/SO stateful
   1616      *
   1617      * Note: For SI/SO stateful converters getting here, cnv->mode==0 is equivalent to firstLength==1.
   1618      */
   1619     private static int SISO_STATE(UConverterSharedData sharedData, int mode) {
   1620         return sharedData.mbcs.outputType == MBCS_OUTPUT_2_SISO ? (byte) mode
   1621                 : sharedData.mbcs.outputType == MBCS_OUTPUT_DBCS_ONLY ? 1 : -1;
   1622     }
   1623 
   1624     class CharsetDecoderMBCS extends CharsetDecoderICU {
   1625 
                /**
                 * Creates a decoder for the given charset; all setup is delegated to the superclass constructor.
                 *
                 * @param cs charset passed through to {@code CharsetDecoderICU}
                 */
   1626         CharsetDecoderMBCS(CharsetICU cs) {
   1627             super(cs);
   1628         }
   1629 
   1630         @Override
   1631         protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
   1632         /* Just call cnvMBCSToUnicodeWithOffsets() to remove duplicate code. */
   1633             return cnvMBCSToUnicodeWithOffsets(source, target, offsets, flush);
   1634         }
   1635 
   1636         /*
   1637          * continue partial match with new input never called for simple, single-character conversion
   1638          */
   1639         private CoderResult continueMatchToU(ByteBuffer source, CharBuffer target, IntBuffer offsets, int srcIndex,
   1640                 boolean flush) {
   1641             CoderResult cr = CoderResult.UNDERFLOW;
   1642 
   1643             int[] value = new int[1];
   1644             int match, length;
   1645 
   1646             match = matchToU((byte) SISO_STATE(sharedData, mode), preToUArray, preToUBegin, preToULength, source,
   1647                     value, isToUUseFallback(), flush);
   1648 
   1649             if (match > 0) {
   1650                 if (match >= preToULength) {
   1651                     /* advance src pointer for the consumed input */
   1652                     source.position(source.position() + match - preToULength);
   1653                     preToULength = 0;
   1654                 } else {
   1655                     /* the match did not use all of preToU[] - keep the rest for replay */
   1656                     length = preToULength - match;
   1657                     System.arraycopy(preToUArray, preToUBegin + match, preToUArray, preToUBegin, length);
   1658                     preToULength = (byte) -length;
   1659                 }
   1660 
   1661                 /* write result */
   1662                 cr = writeToU(value[0], target, offsets, srcIndex);
   1663             } else if (match < 0) {
   1664                 /* save state for partial match */
   1665                 int j, sArrayIndex;
   1666 
   1667                 /* just _append_ the newly consumed input to preToU[] */
   1668                 sArrayIndex = source.position();
   1669                 match = -match;
   1670                 for (j = preToULength; j < match; ++j) {
   1671                     preToUArray[j] = source.get(sArrayIndex++);
   1672                 }
   1673                 source.position(sArrayIndex); /* same as *src=srcLimit; because we reached the end of input */
   1674                 preToULength = (byte) match;
   1675             } else /* match==0 */{
   1676                 /*
   1677                  * no match
   1678                  *
   1679                  * We need to split the previous input into two parts:
   1680                  *
   1681                  * 1. The first codepage character is unmappable - that's how we got into trying the extension data in
   1682                  * the first place. We need to move it from the preToU buffer to the error buffer, set an error code,
   1683                  * and prepare the rest of the previous input for 2.
   1684                  *
   1685                  * 2. The rest of the previous input must be converted once we come back from the callback for the first
   1686                  * character. At that time, we have to try again from scratch to convert these input characters. The
   1687                  * replay will be handled by the ucnv.c conversion code.
   1688                  */
   1689 
   1690                 /* move the first codepage character to the error field */
   1691                 System.arraycopy(preToUArray, preToUBegin, toUBytesArray, toUBytesBegin, preToUFirstLength);
   1692                 toULength = preToUFirstLength;
   1693 
   1694                 /* move the rest up inside the buffer */
   1695                 length = preToULength - preToUFirstLength;
   1696                 if (length > 0) {
   1697                     System.arraycopy(preToUArray, preToUBegin + preToUFirstLength, preToUArray, preToUBegin, length);
   1698                 }
   1699 
   1700                 /* mark preToU for replay */
   1701                 preToULength = (byte) -length;
   1702 
   1703                 /* set the error code for unassigned */
   1704                 cr = CoderResult.unmappableForLength(preToUFirstLength);
   1705             }
   1706             return cr;
   1707         }
   1708 
   1709         /*
                 * Matches input bytes against the toUnicode extension table. This works like matchFromU() except:
                 * - the first character is in pre
                 * - no trie is used
                 * - the returned matchLength is not offset by 2
                 *
                 * Input is taken first from preArray[preArrayBegin .. preArrayBegin+preLength[ and then from source,
                 * read with absolute gets so that source's position is never changed here.
                 *
                 * sisoState: 0 restricts the match to exactly one byte and forces flush; other values are only
                 *            passed on to TO_U_VERIFY_SISO_MATCH().
                 * pMatchValue: on a match, element 0 receives the match value with the roundtrip flag cleared.
                 * flush: if true, running out of input ends the search with the longest match so far instead of
                 *        reporting a partial match.
                 *
                 * @return >0: number of matched input bytes (pre plus source);
                 *          0: no match, or no extension data;
                 *         <0: partial match, the negated number of bytes consumed so far - more input is needed
                 */
   1713         private int matchToU(byte sisoState, byte[] preArray, int preArrayBegin, int preLength, ByteBuffer source,
   1714                 int[] pMatchValue, boolean isUseFallback, boolean flush) {
   1715             ByteBuffer cx = sharedData.mbcs.extIndexes;
   1716             IntBuffer toUTable, toUSection;
   1717 
   1718             int value, matchValue, srcLength = 0;
   1719             int i, j, index, length, matchLength;
   1720             short b;
   1721 
   1722             if (cx == null || cx.asIntBuffer().get(EXT_TO_U_LENGTH) <= 0) {
   1723                 return 0; /* no extension data, no match */
   1724             }
   1725 
   1726             /* initialize */
   1727             toUTable = (IntBuffer) ARRAY(cx, EXT_TO_U_INDEX, int.class);
   1728             index = 0;
   1729 
   1730             matchValue = 0;
                    /* i counts bytes taken from preArray, j counts bytes taken from source */
   1731             i = j = matchLength = 0;
   1732             if (source != null) {
   1733                 srcLength = source.remaining();
   1734             }
   1735 
   1736             if (sisoState == 0) {
   1737                 /* SBCS state of an SI/SO stateful converter, look at only exactly 1 byte */
   1738                 if (preLength > 1) {
   1739                     return 0; /* no match of a DBCS sequence in SBCS mode */
   1740                 } else if (preLength == 1) {
   1741                     srcLength = 0;
   1742                 } else /* preLength==0 */{
   1743                     if (srcLength > 1) {
   1744                         srcLength = 1;
   1745                     }
   1746                 }
                        /* with the input capped at one byte, a partial match must not be reported */
   1747                 flush = true;
   1748             }
   1749 
   1750             /* we must not remember fallback matches when not using fallbacks */
   1751 
   1752             /* match input units until there is a full match or the input is consumed */
   1753             for (;;) {
   1754                 /* go to the next section */
   1755                 int oldpos = toUTable.position();
   1756                 toUSection = ((IntBuffer) toUTable.position(index)).slice();
   1757                 toUTable.position(oldpos);
   1758 
   1759                 /* read first pair of the section */
                        /* the relative get() advances toUSection past this header word; findToU() relies on that position */
   1760                 value = toUSection.get();
                        /* the header word's byte field is the number of words that follow in this section */
   1761                 length = TO_U_GET_BYTE(value);
   1762                 value = TO_U_GET_VALUE(value);
   1763                 if (value != 0 && (TO_U_IS_ROUNDTRIP(value) || isToUUseFallback(isUseFallback))
   1764                         && TO_U_VERIFY_SISO_MATCH(sisoState, i + j)) {
   1765                     /* remember longest match so far */
   1766                     matchValue = value;
   1767                     matchLength = i + j;
   1768                 }
   1769 
   1770                 /* match pre[] then src[] */
   1771                 if (i < preLength) {
   1772                     b = (short) (preArray[preArrayBegin + i++] & UConverterConstants.UNSIGNED_BYTE_MASK);
   1773                 } else if (j < srcLength) {
   1774                     b = (short) (source.get(source.position() + j++) & UConverterConstants.UNSIGNED_BYTE_MASK);
   1775                 } else {
   1776                     /* all input consumed, partial match */
                            /* note: length is reused here for the number of bytes consumed, but only when flush is false */
   1777                     if (flush || (length = (i + j)) > MAX_BYTES) {
   1778                         /*
   1779                          * end of the entire input stream, stop with the longest match so far or: partial match must not
   1780                          * be longer than UCNV_EXT_MAX_BYTES because it must fit into state buffers
   1781                          */
   1782                         break;
   1783                     } else {
   1784                         /* continue with more input next time */
   1785                         return -length;
   1786                     }
   1787                 }
   1788 
   1789                 /* search for the current input byte in this section */
   1790                 value = findToU(toUSection, length, b);
   1791                 if (value == 0) {
   1792                     /* no match here, stop with the longest match so far */
   1793                     break;
   1794                 } else {
   1795                     if (TO_U_IS_PARTIAL(value)) {
   1796                         /* partial match, continue */
   1797                         index = TO_U_GET_PARTIAL_INDEX(value);
   1798                     } else {
   1799                         if ((TO_U_IS_ROUNDTRIP(value) || isToUUseFallback(isUseFallback)) && TO_U_VERIFY_SISO_MATCH(sisoState, i + j)) {
   1800                             /* full match, stop with result */
   1801                             matchValue = value;
   1802                             matchLength = i + j;
   1803                         } else {
   1804                             /* full match on fallback not taken, stop with the longest match so far */
   1805                         }
   1806                         break;
   1807                     }
   1808                 }
   1809             }
   1810 
   1811             if (matchLength == 0) {
   1812                 /* no match at all */
   1813                 return 0;
   1814             }
   1815 
   1816             /* return result */
   1817             pMatchValue[0] = TO_U_MASK_ROUNDTRIP(matchValue);
   1818             return matchLength;
   1819         }
   1820 
   1821         private CoderResult writeToU(int value, CharBuffer target, IntBuffer offsets, int srcIndex) {
   1822             ByteBuffer cx = sharedData.mbcs.extIndexes;
   1823             /* output the result */
   1824             if (TO_U_IS_CODE_POINT(value)) {
   1825                 /* output a single code point */
   1826                 return toUWriteCodePoint(TO_U_GET_CODE_POINT(value), target, offsets, srcIndex);
   1827             } else {
   1828                 /* output a string - with correct data we have resultLength>0 */
   1829 
   1830                 char[] a = new char[TO_U_GET_LENGTH(value)];
   1831                 CharBuffer cb = ((CharBuffer) ARRAY(cx, EXT_TO_U_UCHARS_INDEX, char.class));
   1832                 cb.position(TO_U_GET_INDEX(value));
   1833                 cb.get(a, 0, a.length);
   1834                 return toUWriteUChars(this, a, 0, a.length, target, offsets, srcIndex);
   1835             }
   1836         }
   1837 
   1838         private CoderResult toUWriteCodePoint(int c, CharBuffer target, IntBuffer offsets, int sourceIndex) {
   1839             CoderResult cr = CoderResult.UNDERFLOW;
   1840             int tBeginIndex = target.position();
   1841 
   1842             if (target.hasRemaining()) {
   1843                 if (c <= 0xffff) {
   1844                     target.put((char) c);
   1845                     c = UConverterConstants.U_SENTINEL;
   1846                 } else /* c is a supplementary code point */{
   1847                     target.put(UTF16.getLeadSurrogate(c));
   1848                     c = UTF16.getTrailSurrogate(c);
   1849                     if (target.hasRemaining()) {
   1850                         target.put((char) c);
   1851                         c = UConverterConstants.U_SENTINEL;
   1852                     }
   1853                 }
   1854 
   1855                 /* write offsets */
   1856                 if (offsets != null) {
   1857                     offsets.put(sourceIndex);
   1858                     if ((tBeginIndex + 1) < target.position()) {
   1859                         offsets.put(sourceIndex);
   1860                     }
   1861                 }
   1862             }
   1863 
   1864             /* write overflow from c */
   1865             if (c >= 0) {
   1866                 charErrorBufferLength = UTF16.append(charErrorBufferArray, 0, c);
   1867                 cr = CoderResult.OVERFLOW;
   1868             }
   1869 
   1870             return cr;
   1871         }
   1872 
   1873         /*
   1874          * Input sequence: cnv->toUBytes[0..length[ @return if(U_FAILURE) return the length (toULength, byteIndex) for
   1875          * the input else return 0 after output has been written to the target
   1876          */
   1877         private int toU(int length, ByteBuffer source, CharBuffer target, IntBuffer offsets, int sourceIndex,
   1878                 boolean flush, CoderResult[] cr) {
   1879             // ByteBuffer cx;
   1880 
   1881             if (sharedData.mbcs.extIndexes != null
   1882                     && initialMatchToU(length, source, target, offsets, sourceIndex, flush, cr)) {
   1883                 return 0; /* an extension mapping handled the input */
   1884             }
   1885 
   1886             /* GB 18030 */
   1887             if (length == 4 && (options & MBCS_OPTION_GB18030) != 0) {
   1888                 int[] range;
   1889                 int linear;
   1890                 int i;
   1891 
   1892                 linear = LINEAR_18030(toUBytesArray[0], toUBytesArray[1], toUBytesArray[2], toUBytesArray[3]);
   1893                 for (i = 0; i < gb18030Ranges.length; ++i) {
   1894                     range = gb18030Ranges[i];
   1895                     if (range[2] <= linear && linear <= range[3]) {
   1896                         /* found the sequence, output the Unicode code point for it */
   1897                         cr[0] = CoderResult.UNDERFLOW;
   1898 
   1899                         /* add the linear difference between the input and start sequences to the start code point */
   1900                         linear = range[0] + (linear - range[2]);
   1901 
   1902                         /* output this code point */
   1903                         cr[0] = toUWriteCodePoint(linear, target, offsets, sourceIndex);
   1904 
   1905                         return 0;
   1906                     }
   1907                 }
   1908             }
   1909 
   1910             /* no mapping */
   1911             cr[0] = CoderResult.unmappableForLength(length);
   1912             return length;
   1913         }
   1914 
   1915         /*
   1916          * target<targetLimit; set error code for overflow
   1917          */
   1918         private boolean initialMatchToU(int firstLength, ByteBuffer source, CharBuffer target, IntBuffer offsets,
   1919                 int srcIndex, boolean flush, CoderResult[] cr) {
   1920             int[] value = new int[1];
   1921             int match = 0;
   1922 
   1923             /* try to match */
   1924             match = matchToU((byte) SISO_STATE(sharedData, mode), toUBytesArray, toUBytesBegin, firstLength, source,
   1925                     value, isToUUseFallback(), flush);
   1926             if (match > 0) {
   1927                 /* advance src pointer for the consumed input */
   1928                 source.position(source.position() + match - firstLength);
   1929 
   1930                 /* write result to target */
   1931                 cr[0] = writeToU(value[0], target, offsets, srcIndex);
   1932                 return true;
   1933             } else if (match < 0) {
   1934                 /* save state for partial match */
   1935                 byte[] sArray;
   1936                 int sArrayIndex;
   1937                 int j;
   1938 
   1939                 /* copy the first code point */
   1940                 sArray = toUBytesArray;
   1941                 sArrayIndex = toUBytesBegin;
   1942                 preToUFirstLength = (byte) firstLength;
   1943                 for (j = 0; j < firstLength; ++j) {
   1944                     preToUArray[j] = sArray[sArrayIndex++];
   1945                 }
   1946 
   1947                 /* now copy the newly consumed input */
   1948                 sArrayIndex = source.position();
   1949                 match = -match;
   1950                 for (; j < match; ++j) {
   1951                     preToUArray[j] = source.get(sArrayIndex++);
   1952                 }
   1953                 source.position(sArrayIndex);
   1954                 preToULength = (byte) match;
   1955                 return true;
   1956             } else /* match==0 no match */{
   1957                 return false;
   1958             }
   1959         }
   1960 
   1961         private int simpleMatchToU(ByteBuffer source, boolean useFallback) {
   1962             int[] value = new int[1];
   1963             int match;
   1964 
   1965             if (source.remaining() <= 0) {
   1966                 return 0xffff;
   1967             }
   1968 
   1969             /* try to match */
   1970             byte[] sourceArray;
   1971             int sourcePosition, sourceLimit;
   1972             if (source.isReadOnly()) {
   1973                 // source.array() would throw an exception
   1974                 sourcePosition = source.position();  // relative to source.array()
   1975                 sourceArray = new byte[Math.min(source.remaining(), EXT_MAX_BYTES)];
   1976                 source.get(sourceArray).position(sourcePosition);
   1977                 sourcePosition = 0;  // relative to sourceArray
   1978                 sourceLimit = sourceArray.length;
   1979             } else {
   1980                 sourceArray = source.array();
   1981                 sourcePosition = source.position();
   1982                 sourceLimit = source.limit();
   1983             }
   1984             match = matchToU((byte) -1, sourceArray, sourcePosition, sourceLimit, null, value, useFallback, true);
   1985 
   1986             if (match == source.remaining()) {
   1987                 /* write result for simple, single-character conversion */
   1988                 if (TO_U_IS_CODE_POINT(value[0])) {
   1989                     return TO_U_GET_CODE_POINT(value[0]);
   1990                 }
   1991             }
   1992 
   1993             /*
   1994              * return no match because - match>0 && value points to string: simple conversion cannot handle multiple
   1995              * code points - match>0 && match!=length: not all input consumed, forbidden for this function - match==0:
   1996              * no match found in the first place - match<0: partial match, not supported for simple conversion (and
   1997              * flush==TRUE)
   1998              */
   1999             return 0xfffe;
   2000         }
   2001 
   2002         CoderResult cnvMBCSToUnicodeWithOffsets(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
                    /*
                     * Converts the bytes between source.position() and source.limit() into UTF-16 code units written
                     * to target, by walking the converter's MBCS state table.
                     *
                     * - Input saved in preToUArray (preToULength > 0) is first replayed through continueMatchToU().
                     * - Single-state tables (countStates == 1) are handed to one of the two single-byte variants.
                     * - Otherwise the loop resumes from the state kept in toUnicodeStatus, toULength, toUBytesArray
                     *   and mode, and stores the updated state back into those fields before returning.
                     *
                     * offsets may be null; when it is not, one source index is recorded per code unit written.
                     *
                     * Returns cr[0]: UNDERFLOW if the loop ran out of input, OVERFLOW if target filled up, a
                     * malformed-input result for illegal sequences, or whatever toU() stored for sequences that have
                     * no mapping in the state table.
                     */
   2003             CoderResult[] cr = { CoderResult.UNDERFLOW };
   2004 
   2005             int sourceArrayIndex, sourceArrayIndexStart;
   2006             int stateTable[][/* 256 */];
   2007             char[] unicodeCodeUnits;
   2008 
   2009             int offset;
   2010             byte state;
   2011             int byteIndex;
   2012             byte[] bytes;
   2013 
   2014             int sourceIndex, nextSourceIndex;
   2015 
   2016             int entry = 0;
   2017             char c;
   2018             byte action;
   2019 
   2020             if (preToULength > 0) {
   2021                 /*
   2022                  * pass sourceIndex=-1 because we continue from an earlier buffer in the future, this may change with
   2023                  * continuous offsets
   2024                  */
   2025                 cr[0] = continueMatchToU(source, target, offsets, -1, flush);
   2026 
                        /*
                         * NOTE(review): a negative preToULength is also produced by the Ticket 5691 branch further
                         * down, where it counts bytes that still need to be replayed; here it stops this call early.
                         */
   2027                 if (cr[0].isError() || preToULength < 0) {
   2028                     return cr[0];
   2029                 }
   2030             }
   2031 
   2032             if (sharedData.mbcs.countStates == 1) {
   2033                 if (!sharedData.mbcs.hasSupplementary()) {
   2034                     cr[0] = cnvMBCSSingleToBMPWithOffsets(source, target, offsets, flush);
   2035                 } else {
   2036                     cr[0] = cnvMBCSSingleToUnicodeWithOffsets(source, target, offsets, flush);
   2037                 }
   2038                 return cr[0];
   2039             }
   2040 
   2041             /* set up the local pointers */
   2042             sourceArrayIndex = sourceArrayIndexStart = source.position();
   2043 
   2044             if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
   2045                 stateTable = sharedData.mbcs.swapLFNLStateTable;
   2046             } else {
   2047                 stateTable = sharedData.mbcs.stateTable;
   2048             }
   2049             unicodeCodeUnits = sharedData.mbcs.unicodeCodeUnits;
   2050 
   2051             /* get the converter state from UConverter */
   2052             offset = toUnicodeStatus;
   2053             byteIndex = toULength;
   2054             bytes = toUBytesArray;
   2055 
   2056             /*
   2057              * if we are in the SBCS state for a DBCS-only converter, then load the DBCS state from the MBCS data
   2058              * (dbcsOnlyState==0 if it is not a DBCS-only converter)
   2059              */
   2060             state = (byte)mode;
   2061             if (state == 0) {
   2062                 state = sharedData.mbcs.dbcsOnlyState;
   2063             }
   2064 
   2065             /* sourceIndex=-1 if the current character began in the previous buffer */
   2066             sourceIndex = byteIndex == 0 ? 0 : -1;
   2067             nextSourceIndex = 0;
   2068 
   2069             /* conversion loop */
   2070             while (sourceArrayIndex < source.limit()) {
   2071                 /*
   2072                  * This following test is to see if available input would overflow the output. It does not catch output
   2073                  * of more than one code unit that overflows as a result of a surrogate pair or callback output from the
   2074                  * last source byte. Therefore, those situations also test for overflows and will then break the loop,
   2075                  * too.
   2076                  */
   2077                 if (!target.hasRemaining()) {
   2078                     /* target is full */
   2079                     cr[0] = CoderResult.OVERFLOW;
   2080                     break;
   2081                 }
   2082 
   2083                 if (byteIndex == 0) {
   2084                     /* optimized loop for 1/2-byte input and BMP output */
   2085                     // agljport:todo see ucnvmbcs.c for deleted block
   2086                     do {
   2087                         entry = stateTable[state][source.get(sourceArrayIndex)&UConverterConstants.UNSIGNED_BYTE_MASK];
   2088                         if (MBCS_ENTRY_IS_TRANSITION(entry)) {
   2089                             state = (byte)MBCS_ENTRY_TRANSITION_STATE(entry);
   2090                             offset = MBCS_ENTRY_TRANSITION_OFFSET(entry);
   2091                             ++sourceArrayIndex;
   2092                             if (sourceArrayIndex < source.limit()
   2093                                     && MBCS_ENTRY_IS_FINAL(entry = stateTable[state][source.get(sourceArrayIndex)&UConverterConstants.UNSIGNED_BYTE_MASK])
   2094                                     && MBCS_ENTRY_FINAL_ACTION(entry) == MBCS_STATE_VALID_16
   2095                                     && (c = unicodeCodeUnits[offset + MBCS_ENTRY_FINAL_VALUE_16(entry)]) < 0xfffe) {
   2096                                 ++sourceArrayIndex;
   2097                                 target.put(c);
                                        /* in this fast path the source indexes are only advanced when offsets are recorded */
   2098                                 if (offsets != null) {
   2099                                     offsets.put(sourceIndex);
   2100                                     sourceIndex = (nextSourceIndex += 2);
   2101                                 }
   2102                                 state = (byte)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
   2103                                 offset = 0;
   2104                             } else {
   2105                                 /* set the state and leave the optimized loop */
   2106                                 ++nextSourceIndex;
   2107                                 bytes[0] = source.get(sourceArrayIndex - 1);
   2108                                 byteIndex = 1;
   2109                                 break;
   2110                             }
   2111                         } else {
   2112                             if (MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
   2113                                 /* output BMP code point */
   2114                                 ++sourceArrayIndex;
   2115                                 target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));
   2116                                 if (offsets != null) {
   2117                                     offsets.put(sourceIndex);
   2118                                     sourceIndex = ++nextSourceIndex;
   2119                                 }
   2120                                 state = (byte)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
   2121                             } else {
   2122                                 /* leave the optimized loop */
   2123                                 break;
   2124                             }
   2125                         }
   2126                     } while (sourceArrayIndex < source.limit() && target.hasRemaining());
   2127                     /*
   2128                      * these tests and break statements could be put inside the loop if C had "break outerLoop" like
   2129                      * Java
   2130                      */
   2131                     if (sourceArrayIndex >= source.limit()) {
   2132                         break;
   2133                     }
   2134                     if (!target.hasRemaining()) {
   2135                         /* target is full */
   2136                         cr[0] = CoderResult.OVERFLOW;
   2137                         break;
   2138                     }
   2139 
                            /* consume the byte whose state-table entry was already loaded into entry above */
   2140                     ++nextSourceIndex;
   2141                     bytes[byteIndex++] = source.get(sourceArrayIndex++);
   2142                 } else /* byteIndex>0 */{
   2143                     ++nextSourceIndex;
   2144                     entry = stateTable[state][(bytes[byteIndex++] = source.get(sourceArrayIndex++))
   2145                             & UConverterConstants.UNSIGNED_BYTE_MASK];
   2146                 }
   2147 
   2148                 if (MBCS_ENTRY_IS_TRANSITION(entry)) {
   2149                     state = (byte)MBCS_ENTRY_TRANSITION_STATE(entry);
   2150                     offset += MBCS_ENTRY_TRANSITION_OFFSET(entry);
   2151                     continue;
   2152                 }
   2153 
   2154                 /* save the previous state for proper extension mapping with SI/SO-stateful converters */
   2155                 mode = state;
   2156 
   2157                 /* set the next state early so that we can reuse the entry variable */
   2158                 state = (byte)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
   2159 
   2160                 /*
   2161                  * An if-else-if chain provides more reliable performance for the most common cases compared to a
   2162                  * switch.
   2163                  */
   2164                 action = (byte)MBCS_ENTRY_FINAL_ACTION(entry);
   2165                 if (action == MBCS_STATE_VALID_16) {
   2166                     offset += MBCS_ENTRY_FINAL_VALUE_16(entry);
   2167                     c = unicodeCodeUnits[offset];
   2168                     if (c < 0xfffe) {
   2169                         /* output BMP code point */
   2170                         target.put(c);
   2171                         if (offsets != null) {
   2172                             offsets.put(sourceIndex);
   2173                         }
   2174                         byteIndex = 0;
   2175                     } else if (c == 0xfffe) {
   2176                         if (isFallbackUsed() && (entry = getFallback(sharedData.mbcs, offset)) != 0xfffe) {
   2177                             /* output fallback BMP code point */
   2178                             target.put((char)entry);
   2179                             if (offsets != null) {
   2180                                 offsets.put(sourceIndex);
   2181                             }
   2182                             byteIndex = 0;
   2183                         }
                                /* otherwise byteIndex stays >0 and the sequence is handled as unassigned below */
   2184                     } else {
   2185                         /* callback(illegal) */
   2186                         cr[0] = CoderResult.malformedForLength(byteIndex);
   2187                     }
   2188                 } else if (action == MBCS_STATE_VALID_DIRECT_16) {
   2189                     /* output BMP code point */
   2190                     target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));
   2191                     if (offsets != null) {
   2192                         offsets.put(sourceIndex);
   2193                     }
   2194                     byteIndex = 0;
   2195                 } else if (action == MBCS_STATE_VALID_16_PAIR) {
   2196                     offset += MBCS_ENTRY_FINAL_VALUE_16(entry);
   2197                     c = unicodeCodeUnits[offset++];
   2198                     if (c < 0xd800) {
   2199                         /* output BMP code point below 0xd800 */
   2200                         target.put(c);
   2201                         if (offsets != null) {
   2202                             offsets.put(sourceIndex);
   2203                         }
   2204                         byteIndex = 0;
   2205                     } else if (isFallbackUsed() ? c <= 0xdfff : c <= 0xdbff) {
   2206                         /* output roundtrip or fallback surrogate pair */
   2207                         target.put((char)(c & 0xdbff));
   2208                         if (offsets != null) {
   2209                             offsets.put(sourceIndex);
   2210                         }
   2211                         byteIndex = 0;
   2212                         if (target.hasRemaining()) {
   2213                             target.put(unicodeCodeUnits[offset]);
   2214                             if (offsets != null) {
   2215                                 offsets.put(sourceIndex);
   2216                             }
   2217                         } else {
   2218                             /* target overflow */
                                    /* keep the second code unit of the pair in charErrorBufferArray */
   2219                             charErrorBufferArray[0] = unicodeCodeUnits[offset];
   2220                             charErrorBufferLength = 1;
   2221                             cr[0] = CoderResult.OVERFLOW;
   2222 
   2223                             offset = 0;
   2224                             break;
   2225                         }
   2226                     } else if (isFallbackUsed() ? (c & 0xfffe) == 0xe000 : c == 0xe000) {
   2227                         /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
   2228                         target.put(unicodeCodeUnits[offset]);
   2229                         if (offsets != null) {
   2230                             offsets.put(sourceIndex);
   2231                         }
   2232                         byteIndex = 0;
   2233                     } else if (c == 0xffff) {
   2234                         /* callback(illegal) */
   2235                         cr[0] = CoderResult.malformedForLength(byteIndex);
   2236                     }
   2237                 } else if (action == MBCS_STATE_VALID_DIRECT_20
   2238                         || (action == MBCS_STATE_FALLBACK_DIRECT_20 && isFallbackUsed())) {
   2239                     entry = MBCS_ENTRY_FINAL_VALUE(entry);
   2240                     /* output surrogate pair */
   2241                     target.put((char)(0xd800 | (char)(entry >> 10)));
   2242                     if (offsets != null) {
   2243                         offsets.put(sourceIndex);
   2244                     }
   2245                     byteIndex = 0;
   2246                     c = (char)(0xdc00 | (char)(entry & 0x3ff));
   2247                     if (target.hasRemaining()) {
   2248                         target.put(c);
   2249                         if (offsets != null) {
   2250                             offsets.put(sourceIndex);
   2251                         }
   2252                     } else {
   2253                         /* target overflow */
   2254                         charErrorBufferArray[0] = c;
   2255                         charErrorBufferLength = 1;
   2256                         cr[0] = CoderResult.OVERFLOW;
   2257 
   2258                         offset = 0;
   2259                         break;
   2260                     }
   2261                 } else if (action == MBCS_STATE_CHANGE_ONLY) {
   2262                     /*
   2263                      * This serves as a state change without any output. It is useful for reading simple stateful
   2264                      * encodings, for example using just Shift-In/Shift-Out codes. The 21 unused bits may later be used
   2265                      * for more sophisticated state transitions.
   2266                      */
   2267                     if (sharedData.mbcs.dbcsOnlyState == 0) {
   2268                         byteIndex = 0;
   2269                     } else {
   2270                         /* SI/SO are illegal for DBCS-only conversion */
   2271                         state = (byte)(mode); /* restore the previous state */
   2272 
   2273                         /* callback(illegal) */
   2274                         cr[0] = CoderResult.malformedForLength(byteIndex);
   2275                     }
   2276                 } else if (action == MBCS_STATE_FALLBACK_DIRECT_16) {
   2277                     if (isFallbackUsed()) {
   2278                         /* output BMP code point */
   2279                         target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));
   2280                         if (offsets != null) {
   2281                             offsets.put(sourceIndex);
   2282                         }
   2283                         byteIndex = 0;
   2284                     }
   2285                 } else if (action == MBCS_STATE_UNASSIGNED) {
   2286                     /* just fall through */
   2287                 } else if (action == MBCS_STATE_ILLEGAL) {
   2288                     /* callback(illegal) */
   2289                     cr[0] = CoderResult.malformedForLength(byteIndex);
   2290                 } else {
   2291                     /* reserved, must never occur */
   2292                     byteIndex = 0;
   2293                 }
   2294 
   2295                 /* end of action codes: prepare for a new character */
   2296                 offset = 0;
   2297 
                        /*
                         * byteIndex==0: the sequence was fully handled above;
                         * byteIndex>0 with an error in cr[0]: illegal sequence;
                         * byteIndex>0 without an error: no mapping in the state table, try the extension via toU()
                         */
   2298                 if (byteIndex == 0) {
   2299                     sourceIndex = nextSourceIndex;
   2300                 } else if (cr[0].isError()) {
   2301                     /* callback(illegal) */
   2302                     if (byteIndex > 1) {
   2303                         /*
   2304                          * Ticket 5691: consistent illegal sequences:
   2305                          * - We include at least the first byte in the illegal sequence.
   2306                          * - If any of the non-initial bytes could be the start of a character,
   2307                          *   we stop the illegal sequence before the first one of those.
   2308                          */
   2309                         boolean isDBCSOnly = (sharedData.mbcs.dbcsOnlyState != 0);
   2310                         byte i;
   2311                         for (i = 1; i < byteIndex && !isSingleOrLead(stateTable, state, isDBCSOnly, (short)(bytes[i] & UConverterConstants.UNSIGNED_BYTE_MASK)); i++) {}
   2312                         if (i < byteIndex) {
   2313                             byte backOutDistance = (byte)(byteIndex - i);
   2314                             int bytesFromThisBuffer = sourceArrayIndex - sourceArrayIndexStart;
   2315                             byteIndex = i; /* length of reported illegal byte sequence */
   2316                             if (backOutDistance <= bytesFromThisBuffer) {
   2317                                 sourceArrayIndex -= backOutDistance;
   2318                             } else {
   2319                                 /* Back out bytes from the previous buffer: Need to replay them. */
   2320                                 this.preToULength = (byte)(bytesFromThisBuffer - backOutDistance);
   2321                                 /* preToULength is negative! */
   2322                                 for (int n = 0; n < -this.preToULength; n++) {
   2323                                     this.preToUArray[n] = bytes[i+n];
   2324                                 }
   2325                                 sourceArrayIndex = sourceArrayIndexStart;
   2326                             }
   2327                         }
   2328                     }
   2329                     break;
   2330                 } else /* unassigned sequences indicated with byteIndex>0 */{
   2331                     /* try an extension mapping */
   2332                     int sourceBeginIndex = sourceArrayIndex;
   2333                     source.position(sourceArrayIndex);
   2334                     byteIndex = toU(byteIndex, source, target, offsets, sourceIndex, flush, cr);
                            /* toU() may have moved source.position(); account for those bytes in the source indexes */
   2335                     sourceArrayIndex = source.position();
   2336                     sourceIndex = nextSourceIndex += (sourceArrayIndex - sourceBeginIndex);
   2337 
   2338                     if (cr[0].isError() || cr[0].isOverflow()) {
   2339                         /* not mappable or buffer overflow */
   2340                         break;
   2341                     }
   2342                 }
   2343             }
   2344 
   2345             /* set the converter state back into UConverter */
   2346             toUnicodeStatus = offset;
   2347             mode = state;
   2348             toULength = byteIndex;
   2349 
   2350             /* write back the updated pointers */
   2351             source.position(sourceArrayIndex);
   2352 
   2353             return cr[0];
   2354         }
   2355         /*
   2356          * This version of cnvMBCSSingleToUnicodeWithOffsets() is optimized for single-byte, single-state codepages that
   2357          * only map to and from the BMP. In addition to single-byte optimizations, the offset calculations become much
   2358          * easier.
   2359          */
   2360         private CoderResult cnvMBCSSingleToBMPWithOffsets(ByteBuffer source, CharBuffer target, IntBuffer offsets,
   2361                 boolean flush) {
   2362             CoderResult[] cr = { CoderResult.UNDERFLOW };
   2363 
   2364             int sourceArrayIndex, lastSource;
   2365             int targetCapacity, length;
   2366             int[][] stateTable;
   2367 
   2368             int sourceIndex;
   2369 
   2370             int entry;
   2371             byte action;
   2372 
   2373             /* set up the local pointers */
   2374             sourceArrayIndex = source.position();
   2375             targetCapacity = target.remaining();
   2376 
   2377             if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
   2378                 stateTable = sharedData.mbcs.swapLFNLStateTable;
   2379             } else {
   2380                 stateTable = sharedData.mbcs.stateTable;
   2381             }
   2382 
   2383             /* sourceIndex=-1 if the current character began in the previous buffer */
   2384             sourceIndex = 0;
   2385             lastSource = sourceArrayIndex;
   2386 
   2387             /*
   2388              * since the conversion here is 1:1 UChar:uint8_t, we need only one counter for the minimum of the
   2389              * sourceLength and targetCapacity
   2390              */
   2391             length = source.remaining();
   2392             if (length < targetCapacity) {
   2393                 targetCapacity = length;
   2394             }
   2395 
   2396             /* conversion loop */
   2397             while (targetCapacity > 0 && sourceArrayIndex < source.limit()) {
   2398                 entry = stateTable[0][source.get(sourceArrayIndex++) & UConverterConstants.UNSIGNED_BYTE_MASK];
   2399                 /* MBCS_ENTRY_IS_FINAL(entry) */
   2400 
   2401                 /* test the most common case first */
   2402                 if (MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
   2403                     /* output BMP code point */
   2404                     target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));
   2405                     --targetCapacity;
   2406                     continue;
   2407                 }
   2408 
   2409                 /*
   2410                  * An if-else-if chain provides more reliable performance for the most common cases compared to a
   2411                  * switch.
   2412                  */
   2413                 action = (byte) (MBCS_ENTRY_FINAL_ACTION(entry));
   2414                 if (action == MBCS_STATE_FALLBACK_DIRECT_16) {
   2415                     if (isFallbackUsed()) {
   2416                         /* output BMP code point */
   2417                         target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));
   2418                         --targetCapacity;
   2419                         continue;
   2420                     }
   2421                 } else if (action == MBCS_STATE_UNASSIGNED) {
   2422                     /* just fall through */
   2423                 } else if (action == MBCS_STATE_ILLEGAL) {
   2424                     /* callback(illegal) */
   2425                     cr[0] = CoderResult.malformedForLength(sourceArrayIndex - lastSource);
   2426                 } else {
   2427                     /* reserved, must never occur */
   2428                     continue;
   2429                 }
   2430 
   2431                 /* set offsets since the start or the last extension */
   2432                 if (offsets != null) {
   2433                     int count = sourceArrayIndex - lastSource;
   2434 
   2435                     /* predecrement: do not set the offset for the callback-causing character */
   2436                     while (--count > 0) {
   2437                         offsets.put(sourceIndex++);
   2438                     }
   2439                     /* offset and sourceIndex are now set for the current character */
   2440                 }
   2441 
   2442                 if (cr[0].isError()) {
   2443                     /* callback(illegal) */
   2444                     break;
   2445                 } else /* unassigned sequences indicated with byteIndex>0 */{
   2446                     /* try an extension mapping */
   2447                     lastSource = sourceArrayIndex;
   2448                     toUBytesArray[0] = source.get(sourceArrayIndex - 1);
   2449                     source.position(sourceArrayIndex);
   2450                     toULength = toU((byte) 1, source, target, offsets, sourceIndex, flush, cr);
   2451                     sourceArrayIndex = source.position();
   2452                     sourceIndex += 1 + (sourceArrayIndex - lastSource);
   2453 
   2454                     if (cr[0].isError()) {
   2455                         /* not mappable or buffer overflow */
   2456                         break;
   2457                     }
   2458 
   2459                     /* recalculate the targetCapacity after an extension mapping */
   2460                     targetCapacity = target.remaining();
   2461                     length = source.remaining();
   2462                     if (length < targetCapacity) {
   2463                         targetCapacity = length;
   2464                     }
   2465                 }
   2466             }
   2467 
   2468             if (!cr[0].isError() && sourceArrayIndex < source.limit() && !target.hasRemaining()) {
   2469                 /* target is full */
   2470                 cr[0] = CoderResult.OVERFLOW;
   2471             }
   2472 
   2473             /* set offsets since the start or the last callback */
   2474             if (offsets != null) {
   2475                 int count = sourceArrayIndex - lastSource;
   2476                 while (count > 0) {
   2477                     offsets.put(sourceIndex++);
   2478                     --count;
   2479                 }
   2480             }
   2481 
   2482             /* write back the updated pointers */
   2483             source.position(sourceArrayIndex);
   2484 
   2485             return cr[0];
   2486         }
   2487 
   2488         /* This version of cnvMBCSToUnicodeWithOffsets() is optimized for single-byte, single-state codepages. */
   2489         private CoderResult cnvMBCSSingleToUnicodeWithOffsets(ByteBuffer source, CharBuffer target, IntBuffer offsets,
   2490                 boolean flush) {
   2491             CoderResult[] cr = { CoderResult.UNDERFLOW };
   2492 
   2493             int sourceArrayIndex;
   2494             int[][] stateTable;
   2495 
   2496             int sourceIndex;
   2497 
   2498             int entry;
   2499             char c;
   2500             byte action;
   2501 
   2502             /* set up the local pointers */
   2503             sourceArrayIndex = source.position();
   2504 
   2505             if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
   2506                 stateTable = sharedData.mbcs.swapLFNLStateTable;
   2507             } else {
   2508                 stateTable = sharedData.mbcs.stateTable;
   2509             }
   2510 
   2511             /* sourceIndex=-1 if the current character began in the previous buffer */
   2512             sourceIndex = 0;
   2513 
   2514             /* conversion loop */
   2515             while (sourceArrayIndex < source.limit()) {
   2516                 /*
   2517                  * This following test is to see if available input would overflow the output. It does not catch output
   2518                  * of more than one code unit that overflows as a result of a surrogate pair or callback output from the
   2519                  * last source byte. Therefore, those situations also test for overflows and will then break the loop,
   2520                  * too.
   2521                  */
   2522                 if (!target.hasRemaining()) {
   2523                     /* target is full */
   2524                     cr[0] = CoderResult.OVERFLOW;
   2525                     break;
   2526                 }
   2527 
   2528                 entry = stateTable[0][source.get(sourceArrayIndex++) & UConverterConstants.UNSIGNED_BYTE_MASK];
   2529                 /* MBCS_ENTRY_IS_FINAL(entry) */
   2530 
   2531                 /* test the most common case first */
   2532                 if (MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
   2533                     /* output BMP code point */
   2534                     target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));
   2535                     if (offsets != null) {
   2536                         offsets.put(sourceIndex);
   2537                     }
   2538 
   2539                     /* normal end of action codes: prepare for a new character */
   2540                     ++sourceIndex;
   2541                     continue;
   2542                 }
   2543 
   2544                 /*
   2545                  * An if-else-if chain provides more reliable performance for the most common cases compared to a
   2546                  * switch.
   2547                  */
   2548                 action = (byte) (MBCS_ENTRY_FINAL_ACTION(entry));
   2549                 if (action == MBCS_STATE_VALID_DIRECT_20
   2550                         || (action == MBCS_STATE_FALLBACK_DIRECT_20 && isFallbackUsed())) {
   2551 
   2552                     entry = MBCS_ENTRY_FINAL_VALUE(entry);
   2553                     /* output surrogate pair */
   2554                     target.put((char) (0xd800 | (char) (entry >>> 10)));
   2555                     if (offsets != null) {
   2556                         offsets.put(sourceIndex);
   2557                     }
   2558                     c = (char) (0xdc00 | (char) (entry & 0x3ff));
   2559                     if (target.hasRemaining()) {
   2560                         target.put(c);
   2561                         if (offsets != null) {
   2562                             offsets.put(sourceIndex);
   2563                         }
   2564                     } else {
   2565                         /* target overflow */
   2566                         charErrorBufferArray[0] = c;
   2567                         charErrorBufferLength = 1;
   2568                         cr[0] = CoderResult.OVERFLOW;
   2569                         break;
   2570                     }
   2571 
   2572                     ++sourceIndex;
   2573                     continue;
   2574                 } else if (action == MBCS_STATE_FALLBACK_DIRECT_16) {
   2575                     if (isFallbackUsed()) {
   2576                         /* output BMP code point */
   2577                         target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));
   2578                         if (offsets != null) {
   2579                             offsets.put(sourceIndex);
   2580                         }
   2581 
   2582                         ++sourceIndex;
   2583                         continue;
   2584                     }
   2585                 } else if (action == MBCS_STATE_UNASSIGNED) {
   2586                     /* just fall through */
   2587                 } else if (action == MBCS_STATE_ILLEGAL) {
   2588                     /* callback(illegal) */
   2589                     cr[0] = CoderResult.malformedForLength(1);
   2590                 } else {
   2591                     /* reserved, must never occur */
   2592                     ++sourceIndex;
   2593                     continue;
   2594                 }
   2595 
   2596                 if (cr[0].isError()) {
   2597                     /* callback(illegal) */
   2598                     break;
   2599                 } else /* unassigned sequences indicated with byteIndex>0 */{
   2600                     /* try an extension mapping */
   2601                     int sourceBeginIndex = sourceArrayIndex;
   2602                     toUBytesArray[0] = source.get(sourceArrayIndex - 1);
   2603                     source.position(sourceArrayIndex);
   2604                     toULength = toU((byte) 1, source, target, offsets, sourceIndex, flush, cr);
   2605                     sourceArrayIndex = source.position();
   2606                     sourceIndex += 1 + (sourceArrayIndex - sourceBeginIndex);
   2607 
   2608                     if (cr[0].isError()) {
   2609                         /* not mappable or buffer overflow */
   2610                         break;
   2611                     }
   2612                 }
   2613             }
   2614 
   2615             /* write back the updated pointers */
   2616             source.position(sourceArrayIndex);
   2617 
   2618             return cr[0];
   2619         }
   2620 
   2621         private int getFallback(UConverterMBCSTable mbcsTable, int offset) {
   2622             MBCSToUFallback[] toUFallbacks;
   2623             int i, start, limit;
   2624 
   2625             limit = mbcsTable.countToUFallbacks;
   2626             if (limit > 0) {
   2627                 /* do a binary search for the fallback mapping */
   2628                 toUFallbacks = mbcsTable.toUFallbacks;
   2629                 start = 0;
   2630                 while (start < limit - 1) {
   2631                     i = (start + limit) >>> 1;
   2632                     if (offset < toUFallbacks[i].offset) {
   2633                         limit = i;
   2634                     } else {
   2635                         start = i;
   2636                     }
   2637                 }
   2638 
   2639                 /* did we really find it? */
   2640                 if (offset == toUFallbacks[start].offset) {
   2641                     return toUFallbacks[start].codePoint;
   2642                 }
   2643             }
   2644 
   2645             return 0xfffe;
   2646         }
   2647 
   2648         /**
   2649          * This is a simple version of _MBCSGetNextUChar() that is used by other converter implementations. It only
   2650          * returns an "assigned" result if it consumes the entire input. It does not use state from the converter, nor
   2651          * error codes. It does not handle the EBCDIC swaplfnl option (set in UConverter). It handles conversion
   2652          * extensions but not GB 18030.
   2653          *
   2654          * @return U+fffe unassigned U+ffff illegal otherwise the Unicode code point
   2655          */
   2656         int simpleGetNextUChar(ByteBuffer source, boolean useFallback) {
   2657 
   2658             // #if 0
   2659             // /*
   2660             // * Code disabled 2002dec09 (ICU 2.4) because it is not currently used in ICU. markus
   2661             // * TODO In future releases, verify that this function is never called for SBCS
   2662             // * conversions, i.e., that sharedData->mbcs.countStates==1 is still true.
   2663             // * Removal improves code coverage.
   2664             // */
   2665             // /* use optimized function if possible */
   2666             // if(sharedData->mbcs.countStates==1) {
   2667             // if(length==1) {
   2668             // return ucnv_MBCSSingleSimpleGetNextUChar(sharedData, (uint8_t)*source, useFallback);
   2669             // } else {
   2670             // return 0xffff; /* illegal: more than a single byte for an SBCS converter */
   2671             // }
   2672             // }
   2673             // #endif
   2674 
   2675             /* set up the local pointers */
   2676             int[][] stateTable = sharedData.mbcs.stateTable;
   2677             char[] unicodeCodeUnits = sharedData.mbcs.unicodeCodeUnits;
   2678 
   2679             /* converter state */
   2680             int offset = 0;
   2681             int state = sharedData.mbcs.dbcsOnlyState;
   2682 
   2683             int action;
   2684             int entry;
   2685             int c;
   2686             int i = source.position();
   2687             int length = source.limit() - i;
   2688 
   2689             /* conversion loop */
   2690             while (true) {
   2691                 // entry=stateTable[state][(uint8_t)source[i++]];
   2692                 entry = stateTable[state][source.get(i++) & UConverterConstants.UNSIGNED_BYTE_MASK];
   2693 
   2694                 if (MBCS_ENTRY_IS_TRANSITION(entry)) {
   2695                     state = MBCS_ENTRY_TRANSITION_STATE(entry);
   2696                     offset += MBCS_ENTRY_TRANSITION_OFFSET(entry);
   2697 
   2698                     if (i == source.limit()) {
   2699                         return 0xffff; /* truncated character */
   2700                     }
   2701                 } else {
   2702                     /*
   2703                      * An if-else-if chain provides more reliable performance for the most common cases compared to a
   2704                      * switch.
   2705                      */
   2706                     action = MBCS_ENTRY_FINAL_ACTION(entry);
   2707                     if (action == MBCS_STATE_VALID_16) {
   2708                         offset += MBCS_ENTRY_FINAL_VALUE_16(entry);
   2709                         c = unicodeCodeUnits[offset];
   2710                         if (c != 0xfffe) {
   2711                             /* done */
   2712                         } else if (isToUUseFallback()) {
   2713                             c = getFallback(sharedData.mbcs, offset);
   2714                         }
   2715                         /* else done with 0xfffe */
   2716                     } else if (action == MBCS_STATE_VALID_DIRECT_16) {
   2717                         // /* output BMP code point */
   2718                         c = MBCS_ENTRY_FINAL_VALUE_16(entry);
   2719                     } else if (action == MBCS_STATE_VALID_16_PAIR) {
   2720                         offset += MBCS_ENTRY_FINAL_VALUE_16(entry);
   2721                         c = unicodeCodeUnits[offset++];
   2722                         if (c < 0xd800) {
   2723                             /* output BMP code point below 0xd800 */
   2724                         } else if (isToUUseFallback() ? c <= 0xdfff : c <= 0xdbff) {
   2725                             /* output roundtrip or fallback supplementary code point */
   2726                             c = (((c & 0x3ff) << 10) + unicodeCodeUnits[offset] + (0x10000 - 0xdc00));
   2727                         } else if (isToUUseFallback() ? (c & 0xfffe) == 0xe000 : c == 0xe000) {
   2728                             /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
   2729                             c = unicodeCodeUnits[offset];
   2730                         } else if (c == 0xffff) {
   2731                             return 0xffff;
   2732                         } else {
   2733                             c = 0xfffe;
   2734                         }
   2735                     } else if (action == MBCS_STATE_VALID_DIRECT_20) {
   2736                         /* output supplementary code point */
   2737                         c = 0x10000 + MBCS_ENTRY_FINAL_VALUE(entry);
   2738                     } else if (action == MBCS_STATE_FALLBACK_DIRECT_16) {
   2739                         if (!isToUUseFallback(useFallback)) {
   2740                             c = 0xfffe;
   2741                         } else {
   2742                             /* output BMP code point */
   2743                             c = MBCS_ENTRY_FINAL_VALUE_16(entry);
   2744                         }
   2745                     } else if (action == MBCS_STATE_FALLBACK_DIRECT_20) {
   2746                         if (!isToUUseFallback(useFallback)) {
   2747                             c = 0xfffe;
   2748                         } else {
   2749                             /* output supplementary code point */
   2750                             c = 0x10000 + MBCS_ENTRY_FINAL_VALUE(entry);
   2751                         }
   2752                     } else if (action == MBCS_STATE_UNASSIGNED) {
   2753                         c = 0xfffe;
   2754                     } else {
   2755                         /*
   2756                          * forbid MBCS_STATE_CHANGE_ONLY for this function, and MBCS_STATE_ILLEGAL and reserved action
   2757                          * codes
   2758                          */
   2759                         return 0xffff;
   2760                     }
   2761                     break;
   2762                 }
   2763             }
   2764 
   2765             if (i != source.limit()) {
   2766                 /* illegal for this function: not all input consumed */
   2767                 return 0xffff;
   2768             }
   2769 
   2770             if (c == 0xfffe) {
   2771                 /* try an extension mapping */
   2772                 if (sharedData.mbcs.extIndexes != null) {
   2773                     /* Increase the limit for proper handling. Used in LMBCS. */
   2774                     if (source.limit() > i + length) {
   2775                         source.limit(i + length);
   2776                     }
   2777                     return simpleMatchToU(source, useFallback);
   2778                 }
   2779             }
   2780 
   2781             return c;
   2782         }
   2783         private boolean hasValidTrailBytes(int[][] stateTable, short state) {
   2784             int[] row = stateTable[state];
   2785             int b, entry;
   2786             /* First test for final entries in this state for some commonly valid byte values. */
   2787             entry = row[0xa1];
   2788             if (!MBCS_ENTRY_IS_TRANSITION(entry) && MBCS_ENTRY_FINAL_ACTION(entry) != MBCS_STATE_ILLEGAL) {
   2789                 return true;
   2790             }
   2791             entry = row[0x41];
   2792             if (!MBCS_ENTRY_IS_TRANSITION(entry) && MBCS_ENTRY_FINAL_ACTION(entry) != MBCS_STATE_ILLEGAL) {
   2793                 return true;
   2794             }
   2795             /* Then test for final entries in this state. */
   2796             for (b = 0; b <= 0xff; b++) {
   2797                 entry = row[b];
   2798                 if (!MBCS_ENTRY_IS_TRANSITION(entry) && MBCS_ENTRY_FINAL_ACTION(entry) != MBCS_STATE_ILLEGAL) {
   2799                     return true;
   2800                 }
   2801             }
   2802             /* Then recurse for transition entries. */
   2803             for (b = 0; b <= 0xff; b++) {
   2804                 entry = row[b];
   2805                 if (MBCS_ENTRY_IS_TRANSITION(entry) &&
   2806                         hasValidTrailBytes(stateTable, (short)MBCS_ENTRY_TRANSITION_STATE(entry))) {
   2807                     return true;
   2808                 }
   2809             }
   2810             return false;
   2811         }
   2812 
   2813         private boolean isSingleOrLead(int[][] stateTable, int state, boolean isDBCSOnly, int b) {
   2814             int[] row = stateTable[state];
   2815             int entry = row[b];
   2816             if (MBCS_ENTRY_IS_TRANSITION(entry)) { /* lead byte */
   2817                 return hasValidTrailBytes(stateTable, (short)MBCS_ENTRY_TRANSITION_STATE(entry));
   2818             } else {
   2819                 int action = MBCS_ENTRY_FINAL_ACTION(entry);
   2820                 if (action == MBCS_STATE_CHANGE_ONLY && isDBCSOnly) {
   2821                     return false;   /* SI/SO are illegal for DBCS-only conversion */
   2822                 } else {
   2823                     return (action != MBCS_STATE_ILLEGAL);
   2824                 }
   2825             }
   2826         }
   2827 
   2828 
   2829     }
   2830 
   2831     class CharsetEncoderMBCS extends CharsetEncoderICU {
   2832         private boolean allowReplacementChanges = false;
   2833 
   2834         CharsetEncoderMBCS(CharsetICU cs) {
   2835             super(cs, fromUSubstitution);
   2836             allowReplacementChanges = true; // allow changes in implReplaceWith
   2837             implReset();
   2838         }
   2839 
   2840         @Override
   2841         protected void implReset() {
   2842             super.implReset();
   2843             preFromUFirstCP = UConverterConstants.U_SENTINEL;
   2844         }
   2845 
   2846         @Override
   2847         @SuppressWarnings("fallthrough")
   2848         protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
   2849             CoderResult[] cr = { CoderResult.UNDERFLOW };
   2850             // if (!source.hasRemaining() && fromUChar32 == 0)
   2851             // return cr[0];
   2852 
   2853             int sourceArrayIndex;
   2854             char[] table;
   2855             byte[] pArray, bytes;
   2856             char[] chars;
   2857             int[] ints;
   2858             int pArrayIndex, outputType, c;
   2859             int prevSourceIndex, sourceIndex, nextSourceIndex;
   2860             int stage2Entry = 0, value = 0, length = 0, prevLength;
   2861             short uniMask;
   2862             // long asciiRoundtrips;
   2863 
   2864             byte[] si_value = new byte[2];
   2865             byte[] so_value = new byte[2];
   2866             int si_value_length = 0, so_value_length = 0;
   2867 
   2868             boolean gotoUnassigned = false;
   2869 
   2870             try {
   2871 
   2872                 if (!flush && preFromUFirstCP >= 0) {
   2873                     /*
   2874                      * pass sourceIndex=-1 because we continue from an earlier buffer in the future, this may change
   2875                      * with continuous offsets
   2876                      */
   2877                     cr[0] = continueMatchFromU(source, target, offsets, flush, -1);
   2878 
   2879                     if (cr[0].isError() || preFromULength < 0) {
   2880                         return cr[0];
   2881                     }
   2882                 }
   2883 
   2884                 /* use optimized function if possible */
   2885                 outputType = sharedData.mbcs.outputType;
   2886                 uniMask = sharedData.mbcs.unicodeMask;
   2887                 if (outputType == MBCS_OUTPUT_1 && (uniMask & UConverterConstants.HAS_SURROGATES) == 0) {
   2888                     if ((uniMask & UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
   2889                         cr[0] = cnvMBCSSingleFromBMPWithOffsets(source, target, offsets, flush);
   2890                     } else {
   2891                         cr[0] = cnvMBCSSingleFromUnicodeWithOffsets(source, target, offsets, flush);
   2892                     }
   2893                     return cr[0];
   2894                 } else if (outputType == MBCS_OUTPUT_2) {
   2895                     cr[0] = cnvMBCSDoubleFromUnicodeWithOffsets(source, target, offsets, flush);
   2896                     return cr[0];
   2897                 }
   2898 
   2899                 table = sharedData.mbcs.fromUnicodeTable;
   2900                 int[] tableInts = sharedData.mbcs.fromUnicodeTableInts;
   2901                 sourceArrayIndex = source.position();
   2902 
   2903                 bytes = sharedData.mbcs.fromUnicodeBytes;
   2904                 ints = sharedData.mbcs.fromUnicodeInts;
   2905                 if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
   2906                     chars = sharedData.mbcs.swapLFNLFromUnicodeChars;
   2907                 } else {
   2908                     chars = sharedData.mbcs.fromUnicodeChars;
   2909                 }
   2910 
   2911                 // asciiRoundtrips = sharedData.mbcs.asciiRoundtrips;
   2912 
   2913                 /* get the converter state from UConverter */
   2914                 c = fromUChar32;
   2915 
   2916                 if (outputType == MBCS_OUTPUT_2_SISO) {
   2917                     prevLength = fromUnicodeStatus;
   2918                     if (prevLength == 0) {
   2919                         /* set the real value */
   2920                         prevLength = 1;
   2921                     }
   2922                 } else {
   2923                     /* prevent fromUnicodeStatus from being set to something non-0 */
   2924                     prevLength = 0;
   2925                 }
   2926 
   2927                 /* sourceIndex=-1 if the current character began in the previous buffer */
   2928                 prevSourceIndex = -1;
   2929                 sourceIndex = c == 0 ? 0 : -1;
   2930                 nextSourceIndex = 0;
   2931 
   2932                 /* Get the SI/SO character for the converter */
   2933                 si_value_length = getSISOBytes(SISO_Option.SI, options, si_value);
   2934                 so_value_length = getSISOBytes(SISO_Option.SO, options, so_value);
   2935 
   2936                 /* conversion loop */
   2937                 /*
   2938                  * This is another piece of ugly code: A goto into the loop if the converter state contains a first
   2939                  * surrogate from the previous function call. It saves me to check in each loop iteration a check of
   2940                  * if(c==0) and duplicating the trail-surrogate-handling code in the else branch of that check. I could
   2941                  * not find any other way to get around this other than using a function call for the conversion and
   2942                  * callback, which would be even more inefficient.
   2943                  *
   2944                  * Markus Scherer 2000-jul-19
   2945                  */
   2946                 boolean doloop = true;
   2947                 boolean doread = true;
   2948                 if (c != 0 && target.hasRemaining()) {
   2949                     if (UTF16.isLeadSurrogate((char) c) && (uniMask & UConverterConstants.HAS_SURROGATES) == 0) {
   2950                         // c is a lead surrogate, read another input
   2951                         SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex, nextSourceIndex,
   2952                                 prevSourceIndex, prevLength);
   2953                         doloop = getTrail(source, target, uniMask, x, flush, cr);
   2954                         doread = x.doread;
   2955                         c = x.c;
   2956                         sourceArrayIndex = x.sourceArrayIndex;
   2957                         sourceIndex = x.sourceIndex;
   2958                         nextSourceIndex = x.nextSourceIndex;
   2959                         prevSourceIndex = x.prevSourceIndex;
   2960                         prevLength = x.prevLength;
   2961                     } else {
   2962                         // c is not a lead surrogate, do not read another input
   2963                         doread = false;
   2964                     }
   2965                 }
   2966 
   2967                 if (doloop) {
   2968                     while (!doread || sourceArrayIndex < source.limit()) {
   2969                         /*
   2970                          * This following test is to see if available input would overflow the output. It does not catch
   2971                          * output of more than one byte that overflows as a result of a multi-byte character or callback
   2972                          * output from the last source character. Therefore, those situations also test for overflows
   2973                          * and will then break the loop, too.
   2974                          */
   2975                         if (target.hasRemaining()) {
   2976                             /*
   2977                              * Get a correct Unicode code point: a single UChar for a BMP code point or a matched
   2978                              * surrogate pair for a "supplementary code point".
   2979                              */
   2980 
   2981                             if (doread) {
   2982                                 // doread might be false only on the first looping
   2983 
   2984                                 c = source.get(sourceArrayIndex++);
   2985                                 ++nextSourceIndex;
   2986 
   2987                                 /*
   2988                                  * This also tests if the codepage maps single surrogates. If it does, then surrogates
   2989                                  * are not paired but mapped separately. Note that in this case unmatched surrogates are
   2990                                  * not detected.
   2991                                  */
   2992                                 if (UTF16.isSurrogate((char) c)
   2993                                         && (uniMask & UConverterConstants.HAS_SURROGATES) == 0) {
   2994                                     if (UTF16.isLeadSurrogate((char) c)) {
   2995                                         // getTrail:
   2996                                         SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex,
   2997                                                 nextSourceIndex, prevSourceIndex, prevLength);
   2998                                         doloop = getTrail(source, target, uniMask, x, flush, cr);
   2999                                         c = x.c;
   3000                                         sourceArrayIndex = x.sourceArrayIndex;
   3001                                         sourceIndex = x.sourceIndex;
   3002                                         nextSourceIndex = x.nextSourceIndex;
   3003                                         prevSourceIndex = x.prevSourceIndex;
   3004 
   3005                                         if (x.doread) {
   3006                                             if (doloop)
   3007                                                 continue;
   3008                                             else
   3009                                                 break;
   3010                                         }
   3011                                     } else {
   3012                                         /* this is an unmatched trail code unit (2nd surrogate) */
   3013                                         /* callback(illegal) */
   3014                                         cr[0] = CoderResult.malformedForLength(1);
   3015                                         break;
   3016                                     }
   3017                                 }
   3018                             } else {
   3019                                 doread = true;
   3020                             }
   3021                             /* convert the Unicode code point in c into codepage bytes */
   3022 
   3023                             /*
   3024                              * The basic lookup is a triple-stage compact array (trie) lookup. For details see the
   3025                              * beginning of this file.
   3026                              *
   3027                              * Single-byte codepages are handled with a different data structure by _MBCSSingle...
   3028                              * functions.
   3029                              *
   3030                              * The result consists of a 32-bit value from stage 2 and a pointer to as many bytes as are
   3031                              * stored per character. The pointer points to the character's bytes in stage 3. Bits 15..0
   3032                              * of the stage 2 entry contain the stage 3 index for that pointer, while bits 31..16 are
   3033                              * flags for which of the 16 characters in the block are roundtrip-assigned.
   3034                              *
   3035                              * For 2-byte and 4-byte codepages, the bytes are stored as uint16_t respectively as
   3036                              * uint32_t, in the platform encoding. For 3-byte codepages, the bytes are always stored in
   3037                              * big-endian order.
   3038                              *
   3039                              * For EUC encodings that use only either 0x8e or 0x8f as the first byte of their longest
   3040                              * byte sequences, the first two bytes in this third stage indicate with their 7th bits
   3041                              * whether these bytes are to be written directly or actually need to be preceeded by one of
   3042                              * the two Single-Shift codes. With this, the third stage stores one byte fewer per
   3043                              * character than the actual maximum length of EUC byte sequences.
   3044                              *
   3045                              * Other than that, leading zero bytes are removed and the other bytes output. A single zero
   3046                              * byte may be output if the "assigned" bit in stage 2 was on. The data structure does not
   3047                              * support zero byte output as a fallback, and also does not allow output of leading zeros.
   3048                              */
   3049                             stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, c);
   3050 
   3051                             /* get the bytes and the length for the output */
   3052                             switch (outputType) {
   3053                             /* This is handled above with the method cnvMBCSDoubleFromUnicodeWithOffsets() */
   3054                             /* case MBCS_OUTPUT_2:
   3055                                 value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
   3056                                 if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
   3057                                     length = 1;
   3058                                 } else {
   3059                                     length = 2;
   3060                                 }
   3061                                 break; */
   3062                             case MBCS_OUTPUT_2_SISO:
   3063                                 /* 1/2-byte stateful with Shift-In/Shift-Out */
   3064                                 /*
   3065                                  * Save the old state in the converter object right here, then change the local
   3066                                  * prevLength state variable if necessary. Then, if this character turns out to be
   3067                                  * unassigned or a fallback that is not taken, the callback code must not save the new
   3068                                  * state in the converter because the new state is for a character that is not output.
   3069                                  * However, the callback must still restore the state from the converter in case the
   3070                                  * callback function changed it for its output.
   3071                                  */
   3072                                 fromUnicodeStatus = prevLength; /* save the old state */
   3073                                 value = MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, c);
   3074                                 if (value <= 0xff) {
   3075                                     if (value == 0 && MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) == false) {
   3076                                         /* no mapping, leave value==0 */
   3077                                         length = 0;
   3078                                     } else if (prevLength <= 1) {
   3079                                         length = 1;
   3080                                     } else {
   3081                                         /* change from double-byte mode to single-byte */
   3082                                         if (si_value_length == 1) {
   3083                                             value|=si_value[0]<<8;
   3084                                             length = 2;
   3085                                         } else if (si_value_length == 2) {
   3086                                             value|=si_value[1]<<8;
   3087                                             value|=si_value[0]<<16;
   3088                                             length = 3;
   3089                                         }
   3090                                         prevLength = 1;
   3091                                     }
   3092                                 } else {
   3093                                     if (prevLength == 2) {
   3094                                         length = 2;
   3095                                     } else {
   3096                                         /* change from single-byte mode to double-byte */
   3097                                         if (so_value_length == 1) {
   3098                                             value|=so_value[0]<<16;
   3099                                             length = 3;
   3100                                         } else if (so_value_length == 2) {
   3101                                             value|=so_value[1]<<16;
   3102                                             value|=so_value[0]<<24;
   3103                                             length = 4;
   3104                                         }
   3105                                         prevLength = 2;
   3106                                     }
   3107                                 }
   3108                                 break;
   3109                             case MBCS_OUTPUT_DBCS_ONLY:
   3110                                 /* table with single-byte results, but only DBCS mappings used */
   3111                                 value = MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, c);
   3112                                 if (value <= 0xff) {
   3113                                     /* no mapping or SBCS result, not taken for DBCS-only */
   3114                                     value = stage2Entry = 0; /* stage2Entry=0 to reset roundtrip flags */
   3115                                     length = 0;
   3116                                 } else {
   3117                                     length = 2;
   3118                                 }
   3119                                 break;
   3120                             case MBCS_OUTPUT_3:
   3121                                 pArray = bytes;
   3122                                 pArrayIndex = MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
   3123                                 value = ((pArray[pArrayIndex] & UConverterConstants.UNSIGNED_BYTE_MASK) << 16)
   3124                                         | ((pArray[pArrayIndex + 1] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8)
   3125                                         | (pArray[pArrayIndex + 2] & UConverterConstants.UNSIGNED_BYTE_MASK);
   3126                                 if (value <= 0xff) {
   3127                                     length = 1;
   3128                                 } else if (value <= 0xffff) {
   3129                                     length = 2;
   3130                                 } else {
   3131                                     length = 3;
   3132                                 }
   3133                                 break;
   3134                             case MBCS_OUTPUT_4:
   3135                                 value = MBCS_VALUE_4_FROM_STAGE_2(ints, stage2Entry, c);
   3136                                 if (value < 0) {
   3137                                     // Half of the 4-byte values look negative in a signed int.
   3138                                     length = 4;
   3139                                 } else if (value <= 0xff) {
   3140                                     length = 1;
   3141                                 } else if (value <= 0xffff) {
   3142                                     length = 2;
   3143                                 } else if (value <= 0xffffff) {
   3144                                     length = 3;
   3145                                 } else {
   3146                                     length = 4;
   3147                                 }
   3148                                 break;
   3149                             case MBCS_OUTPUT_3_EUC:
   3150                                 value = MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, c);
   3151                                 /* EUC 16-bit fixed-length representation */
   3152                                 if (value <= 0xff) {
   3153                                     length = 1;
   3154                                 } else if ((value & 0x8000) == 0) {
   3155                                     value |= 0x8e8000;
   3156                                     length = 3;
   3157                                 } else if ((value & 0x80) == 0) {
   3158                                     value |= 0x8f0080;
   3159                                     length = 3;
   3160                                 } else {
   3161                                     length = 2;
   3162                                 }
   3163                                 break;
   3164                             case MBCS_OUTPUT_4_EUC:
   3165                                 pArray = bytes;
   3166                                 pArrayIndex = MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
   3167                                 value = ((pArray[pArrayIndex] & UConverterConstants.UNSIGNED_BYTE_MASK) << 16)
   3168                                         | ((pArray[pArrayIndex + 1] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8)
   3169                                         | (pArray[pArrayIndex + 2] & UConverterConstants.UNSIGNED_BYTE_MASK);
   3170                                 /* EUC 16-bit fixed-length representation applied to the first two bytes */
   3171                                 if (value <= 0xff) {
   3172                                     length = 1;
   3173                                 } else if (value <= 0xffff) {
   3174                                     length = 2;
   3175                                 } else if ((value & 0x800000) == 0) {
   3176                                     value |= 0x8e800000;
   3177                                     length = 4;
   3178                                 } else if ((value & 0x8000) == 0) {
   3179                                     value |= 0x8f008000;
   3180                                     length = 4;
   3181                                 } else {
   3182                                     length = 3;
   3183                                 }
   3184                                 break;
   3185                             default:
   3186                                 /* must not occur */
   3187                                 /*
   3188                                  * To avoid compiler warnings that value & length may be used without having been
   3189                                  * initialized, we set them here. In reality, this is unreachable code. Not having a
   3190                                  * default branch also causes warnings with some compilers.
   3191                                  */
   3192                                 value = stage2Entry = 0; /* stage2Entry=0 to reset roundtrip flags */
   3193                                 length = 0;
   3194                                 break;
   3195                             }
   3196 
   3197                             /* is this code point assigned, or do we use fallbacks? */
   3198                             if (gotoUnassigned || (!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) || (isFromUUseFallback(c) && value != 0)))) {
   3199                                 gotoUnassigned = false;
   3200                                 /*
   3201                                  * We allow a 0 byte output if the "assigned" bit is set for this entry. There is no way
   3202                                  * with this data structure for fallback output to be a zero byte.
   3203                                  */
   3204 
   3205                                 // unassigned:
   3206                                 SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex, nextSourceIndex,
   3207                                         prevSourceIndex, prevLength);
   3208                                 doloop = unassigned(source, target, offsets, x, flush, cr);
   3209                                 c = x.c;
   3210                                 sourceArrayIndex = x.sourceArrayIndex;
   3211                                 sourceIndex = x.sourceIndex;
   3212                                 nextSourceIndex = x.nextSourceIndex;
   3213                                 prevSourceIndex = x.prevSourceIndex;
   3214                                 prevLength = x.prevLength;
   3215                                 if (doloop)
   3216                                     continue;
   3217                                 else
   3218                                     break;
   3219                             }
   3220 
   3221                             /* write the output character bytes from value and length */
   3222                             /* from the first if in the loop we know that targetCapacity>0 */
   3223                             if (length <= target.remaining()) {
   3224                                 switch (length) {
   3225                                 /* each branch falls through to the next one */
   3226                                 case 4:
   3227                                     target.put((byte) (value >>> 24));
   3228                                     if (offsets != null) {
   3229                                         offsets.put(sourceIndex);
   3230                                     }
   3231                                 case 3:
   3232                                     target.put((byte) (value >>> 16));
   3233                                     if (offsets != null) {
   3234                                         offsets.put(sourceIndex);
   3235                                     }
   3236                                 case 2:
   3237                                     target.put((byte) (value >>> 8));
   3238                                     if (offsets != null) {
   3239                                         offsets.put(sourceIndex);
   3240                                     }
   3241                                 case 1:
   3242                                     target.put((byte) value);
   3243                                     if (offsets != null) {
   3244                                         offsets.put(sourceIndex);
   3245                                     }
   3246                                 default:
   3247                                     /* will never occur */
   3248                                     break;
   3249                                 }
   3250                             } else {
   3251                                 int errorBufferArrayIndex;
   3252 
   3253                                 /*
   3254                                  * We actually do this backwards here: In order to save an intermediate variable, we
   3255                                  * output first to the overflow buffer what does not fit into the regular target.
   3256                                  */
   3257                                 /* we know that 1<=targetCapacity<length<=4 */
   3258                                 length -= target.remaining();
   3259 
   3260                                 errorBufferArrayIndex = 0;
   3261                                 switch (length) {
   3262                                 /* each branch falls through to the next one */
   3263                                 case 3:
   3264                                     errorBuffer[errorBufferArrayIndex++] = (byte) (value >>> 16);
   3265                                 case 2:
   3266                                     errorBuffer[errorBufferArrayIndex++] = (byte) (value >>> 8);
   3267                                 case 1:
   3268                                     errorBuffer[errorBufferArrayIndex] = (byte) value;
   3269                                 default:
   3270                                     /* will never occur */
   3271                                     break;
   3272                                 }
   3273                                 errorBufferLength = (byte) length;
   3274 
   3275                                 /* now output what fits into the regular target */
   3276                                 value >>>= 8 * length; /* length was reduced by targetCapacity */
   3277                                 switch (target.remaining()) {
   3278                                 /* each branch falls through to the next one */
   3279                                 case 3:
   3280                                     target.put((byte) (value >>> 16));
   3281                                     if (offsets != null) {
   3282                                         offsets.put(sourceIndex);
   3283                                     }
   3284                                 case 2:
   3285                                     target.put((byte) (value >>> 8));
   3286                                     if (offsets != null) {
   3287                                         offsets.put(sourceIndex);
   3288                                     }
   3289                                 case 1:
   3290                                     target.put((byte) value);
   3291                                     if (offsets != null) {
   3292                                         offsets.put(sourceIndex);
   3293                                     }
   3294                                 default:
   3295                                     /* will never occur */
   3296                                     break;
   3297                                 }
   3298 
   3299                                 /* target overflow */
   3300                                 cr[0] = CoderResult.OVERFLOW;
   3301                                 c = 0;
   3302                                 break;
   3303                             }
   3304 
   3305                             /* normal end of conversion: prepare for a new character */
   3306                             c = 0;
   3307                             if (offsets != null) {
   3308                                 prevSourceIndex = sourceIndex;
   3309                                 sourceIndex = nextSourceIndex;
   3310                             }
   3311                             continue;
   3312                         } else {
   3313                             /* target is full */
   3314                             cr[0] = CoderResult.OVERFLOW;
   3315                             break;
   3316                         }
   3317                     }
   3318                 }
   3319 
   3320                 /*
   3321                  * the end of the input stream and detection of truncated input are handled by the framework, but for
   3322                  * EBCDIC_STATEFUL conversion we need to emit an SI at the very end
   3323                  *
   3324                  * conditions: successful EBCDIC_STATEFUL in DBCS mode end of input and no truncated input
   3325                  */
   3326                 if (outputType == MBCS_OUTPUT_2_SISO && prevLength == 2 && flush && sourceArrayIndex >= source.limit()
   3327                         && c == 0) {
   3328 
   3329                     /* EBCDIC_STATEFUL ending with DBCS: emit an SI to return the output stream to SBCS */
   3330                     if (target.hasRemaining()) {
   3331                         target.put(si_value[0]);
   3332                         if (si_value_length == 2) {
   3333                             if (target.remaining() > 0) {
   3334                                 target.put(si_value[1]);
   3335                             } else {
   3336                                 errorBuffer[0] = si_value[1];
   3337                                 errorBufferLength = 1;
   3338                                 cr[0] = CoderResult.OVERFLOW;
   3339                             }
   3340                         }
   3341                         if (offsets != null) {
   3342                             /* set the last source character's index (sourceIndex points at sourceLimit now) */
   3343                             offsets.put(prevSourceIndex);
   3344                         }
   3345                     } else {
   3346                         /* target is full */
   3347                         errorBuffer[0] = si_value[0];
   3348                         if (si_value_length == 2) {
   3349                             errorBuffer[1] = si_value[1];
   3350                         }
   3351                         errorBufferLength = si_value_length;
   3352                         cr[0] = CoderResult.OVERFLOW;
   3353                     }
   3354                     prevLength = 1; /* we switched into SBCS */
   3355                 }
   3356 
   3357                 /* set the converter state back into UConverter */
   3358                 fromUChar32 = c;
   3359                 fromUnicodeStatus = prevLength;
   3360 
   3361                 source.position(sourceArrayIndex);
   3362             } catch (BufferOverflowException ex) {
   3363                 cr[0] = CoderResult.OVERFLOW;
   3364             }
   3365 
   3366             return cr[0];
   3367         }
   3368 
   3369         /*
   3370          * This is another simple conversion function for internal use by other conversion implementations. It does not
   3371          * use the converter state nor call callbacks. It does not handle the EBCDIC swaplfnl option (set in
   3372          * UConverter). It handles conversion extensions but not GB 18030.
   3373          *
   3374          * It converts one single Unicode code point into codepage bytes, encoded as one 32-bit value. The function
   3375          * returns the number of bytes in *pValue: 1..4 the number of bytes in *pValue 0 unassigned (*pValue undefined)
   3376          * -1 illegal (currently not used, *pValue undefined)
   3377          *
   3378          * *pValue will contain the resulting bytes with the last byte in bits 7..0, the second to last byte in bits
   3379          * 15..8, etc. Currently, the function assumes but does not check that 0<=c<=0x10ffff.
   3380          */
   3381         int fromUChar32(int c, int[] pValue, boolean isUseFallback) {
   3382             // #if 0
   3383             // /* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */
   3384             // const uint8_t *p;
   3385             // #endif
   3386 
   3387             char[] table;
   3388             int stage2Entry;
   3389             int value;
   3390             int length;
   3391             int p;
   3392 
   3393             /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
   3394             if (c <= 0xffff || sharedData.mbcs.hasSupplementary()) {
   3395                 table = sharedData.mbcs.fromUnicodeTable;
   3396 
   3397                 /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
   3398                 if (sharedData.mbcs.outputType == MBCS_OUTPUT_1) {
   3399                     value = MBCS_SINGLE_RESULT_FROM_U(table, sharedData.mbcs.fromUnicodeChars, c);
   3400                     /* is this code point assigned, or do we use fallbacks? */
   3401                     if (isUseFallback ? value >= 0x800 : value >= 0xc00) {
   3402                         pValue[0] = value & 0xff;
   3403                         return 1;
   3404                     }
   3405                 } else /* outputType!=MBCS_OUTPUT_1 */{
   3406                     int[] tableInts = sharedData.mbcs.fromUnicodeTableInts;
   3407                     stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, c);
   3408 
   3409                     /* get the bytes and the length for the output */
   3410                     switch (sharedData.mbcs.outputType) {
   3411                     case MBCS_OUTPUT_2:
   3412                         value = MBCS_VALUE_2_FROM_STAGE_2(sharedData.mbcs.fromUnicodeChars, stage2Entry, c);
   3413                         if (value <= 0xff) {
   3414                             length = 1;
   3415                         } else {
   3416                             length = 2;
   3417                         }
   3418                         break;
   3419                     // #if 0
   3420                     // /* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */
   3421                     // case MBCS_OUTPUT_DBCS_ONLY:
   3422                     // /* table with single-byte results, but only DBCS mappings used */
   3423                     // value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
   3424                     // if(value<=0xff) {
   3425                     // /* no mapping or SBCS result, not taken for DBCS-only */
   3426                     // value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
   3427                     // length=0;
   3428                     // } else {
   3429                     // length=2;
   3430                     // }
   3431                     // break;
   3432                     case MBCS_OUTPUT_3:
   3433                         byte[] bytes = sharedData.mbcs.fromUnicodeBytes;
   3434                         p = CharsetMBCS.MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
   3435                         value = ((bytes[p] & UConverterConstants.UNSIGNED_BYTE_MASK)<<16) |
   3436                             ((bytes[p+1] & UConverterConstants.UNSIGNED_BYTE_MASK)<<8) |
   3437                             (bytes[p+2] & UConverterConstants.UNSIGNED_BYTE_MASK);
   3438                         if (value <= 0xff) {
   3439                             length = 1;
   3440                         } else if (value <= 0xffff) {
   3441                             length = 2;
   3442                         } else {
   3443                             length = 3;
   3444                         }
   3445                         break;
   3446                     // case MBCS_OUTPUT_4:
   3447                     // value=MBCS_VALUE_4_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
   3448                     // if(value<=0xff) {
   3449                     // length=1;
   3450                     // } else if(value<=0xffff) {
   3451                     // length=2;
   3452                     // } else if(value<=0xffffff) {
   3453                     // length=3;
   3454                     // } else {
   3455                     // length=4;
   3456                     // }
   3457                     // break;
   3458                     // case MBCS_OUTPUT_3_EUC:
   3459                     // value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
   3460                     // /* EUC 16-bit fixed-length representation */
   3461                     // if(value<=0xff) {
   3462                     // length=1;
   3463                     // } else if((value&0x8000)==0) {
   3464                     // value|=0x8e8000;
   3465                     // length=3;
   3466                     // } else if((value&0x80)==0) {
   3467                     // value|=0x8f0080;
   3468                     // length=3;
   3469                     // } else {
   3470                     // length=2;
   3471                     // }
   3472                     // break;
   3473                     // case MBCS_OUTPUT_4_EUC:
   3474                     // p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
   3475                     // value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
   3476                     // /* EUC 16-bit fixed-length representation applied to the first two bytes */
   3477                     // if(value<=0xff) {
   3478                     // length=1;
   3479                     // } else if(value<=0xffff) {
   3480                     // length=2;
   3481                     // } else if((value&0x800000)==0) {
   3482                     // value|=0x8e800000;
   3483                     // length=4;
   3484                     // } else if((value&0x8000)==0) {
   3485                     // value|=0x8f008000;
   3486                     // length=4;
   3487                     // } else {
   3488                     // length=3;
   3489                     // }
   3490                     // break;
   3491                     // #endif
   3492                     default:
   3493                         /* must not occur */
   3494                         return -1;
   3495                     }
   3496 
   3497                     /* is this code point assigned, or do we use fallbacks? */
   3498                     if (MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)
   3499                             || (CharsetEncoderICU.isFromUUseFallback(isUseFallback, c) && value != 0)) {
   3500                         /*
   3501                          * We allow a 0 byte output if the "assigned" bit is set for this entry. There is no way with
   3502                          * this data structure for fallback output to be a zero byte.
   3503                          */
   3504                         /* assigned */
   3505                         pValue[0] = value;
   3506                         return length;
   3507                     }
   3508                 }
   3509             }
   3510 
   3511             if (sharedData.mbcs.extIndexes != null) {
   3512                 length = simpleMatchFromU(c, pValue, isUseFallback);
   3513                 return length >= 0 ? length : -length; /* return abs(length); */
   3514             }
   3515 
   3516             /* unassigned */
   3517             return 0;
   3518         }
   3519 
   3520         /*
   3521          * continue partial match with new input, requires cnv->preFromUFirstCP>=0 never called for simple,
   3522          * single-character conversion
   3523          */
   3524         private CoderResult continueMatchFromU(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush,
   3525                 int srcIndex) {
   3526             CoderResult cr = CoderResult.UNDERFLOW;
   3527             int[] value = new int[1];
   3528             int match;
   3529 
   3530             match = matchFromU(preFromUFirstCP, preFromUArray, preFromUBegin, preFromULength, source, value, useFallback, flush);
   3531             if (match >= 2) {
   3532                 match -= 2; /* remove 2 for the initial code point */
   3533 
   3534                 if (match >= preFromULength) {
   3535                     /* advance src pointer for the consumed input */
   3536                     source.position(source.position() + match - preFromULength);
   3537                     preFromULength = 0;
   3538                 } else {
   3539                     /* the match did not use all of preFromU[] - keep the rest for replay */
   3540                     int length = preFromULength - match;
   3541                     System.arraycopy(preFromUArray, preFromUBegin + match, preFromUArray, preFromUBegin, length);
   3542                     preFromULength = (byte) -length;
   3543                 }
   3544 
   3545                 /* finish the partial match */
   3546                 preFromUFirstCP = UConverterConstants.U_SENTINEL;
   3547 
   3548                 /* write result */
   3549                 writeFromU(value[0], target, offsets, srcIndex);
   3550             } else if (match < 0) {
   3551                 /* save state for partial match */
   3552                 int sArrayIndex;
   3553                 int j;
   3554 
   3555                 /* just _append_ the newly consumed input to preFromU[] */
   3556                 sArrayIndex = source.position();
   3557                 match = -match - 2; /* remove 2 for the initial code point */
   3558                 for (j = preFromULength; j < match; ++j) {
   3559                     preFromUArray[j] = source.get(sArrayIndex++);
   3560                 }
   3561                 source.position(sArrayIndex); /* same as *src=srcLimit; because we reached the end of input */
   3562                 preFromULength = (byte) match;
   3563             } else { /* match==0 or 1 */
   3564                 /*
   3565                  * no match
   3566                  *
   3567                  * We need to split the previous input into two parts:
   3568                  *
   3569                  * 1. The first code point is unmappable - that's how we got into trying the extension data in the first
   3570                  * place. We need to move it from the preFromU buffer to the error buffer, set an error code, and
   3571                  * prepare the rest of the previous input for 2.
   3572                  *
   3573                  * 2. The rest of the previous input must be converted once we come back from the callback for the first
   3574                  * code point. At that time, we have to try again from scratch to convert these input characters. The
   3575                  * replay will be handled by the ucnv.c conversion code.
   3576                  */
   3577 
   3578                 if (match == 1) {
   3579                     /* matched, no mapping but request for <subchar1> */
   3580                     useSubChar1 = true;
   3581                 }
   3582 
   3583                 /* move the first code point to the error field */
   3584                 fromUChar32 = preFromUFirstCP;
   3585                 preFromUFirstCP = UConverterConstants.U_SENTINEL;
   3586 
   3587                 /* mark preFromU for replay */
   3588                 preFromULength = (byte) -preFromULength;
   3589 
   3590                 /* set the error code for unassigned */
   3591                 // TODO: figure out what the unmappable length really should be
   3592                 cr = CoderResult.unmappableForLength(1);
   3593             }
   3594             return cr;
   3595         }
   3596 
   3597         /**
   3598          * Java port of ucnv_extMatchFromU() (C signature quoted below): matches firstCP, optionally followed by further
   3599          * UChars from preArray and then source, against the from-Unicode data in sharedData.mbcs.extIndexes.
   3600          *
   3601          * @param firstCP
   3602          *            the first code point before all the other UChars
   3603          * @param preArray
   3604          *            UChars that must match before any from source; not read when preLength is 0
   3605          * @param preArrayBegin
   3606          *            index in preArray of the first UChar to match
   3607          * @param preLength
   3608          *            number of UChars to match from preArray, >=0
   3609          * @param source
   3610          *            UChars that can complete a match, read from source.position() on without moving it; may be null
   3611          * @param pMatchValue
   3612          *            [out] element 0 receives the result value for the match; only written when the return is >1
   3613          * @param isUseFallback
   3614          *            "use fallback" flag, handed to isFromUUseFallback() together with firstCP
   3615          * @param flush
   3616          *            true if the end of the input stream is reached
   3617          * @return >1: matched, return value=total match length (number of input units matched; 2 if only firstCP
   3618          *         matched, >2 if firstCP and further code units matched); 1: matched, no mapping but request for
   3619          *         <subchar1>; 0: no usable match (also: no extension data, fallback not taken, reserved bits set);
   3620          *         <0: partial match, return value=negative total match length (never for flush==true)
   3621          */
   3622         // static int32_t ucnv_extMatchFromU(const int32_t *cx, UChar32 firstCP, const UChar *pre, int32_t preLength,
   3623         // const UChar *src, int32_t srcLength, uint32_t *pMatchValue, UBool useFallback, UBool flush)
   3624         private int matchFromU(int firstCP, char[] preArray, int preArrayBegin, int preLength, CharBuffer source,
   3625                 int[] pMatchValue, boolean isUseFallback, boolean flush) {
   3626             ByteBuffer cx = sharedData.mbcs.extIndexes;
   3627 
   3628             CharBuffer stage12, stage3;
   3629             IntBuffer stage3b;
   3630 
   3631             CharBuffer fromUTableUChars, fromUSectionUChars;
   3632             IntBuffer fromUTableValues, fromUSectionValues;
   3633 
   3634             int value, matchValue;
   3635             int i, j, index, length, matchLength;
   3636             char c;
   3637 
   3638             if (cx == null) {
   3639                 return 0; /* no extension data, no match */
   3640             }
   3641 
   3642             /* trie lookup of firstCP */
   3643             index = firstCP >>> 10; /* stage 1 index */
   3644             if (index >= cx.asIntBuffer().get(EXT_FROM_U_STAGE_1_LENGTH)) {
   3645                 return 0; /* the first code point is outside the trie */
   3646             }
   3647 
   3648             stage12 = (CharBuffer) ARRAY(cx, EXT_FROM_U_STAGE_12_INDEX, char.class);
   3649             stage3 = (CharBuffer) ARRAY(cx, EXT_FROM_U_STAGE_3_INDEX, char.class);
   3650             index = FROM_U(stage12, stage3, index, firstCP);
   3651             /* index now addresses stage 3b, which holds the 32-bit result value; 0 means no mapping for firstCP */
   3652             stage3b = (IntBuffer) ARRAY(cx, EXT_FROM_U_STAGE_3B_INDEX, int.class);
   3653             value = stage3b.get(stage3b.position() + index);
   3654             if (value == 0) {
   3655                 return 0;
   3656             }
   3657             /* NOTE(review): this test uses TO_U_IS_PARTIAL while the loop below uses FROM_U_IS_PARTIAL - confirm */
   3658             if (TO_U_IS_PARTIAL(value)) {
   3659                 /* partial match, enter the loop below */
   3660                 index = FROM_U_GET_PARTIAL_INDEX(value);
   3661 
   3662                 /* initialize */
   3663                 fromUTableUChars = (CharBuffer) ARRAY(cx, EXT_FROM_U_UCHARS_INDEX, char.class);
   3664                 fromUTableValues = (IntBuffer) ARRAY(cx, EXT_FROM_U_VALUES_INDEX, int.class);
   3665 
   3666                 matchValue = 0;
   3667                 i = j = matchLength = 0; /* i counts units taken from preArray, j units taken from source */
   3668 
   3669                 /* we must not remember fallback matches when not using fallbacks */
   3670 
   3671                 /* match input units until there is a full match or the input is consumed */
   3672                 for (;;) {
   3673                     /* go to the next section: slice both tables at index, then restore the saved positions */
   3674                     int oldpos = fromUTableUChars.position();
   3675                     fromUSectionUChars = ((CharBuffer) fromUTableUChars.position(index)).slice();
   3676                     fromUTableUChars.position(oldpos);
   3677                     oldpos = fromUTableValues.position();
   3678                     fromUSectionValues = ((IntBuffer) fromUTableValues.position(index)).slice();
   3679                     fromUTableValues.position(oldpos);
   3680 
   3681                     /* read first pair of the section: its UChar is the section length, its value a match so far */
   3682                     length = fromUSectionUChars.get();
   3683                     value = fromUSectionValues.get();
   3684                     if (value != 0 && (FROM_U_IS_ROUNDTRIP(value) || isFromUUseFallback(isUseFallback, firstCP))) {
   3685                         /* remember longest match so far */
   3686                         matchValue = value;
   3687                         matchLength = 2 + i + j; /* the 2 stands for firstCP */
   3688                     }
   3689 
   3690                     /* match pre[] then src[] */
   3691                     if (i < preLength) {
   3692                         c = preArray[preArrayBegin + i++];
   3693                     } else if (source != null && j < source.remaining()) {
   3694                         c = source.get(source.position() + j++); /* absolute get, the position is not moved */
   3695                     } else {
   3696                         /* all input consumed, partial match; length is reused for the number of units consumed */
   3697                         if (flush || (length = (i + j)) > MAX_UCHARS) {
   3698                             /*
   3699                              * end of the entire input stream, stop with the longest match so far or: partial match must
   3700                              * not be longer than UCNV_EXT_MAX_UCHARS because it must fit into state buffers
   3701                              */
   3702                             break;
   3703                         } else {
   3704                             /* continue with more input next time */
   3705                             return -(2 + length);
   3706                         }
   3707                     }
   3708 
   3709                     /* search for the current UChar; a negative result means it is not in this section */
   3710                     index = findFromU(fromUSectionUChars, length, c);
   3711                     if (index < 0) {
   3712                         /* no match here, stop with the longest match so far */
   3713                         break;
   3714                     } else {
   3715                         value = fromUSectionValues.get(fromUSectionValues.position() + index);
   3716                         if (FROM_U_IS_PARTIAL(value)) {
   3717                             /* partial match, continue with the section that the value points to */
   3718                             index = FROM_U_GET_PARTIAL_INDEX(value);
   3719                         } else {
   3720                             if (FROM_U_IS_ROUNDTRIP(value) || isFromUUseFallback(isUseFallback, firstCP)) {
   3721                                 /* full match, stop with result */
   3722                                 matchValue = value;
   3723                                 matchLength = 2 + i + j;
   3724                             } else {
   3725                                 /* full match on fallback not taken, stop with the longest match so far */
   3726                             }
   3727                             break;
   3728                         }
   3729                     }
   3730                 }
   3731 
   3732                 if (matchLength == 0) {
   3733                     /* no match at all */
   3734                     return 0;
   3735                 }
   3736             } else /* result from firstCP trie lookup */{
   3737                 if (FROM_U_IS_ROUNDTRIP(value) || isFromUUseFallback(isUseFallback, firstCP)) {
   3738                     /* full match, stop with result */
   3739                     matchValue = value;
   3740                     matchLength = 2;
   3741                 } else {
   3742                     /* fallback not taken */
   3743                     return 0;
   3744                 }
   3745             }
   3746 
   3747             if ((matchValue & FROM_U_RESERVED_MASK) != 0) {
   3748                 /* do not interpret values with reserved bits used, for forward compatibility */
   3749                 return 0;
   3750             }
   3751 
   3752             /* return result */
   3753             if (matchValue == FROM_U_SUBCHAR1) {
   3754                 return 1; /* assert matchLength==2 */
   3755             }
   3756             /* only a real match reaches this point, so this is the single place where pMatchValue is written */
   3757             pMatchValue[0] = FROM_U_MASK_ROUNDTRIP(matchValue);
   3758             return matchLength;
   3759         }
   3760 
   3761         private int simpleMatchFromU(int cp, int[] pValue, boolean isUseFallback) {
   3762             int[] value = new int[1];
   3763             int match; // signed
   3764 
   3765             /* try to match */
   3766             match = matchFromU(cp, null, 0, 0, null, value, isUseFallback, true);
   3767             if (match >= 2) {
   3768                 /* write result for simple, single-character conversion */
   3769                 int length;
   3770                 boolean isRoundtrip;
   3771 
   3772                 isRoundtrip = FROM_U_IS_ROUNDTRIP(value[0]);
   3773                 length = FROM_U_GET_LENGTH(value[0]);
   3774                 value[0] = FROM_U_GET_DATA(value[0]);
   3775 
   3776                 if (length <= EXT_FROM_U_MAX_DIRECT_LENGTH) {
   3777                     pValue[0] = value[0];
   3778                     return isRoundtrip ? length : -length;
   3779                     // #if 0 /* not currently used */
   3780                     // } else if(length==4) {
   3781                     // /* de-serialize a 4-byte result */
   3782                     // const uint8_t *result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value;
   3783                     // *pValue=
   3784                     // ((uint32_t)result[0]<<24)|
   3785                     // ((uint32_t)result[1]<<16)|
   3786                     // ((uint32_t)result[2]<<8)|
   3787                     // result[3];
   3788                     // return isRoundtrip ? 4 : -4;
   3789                     // #endif
   3790                 }
   3791             }
   3792 
   3793             /*
   3794              * return no match because - match>1 && resultLength>4: result too long for simple conversion - match==1: no
   3795              * match found, <subchar1> preferred - match==0: no match found in the first place - match<0: partial
   3796              * match, not supported for simple conversion (and flush==TRUE)
   3797              */
   3798             return 0;
   3799         }
   3800 
   3801         @SuppressWarnings("fallthrough")
   3802         private CoderResult writeFromU(int value, ByteBuffer target, IntBuffer offsets, int srcIndex) {
   3803             ByteBuffer cx = sharedData.mbcs.extIndexes;
   3804 
   3805             byte bufferArray[] = new byte[1 + MAX_BYTES];
   3806             int bufferArrayIndex = 0;
   3807             byte[] resultArray;
   3808             int resultArrayIndex;
   3809             int length, prevLength;
   3810 
   3811             length = FROM_U_GET_LENGTH(value);
   3812             value = FROM_U_GET_DATA(value);
   3813 
   3814             /* output the result */
   3815             if (length <= FROM_U_MAX_DIRECT_LENGTH) {
   3816                 /*
   3817                  * Generate a byte array and then write it below. This is not the fastest possible way, but it should be
   3818                  * ok for extension mappings, and it is much simpler. Offset and overflow handling are only done once
   3819                  * this way.
   3820                  */
   3821                 int p = bufferArrayIndex + 1; /* reserve buffer[0] for shiftByte below */
   3822                 switch (length) {
   3823                 case 3:
   3824                     bufferArray[p++] = (byte) (value >>> 16);
   3825                 case 2:
   3826                     bufferArray[p++] = (byte) (value >>> 8);
   3827                 case 1:
   3828                     bufferArray[p++] = (byte) value;
   3829                 default:
   3830                     break; /* will never occur */
   3831                 }
   3832                 resultArray = bufferArray;
   3833                 resultArrayIndex = bufferArrayIndex + 1;
   3834             } else {
   3835                 byte[] slice = new byte[length];
   3836 
   3837                 ByteBuffer bb = ((ByteBuffer) ARRAY(cx, EXT_FROM_U_BYTES_INDEX, byte.class));
   3838                 bb.position(value);
   3839                 bb.get(slice, 0, slice.length);
   3840 
   3841                 resultArray = slice;
   3842                 resultArrayIndex = 0;
   3843             }
   3844 
   3845             /* with correct data we have length>0 */
   3846 
   3847             if ((prevLength = fromUnicodeStatus) != 0) {
   3848                 /* handle SI/SO stateful output */
   3849                 byte shiftByte;
   3850 
   3851                 if (prevLength > 1 && length == 1) {
   3852                     /* change from double-byte mode to single-byte */
   3853                     shiftByte = (byte) UConverterConstants.SI;
   3854                     fromUnicodeStatus = 1;
   3855                 } else if (prevLength == 1 && length > 1) {
   3856                     /* change from single-byte mode to double-byte */
   3857                     shiftByte = (byte) UConverterConstants.SO;
   3858                     fromUnicodeStatus = 2;
   3859                 } else {
   3860                     shiftByte = 0;
   3861                 }
   3862 
   3863                 if (shiftByte != 0) {
   3864                     /* prepend the shift byte to the result bytes */
   3865                     bufferArray[0] = shiftByte;
   3866                     if (resultArray != bufferArray || resultArrayIndex != bufferArrayIndex + 1) {
   3867                         System.arraycopy(resultArray, resultArrayIndex, bufferArray, bufferArrayIndex + 1, length);
   3868                     }
   3869                     resultArray = bufferArray;
   3870                     resultArrayIndex = bufferArrayIndex;
   3871                     ++length;
   3872                 }
   3873             }
   3874 
   3875             return fromUWriteBytes(this, resultArray, resultArrayIndex, length, target, offsets, srcIndex);
   3876         }
   3877 
   3878         /*
   3879          * @return if(U_FAILURE) return the code point for cnv->fromUChar32 else return 0 after output has been written
   3880          * to the target
   3881          */
   3882         private int fromU(int cp, CharBuffer source, ByteBuffer target, IntBuffer offsets, int sourceIndex,
   3883                 int length, boolean flush, CoderResult[] cr) {
   3884             // ByteBuffer cx;
   3885 
   3886             useSubChar1 = false;
   3887 
   3888             if (sharedData.mbcs.extIndexes != null
   3889                     && initialMatchFromU(cp, source, target, offsets, sourceIndex, flush, cr)) {
   3890                 return 0; /* an extension mapping handled the input */
   3891             }
   3892 
   3893             /* GB 18030 */
   3894             if ((options & MBCS_OPTION_GB18030) != 0) {
   3895                 int[] range;
   3896                 int i;
   3897 
   3898                 for (i = 0; i < gb18030Ranges.length; ++i) {
   3899                     range = gb18030Ranges[i];
   3900                     if (range[0] <= cp && cp <= range[1]) {
   3901                         /* found the Unicode code point, output the four-byte sequence for it */
   3902                         int linear;
   3903                         byte bytes[] = new byte[4];
   3904 
   3905                         /* get the linear value of the first GB 18030 code in this range */
   3906                         linear = range[2] - LINEAR_18030_BASE;
   3907 
   3908                         /* add the offset from the beginning of the range */
   3909                         linear += (cp - range[0]);
   3910 
   3911                         bytes[3] = (byte) (0x30 + linear % 10);
   3912                         linear /= 10;
   3913                         bytes[2] = (byte) (0x81 + linear % 126);
   3914                         linear /= 126;
   3915                         bytes[1] = (byte) (0x30 + linear % 10);
   3916                         linear /= 10;
   3917                         bytes[0] = (byte) (0x81 + linear);
   3918 
   3919                         /* output this sequence */
   3920                         cr[0] = fromUWriteBytes(this, bytes, 0, 4, target, offsets, sourceIndex);
   3921                         return 0;
   3922                     }
   3923                 }
   3924             }
   3925 
   3926             /* no mapping */
   3927             cr[0] = CoderResult.unmappableForLength(length);
   3928             return cp;
   3929         }
   3930 
   3931         /*
   3932          * target<targetLimit; set error code for overflow
   3933          */
   3934         private boolean initialMatchFromU(int cp, CharBuffer source, ByteBuffer target, IntBuffer offsets,
   3935                 int srcIndex, boolean flush, CoderResult[] cr) {
   3936             int[] value = new int[1];
   3937             int match;
   3938 
   3939             /* try to match */
   3940             match = matchFromU(cp, null, 0, 0, source, value, useFallback, flush);
   3941 
   3942             /* reject a match if the result is a single byte for DBCS-only */
   3943             if (match >= 2
   3944                     && !(FROM_U_GET_LENGTH(value[0]) == 1 && sharedData.mbcs.outputType == MBCS_OUTPUT_DBCS_ONLY)) {
   3945                 /* advance src pointer for the consumed input */
   3946                 source.position(source.position() + match - 2); /* remove 2 for the initial code point */
   3947 
   3948                 /* write result to target */
   3949                 cr[0] = writeFromU(value[0], target, offsets, srcIndex);
   3950                 return true;
   3951             } else if (match < 0) {
   3952                 /* save state for partial match */
   3953                 int sArrayIndex;
   3954                 int j;
   3955 
   3956                 /* copy the first code point */
   3957                 preFromUFirstCP = cp;
   3958 
   3959                 /* now copy the newly consumed input */
   3960                 sArrayIndex = source.position();
   3961                 match = -match - 2; /* remove 2 for the initial code point */
   3962                 for (j = 0; j < match; ++j) {
   3963                     preFromUArray[j] = source.get(sArrayIndex++);
   3964                 }
   3965                 source.position(sArrayIndex); /* same as *src=srcLimit; because we reached the end of input */
   3966                 preFromULength = (byte) match;
   3967                 return true;
   3968             } else if (match == 1) {
   3969                 /* matched, no mapping but request for <subchar1> */
   3970                 useSubChar1 = true;
   3971                 return false;
   3972             } else /* match==0 no match */{
   3973                 return false;
   3974             }
   3975         }
   3976 
   3977         CoderResult cnvMBCSFromUnicodeWithOffsets(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
   3978             // Just call encodeLoop to remove duplicate code.
   3979             return encodeLoop(source, target, offsets, flush);
   3980         }
   3981 
   3982         /*
   3983          * This version of ucnv_MBCSFromUnicode() is optimized for single-byte codepages that map only to and from the
   3984          * BMP. In addition to single-byte/state optimizations, the offset calculations become much easier.
                 *
                 * Converts UChars from source, starting at its position, into one byte each in target. A UChar without a
                 * usable table result is handed to fromU() for an extension mapping. The code point kept in fromUChar32 is
                 * picked up at the start and stored back at the end, and source.position() is updated before returning.
                 * offsets may be null; otherwise source indexes are appended to it in batches (see below).
                 *
                 * Returns cr[0]: UNDERFLOW unless a helper or this loop set a malformed/unmappable result, or OVERFLOW when
                 * input remains while the target is full.
   3985          */
   3986         private CoderResult cnvMBCSSingleFromBMPWithOffsets(CharBuffer source, ByteBuffer target, IntBuffer offsets,
   3987                 boolean flush) {
   3988             /* one-element array so that fromU() and getTrailSingleBMP() can set the result */
   3989             CoderResult[] cr = { CoderResult.UNDERFLOW };
   3990 
   3991             int sourceArrayIndex, lastSource;
   3992             int targetCapacity, length;
   3993             char[] table;
   3994             char[] results;
   3995 
   3996             int c, sourceIndex;
   3997             char value, minValue;
   3998 
   3999             /* set up the local pointers */
   4000             sourceArrayIndex = source.position();
   4001             targetCapacity = target.remaining();
   4002             table = sharedData.mbcs.fromUnicodeTable;
   4003             /* the LF/NL swap option selects an alternative results array */
   4004             if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
   4005                 results = sharedData.mbcs.swapLFNLFromUnicodeChars;
   4006             } else {
   4007                 results = sharedData.mbcs.fromUnicodeChars;
   4008             }
   4009             /* table results below minValue are treated as "no mapping" in the loop */
   4010             if (useFallback) {
   4011                 /* use all roundtrip and fallback results */
   4012                 minValue = 0x800;
   4013             } else {
   4014                 /* use only roundtrips and fallbacks from private-use characters */
   4015                 minValue = 0xc00;
   4016             }
   4017 
   4018             /* get the converter state from UConverter */
   4019             c = fromUChar32;
   4020 
   4021             /* sourceIndex=-1 if the current character began in the previous buffer */
   4022             sourceIndex = c == 0 ? 0 : -1;
   4023             lastSource = sourceArrayIndex; /* first source position whose offset has not been written yet */
   4024 
   4025             /*
   4026              * since the conversion here is 1:1 UChar:uint8_t, we need only one counter for the minimum of the
   4027              * sourceLength and targetCapacity
   4028              */
   4029             length = source.limit() - sourceArrayIndex;
   4030             if (length < targetCapacity) {
   4031                 targetCapacity = length;
   4032             }
   4033             /* resume the code point carried over in fromUChar32; getTrailSingleBMP() reports whether to go on */
   4034             boolean doloop = true;
   4035             if (c != 0 && targetCapacity > 0) {
   4036                 SideEffectsSingleBMP x = new SideEffectsSingleBMP(c, sourceArrayIndex);
   4037                 doloop = getTrailSingleBMP(source, x, cr);
   4038                 c = x.c;
   4039                 sourceArrayIndex = x.sourceArrayIndex;
   4040             }
   4041 
   4042             if (doloop) {
   4043                 while (targetCapacity > 0) {
   4044                     /*
   4045                      * Get a correct Unicode code point: a single UChar for a BMP code point or a matched surrogate pair
   4046                      * for a "supplementary code point".
   4047                      */
   4048                     c = source.get(sourceArrayIndex++);
   4049                     /*
   4050                      * Do not immediately check for single surrogates: Assume that they are unassigned and check for
   4051                      * them in that case. This speeds up the conversion of assigned characters.
   4052                      */
   4053                     /* convert the Unicode code point in c into codepage bytes */
   4054                     value = MBCS_SINGLE_RESULT_FROM_U(table, results, c);
   4055 
   4056                     /* is this code point assigned, or do we use fallbacks? */
   4057                     if (value >= minValue) {
   4058                         /* assigned, write the output character bytes from value and length */
   4059                         /* length==1 */
   4060                         /* this is easy because we know that there is enough space */
   4061                         target.put((byte) value);
   4062                         --targetCapacity; /* the offset for this byte is written later, in one of the batches below */
   4063 
   4064                         /* normal end of conversion: prepare for a new character */
   4065                         c = 0;
   4066                         continue;
   4067                     } else if (!UTF16.isSurrogate((char) c)) {
   4068                         /* normal, unassigned BMP character */
   4069                     } else if (UTF16.isLeadSurrogate((char) c)) {
   4070                         // getTrail:
   4071                         SideEffectsSingleBMP x = new SideEffectsSingleBMP(c, sourceArrayIndex);
   4072                         doloop = getTrailSingleBMP(source, x, cr);
   4073                         c = x.c;
   4074                         sourceArrayIndex = x.sourceArrayIndex;
   4075                         if (!doloop)
   4076                             break;
   4077                     } else {
   4078                         /* this is an unmatched trail code unit (2nd surrogate) */
   4079                         /* callback(illegal) */
   4080                         cr[0] = CoderResult.malformedForLength(1);
   4081                         break;
   4082                     }
   4083 
   4084                     /* c does not have a mapping */
   4085 
   4086                     /* get the number of code units for c to correctly advance sourceIndex */
   4087                     length = UTF16.getCharCount(c);
   4088 
   4089                     /* set offsets since the start or the last extension */
   4090                     if (offsets != null) {
   4091                         int count = sourceArrayIndex - lastSource;
   4092 
   4093                         /* do not set the offset for this character */
   4094                         count -= length;
   4095 
   4096                         while (count > 0) {
   4097                             offsets.put(sourceIndex++);
   4098                             --count;
   4099                         }
   4100                         /* offsets and sourceIndex are now set for the current character */
   4101                     }
   4102 
   4103                     /* try an extension mapping; fromU() reads further units from source and may move its position */
   4104                     lastSource = sourceArrayIndex;
   4105                     source.position(sourceArrayIndex);
   4106                     c = fromU(c, source, target, offsets, sourceIndex, length, flush, cr);
   4107                     sourceArrayIndex = source.position();
   4108                     sourceIndex += length + (sourceArrayIndex - lastSource); /* c itself plus the units fromU() took */
   4109                     lastSource = sourceArrayIndex;
   4110 
   4111                     if (cr[0].isError()) {
   4112                         /* malformed or unmappable; CoderResult.isError() is false for OVERFLOW, which goes below */
   4113                         break;
   4114                     } else {
   4115                         /* a mapping was written to the target, continue */
   4116 
   4117                         /* recalculate the targetCapacity after an extension mapping */
   4118                         targetCapacity = target.remaining();
   4119                         length = source.limit() - sourceArrayIndex;
   4120                         if (length < targetCapacity) {
   4121                             targetCapacity = length;
   4122                         }
   4123                     }
   4124                 }
   4125             }
   4126 
   4127             if (sourceArrayIndex < source.limit() && !target.hasRemaining()) {
   4128                 /* target is full */
   4129                 cr[0] = CoderResult.OVERFLOW;
   4130             }
   4131 
   4132             /* set offsets since the start or the last callback */
   4133             if (offsets != null) {
   4134                 int count = sourceArrayIndex - lastSource;
   4135                 while (count > 0) {
   4136                     offsets.put(sourceIndex++);
   4137                     --count;
   4138                 }
   4139             }
   4140 
   4141             /* set the converter state back into UConverter */
   4142             fromUChar32 = c;
   4143 
   4144             /* write back the updated pointers */
   4145             source.position(sourceArrayIndex);
   4146 
   4147             return cr[0];
   4148         }
   4149 
   4150         /* This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for single-byte codepages. */
   4151         private CoderResult cnvMBCSSingleFromUnicodeWithOffsets(CharBuffer source, ByteBuffer target,
   4152                 IntBuffer offsets, boolean flush) {
   4153 
   4154             CoderResult[] cr = { CoderResult.UNDERFLOW };
   4155 
   4156             int sourceArrayIndex;
   4157 
   4158             char[] table;
   4159             char[] results;
   4160 
   4161             int c;
   4162             int sourceIndex, nextSourceIndex;
   4163 
   4164             char value, minValue;
   4165 
   4166             /* set up the local pointers */
   4167             short uniMask;
   4168             sourceArrayIndex = source.position();
   4169 
   4170             table = sharedData.mbcs.fromUnicodeTable;
   4171 
   4172             if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
   4173                 results = sharedData.mbcs.swapLFNLFromUnicodeChars;
   4174             } else {
   4175                 results = sharedData.mbcs.fromUnicodeChars;
   4176             }
   4177 
   4178             if (useFallback) {
   4179                 /* use all roundtrip and fallback results */
   4180                 minValue = 0x800;
   4181             } else {
   4182                 /* use only roundtrips and fallbacks from private-use characters */
   4183                 minValue = 0xc00;
   4184             }
   4185             // agljport:comment hasSupplementary only used in getTrail block which now simply repeats the mask operation
   4186             uniMask = sharedData.mbcs.unicodeMask;
   4187 
   4188             /* get the converter state from UConverter */
   4189             c = fromUChar32;
   4190 
   4191             /* sourceIndex=-1 if the current character began in the previous buffer */
   4192             sourceIndex = c == 0 ? 0 : -1;
   4193             nextSourceIndex = 0;
   4194 
   4195             boolean doloop = true;
   4196             boolean doread = true;
   4197             if (c != 0 && target.hasRemaining()) {
   4198                 if (UTF16.isLeadSurrogate((char) c)) {
   4199                     SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex, nextSourceIndex);
   4200                     doloop = getTrailDouble(source, target, uniMask, x, flush, cr);
   4201                     doread = x.doread;
   4202                     c = x.c;
   4203                     sourceArrayIndex = x.sourceArrayIndex;
   4204                     sourceIndex = x.sourceIndex;
   4205                     nextSourceIndex = x.nextSourceIndex;
   4206                 } else {
   4207                     doread = false;
   4208                 }
   4209             }
   4210 
   4211             if (doloop) {
   4212                 while (!doread || sourceArrayIndex < source.limit()) {
   4213                     /*
   4214                      * This following test is to see if available input would overflow the output. It does not catch
   4215                      * output of more than one byte that overflows as a result of a multi-byte character or callback
   4216                      * output from the last source character. Therefore, those situations also test for overflows and
   4217                      * will then break the loop, too.
   4218                      */
   4219                     if (target.hasRemaining()) {
   4220                         /*
   4221                          * Get a correct Unicode code point: a single UChar for a BMP code point or a matched surrogate
   4222                          * pair for a "supplementary code point".
   4223                          */
   4224 
   4225                         if (doread) {
   4226                             c = source.get(sourceArrayIndex++);
   4227                             ++nextSourceIndex;
   4228                             if (UTF16.isSurrogate((char) c)) {
   4229                                 if (UTF16.isLeadSurrogate((char) c)) {
   4230                                     // getTrail:
   4231                                     SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex,
   4232                                             nextSourceIndex);
   4233                                     doloop = getTrailDouble(source, target, uniMask, x, flush, cr);
   4234                                     c = x.c;
   4235                                     sourceArrayIndex = x.sourceArrayIndex;
   4236                                     sourceIndex = x.sourceIndex;
   4237                                     nextSourceIndex = x.nextSourceIndex;
   4238                                     if (x.doread) {
   4239                                         if (doloop)
   4240                                             continue;
   4241                                         else
   4242                                             break;
   4243                                     }
   4244                                 } else {
   4245                                     /* this is an unmatched trail code unit (2nd surrogate) */
   4246                                     /* callback(illegal) */
   4247                                     cr[0] = CoderResult.malformedForLength(1);
   4248                                     break;
   4249                                 }
   4250                             }
   4251                         } else {
   4252                             doread = true;
   4253                         }
   4254 
   4255                         /* convert the Unicode code point in c into codepage bytes */
   4256                         value = MBCS_SINGLE_RESULT_FROM_U(table, results, c);
   4257 
   4258                         /* is this code point assigned, or do we use fallbacks? */
   4259                         if (value >= minValue) {
   4260                             /* assigned, write the output character bytes from value and length */
   4261                             /* length==1 */
   4262                             /* this is easy because we know that there is enough space */
   4263                             target.put((byte) value);
   4264                             if (offsets != null) {
   4265                                 offsets.put(sourceIndex);
   4266                             }
   4267 
   4268                             /* normal end of conversion: prepare for a new character */
   4269                             c = 0;
   4270                             sourceIndex = nextSourceIndex;
   4271                         } else { /* unassigned */
   4272                             /* try an extension mapping */
   4273                             SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex,
   4274                                     nextSourceIndex);
   4275                             doloop = unassignedDouble(source, target, x, flush, cr);
   4276                             c = x.c;
   4277                             sourceArrayIndex = x.sourceArrayIndex;
   4278                             sourceIndex = x.sourceIndex;
   4279                             nextSourceIndex = x.nextSourceIndex;
   4280                             if (!doloop)
   4281                                 break;
   4282                         }
   4283                     } else {
   4284                         /* target is full */
   4285                         cr[0] = CoderResult.OVERFLOW;
   4286                         break;
   4287                     }
   4288                 }
   4289             }
   4290 
   4291             /* set the converter state back into UConverter */
   4292             fromUChar32 = c;
   4293 
   4294             /* write back the updated pointers */
   4295             source.position(sourceArrayIndex);
   4296 
   4297             return cr[0];
   4298         }
   4299 
   4300         /* This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for double-byte codepages. */
   4301         private CoderResult cnvMBCSDoubleFromUnicodeWithOffsets(CharBuffer source, ByteBuffer target,
   4302                 IntBuffer offsets, boolean flush) {
   4303             CoderResult[] cr = { CoderResult.UNDERFLOW };
   4304 
   4305             int sourceArrayIndex;
   4306 
   4307             char[] table;
   4308             char[] chars;
   4309 
   4310             int c, sourceIndex, nextSourceIndex;
   4311 
   4312             int stage2Entry;
   4313             int value;
   4314             int length;
   4315             short uniMask;
   4316 
   4317             /* use optimized function if possible */
   4318             uniMask = sharedData.mbcs.unicodeMask;
   4319 
   4320             /* set up the local pointers */
   4321             sourceArrayIndex = source.position();
   4322 
   4323             table = sharedData.mbcs.fromUnicodeTable;
   4324             int[] tableInts = sharedData.mbcs.fromUnicodeTableInts;
   4325 
   4326             if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
   4327                 chars = sharedData.mbcs.swapLFNLFromUnicodeChars;
   4328             } else {
   4329                 chars = sharedData.mbcs.fromUnicodeChars;
   4330             }
   4331 
   4332             /* get the converter state from UConverter */
   4333             c = fromUChar32;
   4334 
   4335             /* sourceIndex=-1 if the current character began in the previous buffer */
   4336             sourceIndex = c == 0 ? 0 : -1;
   4337             nextSourceIndex = 0;
   4338 
   4339             /* conversion loop */
   4340             boolean doloop = true;
   4341             boolean doread = true;
   4342             if (c != 0 && target.hasRemaining()) {
   4343                 if (UTF16.isLeadSurrogate((char) c)) {
   4344                     SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex, nextSourceIndex);
   4345                     doloop = getTrailDouble(source, target, uniMask, x, flush, cr);
   4346                     doread = x.doread;
   4347                     c = x.c;
   4348                     sourceArrayIndex = x.sourceArrayIndex;
   4349                     sourceIndex = x.sourceIndex;
   4350                     nextSourceIndex = x.nextSourceIndex;
   4351                 } else {
   4352                     doread = false;
   4353                 }
   4354             }
   4355 
   4356             if (doloop) {
   4357                 while (!doread || sourceArrayIndex < source.limit()) {
   4358                     /*
   4359                      * This following test is to see if available input would overflow the output. It does not catch
   4360                      * output of more than one byte that overflows as a result of a multi-byte character or callback
   4361                      * output from the last source character. Therefore, those situations also test for overflows and
   4362                      * will then break the loop, too.
   4363                      */
   4364                     if (target.hasRemaining()) {
   4365                         if (doread) {
   4366                             /*
   4367                              * Get a correct Unicode code point: a single UChar for a BMP code point or a matched
   4368                              * surrogate pair for a "supplementary code point".
   4369                              */
   4370                             c = source.get(sourceArrayIndex++);
   4371                             ++nextSourceIndex;
   4372                             /*
   4373                              * This also tests if the codepage maps single surrogates. If it does, then surrogates are
   4374                              * not paired but mapped separately. Note that in this case unmatched surrogates are not
   4375                              * detected.
   4376                              */
   4377                             if (UTF16.isSurrogate((char) c) && (uniMask & UConverterConstants.HAS_SURROGATES) == 0) {
   4378                                 if (UTF16.isLeadSurrogate((char) c)) {
   4379                                     // getTrail:
   4380                                     SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex,
   4381                                             nextSourceIndex);
   4382                                     doloop = getTrailDouble(source, target, uniMask, x, flush, cr);
   4383                                     c = x.c;
   4384                                     sourceArrayIndex = x.sourceArrayIndex;
   4385                                     sourceIndex = x.sourceIndex;
   4386                                     nextSourceIndex = x.nextSourceIndex;
   4387 
   4388                                     if (x.doread) {
   4389                                         if (doloop)
   4390                                             continue;
   4391                                         else
   4392                                             break;
   4393                                     }
   4394                                 } else {
   4395                                     /* this is an unmatched trail code unit (2nd surrogate) */
   4396                                     /* callback(illegal) */
   4397                                     cr[0] = CoderResult.malformedForLength(1);
   4398                                     break;
   4399                                 }
   4400                             }
   4401                         } else {
   4402                             doread = true;
   4403                         }
   4404 
   4405                         /* convert the Unicode code point in c into codepage bytes */
   4406                         stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, c);
   4407 
   4408                         /* get the bytes and the length for the output */
   4409                         /* MBCS_OUTPUT_2 */
   4410                         value = MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, c);
   4411                         if (value <= 0xff) {
   4412                             length = 1;
   4413                         } else {
   4414                             length = 2;
   4415                         }
   4416 
   4417                         /* is this code point assigned, or do we use fallbacks? */
   4418                         if (!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) || (isFromUUseFallback(c) && value != 0))) {
   4419                             /*
   4420                              * We allow a 0 byte output if the "assigned" bit is set for this entry. There is no way
   4421                              * with this data structure for fallback output to be a zero byte.
   4422                              */
   4423 
   4424                             // unassigned:
   4425                             SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex,
   4426                                     nextSourceIndex);
   4427 
   4428                             doloop = unassignedDouble(source, target, x, flush, cr);
   4429                             c = x.c;
   4430                             sourceArrayIndex = x.sourceArrayIndex;
   4431                             sourceIndex = x.sourceIndex;
   4432                             nextSourceIndex = x.nextSourceIndex;
   4433                             if (doloop)
   4434                                 continue;
   4435                             else
   4436                                 break;
   4437                         }
   4438 
   4439                         /* write the output character bytes from value and length */
   4440                         /* from the first if in the loop we know that targetCapacity>0 */
   4441                         if (length == 1) {
   4442                             /* this is easy because we know that there is enough space */
   4443                             target.put((byte) value);
   4444                             if (offsets != null) {
   4445                                 offsets.put(sourceIndex);
   4446                             }
   4447                         } else /* length==2 */{
   4448                             target.put((byte) (value >>> 8));
   4449                             if (2 <= target.remaining()) {
   4450                                 target.put((byte) value);
   4451                                 if (offsets != null) {
   4452                                     offsets.put(sourceIndex);
   4453                                     offsets.put(sourceIndex);
   4454                                 }
   4455                             } else {
   4456                                 if (offsets != null) {
   4457                                     offsets.put(sourceIndex);
   4458                                 }
   4459                                 errorBuffer[0] = (byte) value;
   4460                                 errorBufferLength = 1;
   4461 
   4462                                 /* target overflow */
   4463                                 cr[0] = CoderResult.OVERFLOW;
   4464                                 c = 0;
   4465                                 break;
   4466                             }
   4467                         }
   4468 
   4469                         /* normal end of conversion: prepare for a new character */
   4470                         c = 0;
   4471                         sourceIndex = nextSourceIndex;
   4472                         continue;
   4473                     } else {
   4474                         /* target is full */
   4475                         cr[0] = CoderResult.OVERFLOW;
   4476                         break;
   4477                     }
   4478                 }
   4479             }
   4480 
   4481             /* set the converter state back into UConverter */
   4482             fromUChar32 = c;
   4483 
   4484             /* write back the updated pointers */
   4485             source.position(sourceArrayIndex);
   4486 
   4487             return cr[0];
   4488         }
   4489 
   4490         private final class SideEffectsSingleBMP {
   4491             int c, sourceArrayIndex;
   4492 
   4493             SideEffectsSingleBMP(int c_, int sourceArrayIndex_) {
   4494                 c = c_;
   4495                 sourceArrayIndex = sourceArrayIndex_;
   4496             }
   4497         }
   4498 
   4499         // function made out of block labeled getTrail in ucnv_MBCSSingleFromUnicodeWithOffsets
   4500         // assumes input c is lead surrogate
   4501         private final boolean getTrailSingleBMP(CharBuffer source, SideEffectsSingleBMP x, CoderResult[] cr) {
   4502             if (x.sourceArrayIndex < source.limit()) {
   4503                 /* test the following code unit */
   4504                 char trail = source.get(x.sourceArrayIndex);
   4505                 if (UTF16.isTrailSurrogate(trail)) {
   4506                     ++x.sourceArrayIndex;
   4507                     x.c = UCharacter.getCodePoint((char) x.c, trail);
   4508                     /* this codepage does not map supplementary code points */
   4509                     /* callback(unassigned) */
   4510                     cr[0] = CoderResult.unmappableForLength(2);
   4511                     return false;
   4512                 } else {
   4513                     /* this is an unmatched lead code unit (1st surrogate) */
   4514                     /* callback(illegal) */
   4515                     cr[0] = CoderResult.malformedForLength(1);
   4516                     return false;
   4517                 }
   4518             } else {
   4519                 /* no more input */
   4520                 return false;
   4521             }
   4522             // return true;
   4523         }
   4524 
   4525         private final class SideEffects {
   4526             int c, sourceArrayIndex, sourceIndex, nextSourceIndex, prevSourceIndex, prevLength;
   4527             boolean doread = true;
   4528 
   4529             SideEffects(int c_, int sourceArrayIndex_, int sourceIndex_, int nextSourceIndex_, int prevSourceIndex_,
   4530                     int prevLength_) {
   4531                 c = c_;
   4532                 sourceArrayIndex = sourceArrayIndex_;
   4533                 sourceIndex = sourceIndex_;
   4534                 nextSourceIndex = nextSourceIndex_;
   4535                 prevSourceIndex = prevSourceIndex_;
   4536                 prevLength = prevLength_;
   4537             }
   4538         }
   4539 
   4540         // function made out of block labeled getTrail in ucnv_MBCSFromUnicodeWithOffsets
   4541         // assumes input c is lead surrogate
   4542         private final boolean getTrail(CharBuffer source, ByteBuffer target, int uniMask, SideEffects x,
   4543                 boolean flush, CoderResult[] cr) {
   4544             if (x.sourceArrayIndex < source.limit()) {
   4545                 /* test the following code unit */
   4546                 char trail = source.get(x.sourceArrayIndex);
   4547                 if (UTF16.isTrailSurrogate(trail)) {
   4548                     ++x.sourceArrayIndex;
   4549                     ++x.nextSourceIndex;
   4550                     /* convert this supplementary code point */
   4551                     x.c = UCharacter.getCodePoint((char) x.c, trail);
   4552                     if ((uniMask & UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
   4553                         /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
   4554                         fromUnicodeStatus = x.prevLength; /* save the old state */
   4555                         /* callback(unassigned) */
   4556                         x.doread = true;
   4557                         return unassigned(source, target, null, x, flush, cr);
   4558                     } else {
   4559                         x.doread = false;
   4560                         return true;
   4561                     }
   4562                 } else {
   4563                     /* this is an unmatched lead code unit (1st surrogate) */
   4564                     /* callback(illegal) */
   4565                     cr[0] = CoderResult.malformedForLength(1);
   4566                     return false;
   4567                 }
   4568             } else {
   4569                 /* no more input */
   4570                 return false;
   4571             }
   4572         }
   4573 
   4574         // function made out of block labeled unassigned in ucnv_MBCSFromUnicodeWithOffsets
   4575         private final boolean unassigned(CharBuffer source, ByteBuffer target, IntBuffer offsets, SideEffects x,
   4576                 boolean flush, CoderResult[] cr) {
   4577             /* try an extension mapping */
   4578             int sourceBegin = x.sourceArrayIndex;
   4579             source.position(x.sourceArrayIndex);
   4580             x.c = fromU(x.c, source, target, null, x.sourceIndex, x.nextSourceIndex, flush, cr);
   4581             x.sourceArrayIndex = source.position();
   4582             x.nextSourceIndex += x.sourceArrayIndex - sourceBegin;
   4583             x.prevLength = fromUnicodeStatus;
   4584 
   4585             if (cr[0].isError()) {
   4586                 /* not mappable or buffer overflow */
   4587                 return false;
   4588             } else {
   4589                 /* a mapping was written to the target, continue */
   4590 
   4591                 /* recalculate the targetCapacity after an extension mapping */
   4592                 // x.targetCapacity=pArgs.targetLimit-x.targetArrayIndex;
   4593                 /* normal end of conversion: prepare for a new character */
   4594                 if (offsets != null) {
   4595                     x.prevSourceIndex = x.sourceIndex;
   4596                     x.sourceIndex = x.nextSourceIndex;
   4597                 }
   4598                 return true;
   4599             }
   4600         }
   4601 
   4602         private final class SideEffectsDouble {
   4603             int c, sourceArrayIndex, sourceIndex, nextSourceIndex;
   4604             boolean doread = true;
   4605 
   4606             SideEffectsDouble(int c_, int sourceArrayIndex_, int sourceIndex_, int nextSourceIndex_) {
   4607                 c = c_;
   4608                 sourceArrayIndex = sourceArrayIndex_;
   4609                 sourceIndex = sourceIndex_;
   4610                 nextSourceIndex = nextSourceIndex_;
   4611             }
   4612         }
   4613 
   4614         // function made out of block labeled getTrail in ucnv_MBCSDoubleFromUnicodeWithOffsets
   4615         // assumes input c is lead surrogate
   4616         private final boolean getTrailDouble(CharBuffer source, ByteBuffer target, int uniMask,
   4617                 SideEffectsDouble x, boolean flush, CoderResult[] cr) {
   4618             if (x.sourceArrayIndex < source.limit()) {
   4619                 /* test the following code unit */
   4620                 char trail = source.get(x.sourceArrayIndex);
   4621                 if (UTF16.isTrailSurrogate(trail)) {
   4622                     ++x.sourceArrayIndex;
   4623                     ++x.nextSourceIndex;
   4624                     /* convert this supplementary code point */
   4625                     x.c = UCharacter.getCodePoint((char) x.c, trail);
   4626                     if ((uniMask & UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
   4627                         /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
   4628                         /* callback(unassigned) */
   4629                         x.doread = true;
   4630                         return unassignedDouble(source, target, x, flush, cr);
   4631                     } else {
   4632                         x.doread = false;
   4633                         return true;
   4634                     }
   4635                 } else {
   4636                     /* this is an unmatched lead code unit (1st surrogate) */
   4637                     /* callback(illegal) */
   4638                     cr[0] = CoderResult.malformedForLength(1);
   4639                     return false;
   4640                 }
   4641             } else {
   4642                 /* no more input */
   4643                 return false;
   4644             }
   4645         }
   4646 
   4647         // function made out of block labeled unassigned in ucnv_MBCSDoubleFromUnicodeWithOffsets
   4648         private final boolean unassignedDouble(CharBuffer source, ByteBuffer target, SideEffectsDouble x,
   4649                 boolean flush, CoderResult[] cr) {
   4650             /* try an extension mapping */
   4651             int sourceBegin = x.sourceArrayIndex;
   4652             source.position(x.sourceArrayIndex);
   4653             x.c = fromU(x.c, source, target, null, x.sourceIndex, x.nextSourceIndex, flush, cr);
   4654             x.sourceArrayIndex = source.position();
   4655             x.nextSourceIndex += x.sourceArrayIndex - sourceBegin;
   4656 
   4657             if (cr[0].isError()) {
   4658                 /* not mappable or buffer overflow */
   4659                 return false;
   4660             } else {
   4661                 /* a mapping was written to the target, continue */
   4662 
   4663                 /* recalculate the targetCapacity after an extension mapping */
   4664                 // x.targetCapacity=pArgs.targetLimit-x.targetArrayIndex;
   4665                 /* normal end of conversion: prepare for a new character */
   4666                 x.sourceIndex = x.nextSourceIndex;
   4667                 return true;
   4668             }
   4669         }
   4670 
   4671         /**
   4672          * Overrides super class method
   4673          *
   4674          * @param encoder
   4675          * @param source
   4676          * @param target
   4677          * @param offsets
   4678          * @return
   4679          */
   4680         @Override
   4681         protected CoderResult cbFromUWriteSub(CharsetEncoderICU encoder, CharBuffer source, ByteBuffer target,
   4682                 IntBuffer offsets) {
   4683             CharsetMBCS cs = (CharsetMBCS) encoder.charset();
   4684             byte[] subchar;
   4685             int length;
   4686 
   4687             if (cs.subChar1 != 0
   4688                     && (cs.sharedData.mbcs.extIndexes != null ? encoder.useSubChar1
   4689                             : (encoder.invalidUCharBuffer[0] <= 0xff))) {
   4690                 /*
   4691                  * select subChar1 if it is set (not 0) and the unmappable Unicode code point is up to U+00ff (IBM MBCS
   4692                  * behavior)
   4693                  */
   4694                 subchar = new byte[] { cs.subChar1 };
   4695                 length = 1;
   4696             } else {
   4697                 /* select subChar in all other cases */
   4698                 subchar = cs.subChar;
   4699                 length = cs.subCharLen;
   4700             }
   4701 
   4702             /* reset the selector for the next code point */
   4703             encoder.useSubChar1 = false;
   4704 
   4705             if (cs.sharedData.mbcs.outputType == MBCS_OUTPUT_2_SISO) {
   4706                 byte[] buffer = new byte[4];
   4707                 int i = 0;
   4708 
   4709                 /* fromUnicodeStatus contains prevLength */
   4710                 switch (length) {
   4711                 case 1:
   4712                     if (encoder.fromUnicodeStatus == 2) {
   4713                         /* DBCS mode and SBCS sub char: change to SBCS */
   4714                         encoder.fromUnicodeStatus = 1;
   4715                         buffer[i++] = UConverterConstants.SI;
   4716                     }
   4717                     buffer[i++] = subchar[0];
   4718                     break;
   4719                 case 2:
   4720                     if (encoder.fromUnicodeStatus <= 1) {
   4721                         /* SBCS mode and DBCS sub char: change to DBCS */
   4722                         encoder.fromUnicodeStatus = 2;
   4723                         buffer[i++] = UConverterConstants.SO;
   4724                     }
   4725                     buffer[i++] = subchar[0];
   4726                     buffer[i++] = subchar[1];
   4727                     break;
   4728                 default:
   4729                     throw new IllegalArgumentException();
   4730                 }
   4731 
   4732                 subchar = buffer;
   4733                 length = i;
   4734             }
   4735             return CharsetEncoderICU.fromUWriteBytes(encoder, subchar, 0, length, target, offsets, source.position());
   4736         }
   4737 
   4738         /**
   4739          * Gets called whenever CharsetEncoder.replaceWith gets called. allowReplacementChanges only allows subChar and
   4740          * subChar1 to be modified outside construction (since replaceWith is called once during construction).
   4741          *
   4742          * @param replacement
   4743          *            The replacement for subchar.
   4744          */
   4745         @Override
   4746         protected void implReplaceWith(byte[] replacement) {
   4747             if (allowReplacementChanges) {
   4748                 CharsetMBCS cs = (CharsetMBCS) this.charset();
   4749 
   4750                 System.arraycopy(replacement, 0, cs.subChar, 0, replacement.length);
   4751                 cs.subCharLen = (byte) replacement.length;
   4752                 cs.subChar1 = 0;
   4753             }
   4754         }
   4755     }
   4756 
   4757     @Override
   4758     public CharsetDecoder newDecoder() {
   4759         return new CharsetDecoderMBCS(this);
   4760     }
   4761 
   4762     @Override
   4763     public CharsetEncoder newEncoder() {
   4764         return new CharsetEncoderMBCS(this);
   4765     }
   4766 
   4767     @SuppressWarnings("fallthrough")
   4768     void MBCSGetFilteredUnicodeSetForUnicode(UConverterSharedData data, UnicodeSet setFillIn, int which, int filter){
   4769         UConverterMBCSTable mbcsTable;
   4770         char[] table;
   4771         char st1,maxStage1, st2;
   4772         int st3;
   4773         int c ;
   4774 
   4775         mbcsTable = data.mbcs;
   4776         table = mbcsTable.fromUnicodeTable;
   4777         if(mbcsTable.hasSupplementary()){
   4778             maxStage1 = 0x440;
   4779         }
   4780         else{
   4781             maxStage1 = 0x40;
   4782         }
   4783         c=0; /* keep track of current code point while enumerating */
   4784 
   4785         if(mbcsTable.outputType==MBCS_OUTPUT_1){
   4786             char stage2, stage3;
   4787             char minValue;
   4788             char[] results = mbcsTable.fromUnicodeChars;
   4789 
   4790             if(which==ROUNDTRIP_SET) {
   4791                 /* use only roundtrips */
   4792                 minValue=0xf00;
   4793             } else {
   4794                 /* use all roundtrip and fallback results */
   4795                 minValue=0x800;
   4796             }
   4797             for(st1=0;st1<maxStage1;++st1){
   4798                 st2 = table[st1];
   4799                 if(st2>maxStage1){
   4800                     stage2 = st2;
   4801                     for(st2=0; st2<64; ++st2){
   4802                         st3 = table[stage2 + st2];
   4803                         if(st3!=0){
   4804                             /*read the stage 3 block */
   4805                             stage3 = (char)st3;
   4806                             do {
   4807                                 if(results[stage3++]>=minValue){
   4808                                      setFillIn.add(c);
   4809                                 }
   4810                             }while((++c&0xf) !=0);
   4811                           } else {
   4812                             c+= 16; /*empty stage 2 block */
   4813                         }
   4814                     }
   4815                 } else {
   4816                     c+=1024; /* empty stage 2 block */
   4817                 }
   4818             }
   4819         } else {
   4820             int[] tableInts = mbcsTable.fromUnicodeTableInts;
   4821             int stage2,stage3;
   4822             byte[] bytes;
   4823             int st3Multiplier;
   4824             int value;
   4825             boolean useFallBack;
   4826             bytes = mbcsTable.fromUnicodeBytes;
   4827             char[] chars = mbcsTable.fromUnicodeChars;
   4828             int[] ints = mbcsTable.fromUnicodeInts;
   4829             useFallBack = (which == ROUNDTRIP_AND_FALLBACK_SET);
   4830             switch(mbcsTable.outputType) {
   4831             case MBCS_OUTPUT_3:
   4832             case MBCS_OUTPUT_4_EUC:
   4833                 st3Multiplier = 3;
   4834                 break;
   4835             case MBCS_OUTPUT_4:
   4836                 st3Multiplier =4;
   4837                 break;
   4838             default:
   4839                 st3Multiplier =2;
   4840                 break;
   4841             }
   4842 
   4843             for(st1=0;st1<maxStage1;++st1){
   4844                 st2 = table[st1];
   4845                 if(st2>(maxStage1>>1)){
   4846                     stage2 =  st2 ;
   4847                     for(st2=0;st2<64;++st2){
   4848                         /*read the stage 3 block */
   4849                         st3 = tableInts[stage2 + st2];
   4850                         if(st3!=0){
   4851                         //if((st3=table[stage2+st2])!=0){
   4852                             stage3 = st3Multiplier*16*(st3&UConverterConstants.UNSIGNED_SHORT_MASK);
   4853 
   4854                             /* get the roundtrip flags for the stage 3 block */
   4855                             st3>>>=16;
   4856                             switch(filter) {
   4857                             case UCNV_SET_FILTER_NONE:
   4858                                 do {
   4859                                    if((st3&1)!=0){
   4860                                         setFillIn.add(c);
   4861                                    }else if (useFallBack) {
   4862                                         int b =0;
   4863                                         switch(st3Multiplier) {
   4864                                         case 4:
   4865                                             b = ints[stage3 / 4];
   4866                                             break;
   4867                                         case 3:
   4868                                             b |= bytes[stage3] | bytes[stage3 + 1] | bytes[stage3 + 2];
   4869                                             break;
   4870                                         case 2:
   4871                                             b = chars[stage3 / 2];
   4872                                             break;
   4873                                         default:
   4874                                             break;
   4875                                         }
   4876                                         stage3+=st3Multiplier;
   4877                                         if(b!=0) {
   4878                                             setFillIn.add(c);
   4879                                         }
   4880                                     }
   4881                                     st3>>=1;
   4882                                 }while((++c&0xf)!=0);
   4883                                 break;
   4884                             case UCNV_SET_FILTER_DBCS_ONLY:
   4885                                 /* Ignore single bytes results (<0x100). */
   4886                                 do {
   4887                                     if(((st3&1) != 0 || useFallBack) && chars[stage3 / 2] >= 0x100){
   4888                                         setFillIn.add(c);
   4889                                     }
   4890                                     st3>>=1;
   4891                                     stage3+=2;
   4892                                 }while((++c&0xf) != 0);
   4893                                break;
   4894                             case UCNV_SET_FILTER_2022_CN :
   4895                                 /* only add code points that map to CNS 11643 planes 1 & 2 for non-EXT ISO-2022-CN. */
   4896                                 do {
   4897                                     if(((st3&1) != 0 || useFallBack) &&
   4898                                             ((value= (UConverterConstants.UNSIGNED_BYTE_MASK & bytes[stage3]))==0x81 || value==0x82) ){
   4899                                         setFillIn.add(c);
   4900                                     }
   4901                                     st3>>=1;
   4902                                     stage3+=3;
   4903                                 }while((++c&0xf)!=0);
   4904                                 break;
   4905                             case UCNV_SET_FILTER_SJIS:
   4906                                 /* only add code points that map to Shift-JIS codes corresponding to JIS X 0208. */
   4907                                 do{
   4908                                     if(((st3&1) != 0 || useFallBack) && (value=chars[stage3 / 2])>=0x8140 && value<=0xeffc){
   4909                                         setFillIn.add(c);
   4910                                     }
   4911                                     st3>>=1;
   4912                                     stage3+=2;
   4913                                 }while((++c&0xf)!=0);
   4914                                 break;
   4915                             case UCNV_SET_FILTER_GR94DBCS:
   4916                                 /* only add code points that maps to ISO 2022 GR 94 DBCS codes*/
   4917                                 do {
   4918                                     if(((st3&1) != 0 || useFallBack) &&
   4919                                             (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=chars[stage3 / 2])- 0xa1a1))<=(0xfefe - 0xa1a1) &&
   4920                                             (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1)) <= (0xfe - 0xa1)){
   4921                                         setFillIn.add(c);
   4922                                     }
   4923                                     st3>>=1;
   4924                                     stage3+=2;
   4925                                 }while((++c&0xf)!=0);
   4926                                 break;
   4927                             case UCNV_SET_FILTER_HZ:
   4928                                 /*Only add code points that are suitable for HZ DBCS*/
   4929                                 do {
   4930                                     if( ((st3&1) != 0 || useFallBack) &&
   4931                                             (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=chars[stage3 / 2])-0xa1a1))<=(0xfdfe - 0xa1a1) &&
   4932                                             (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1)) <= (0xfe - 0xa1)){
   4933                                         setFillIn.add(c);
   4934                                     }
   4935                                     st3>>=1;
   4936                                     stage3+=2;
   4937                                 }while((++c&0xf) != 0);
   4938                                 break;
   4939                             default:
   4940                                 return;
   4941                             }
   4942                         } else {
   4943                             c+=16; /* empty stage 3 block */
   4944                         }
   4945                     }
   4946                 } else {
   4947                     c+=1024; /*empty stage2 block */
   4948                 }
   4949             }
   4950         }
   4951         extGetUnicodeSet(setFillIn, which, filter, data);
   4952     }
   4953 
   4954     static void extGetUnicodeSetString(ByteBuffer cx,UnicodeSet setFillIn, boolean useFallback,
   4955         int minLength, int c, char s[],int length,int sectionIndex){
   4956         CharBuffer fromUSectionUChar;
   4957         IntBuffer fromUSectionValues;
   4958         fromUSectionUChar = (CharBuffer)ARRAY(cx, EXT_FROM_U_UCHARS_INDEX,char.class );
   4959         fromUSectionValues = (IntBuffer)ARRAY(cx, EXT_FROM_U_VALUES_INDEX,int.class );
   4960         int fromUSectionUCharIndex = fromUSectionUChar.position()+sectionIndex;
   4961         int fromUSectionValuesIndex = fromUSectionValues.position()+sectionIndex;
   4962         int value, i, count;
   4963 
   4964         /* read first pair of the section */
   4965        count = fromUSectionUChar.get(fromUSectionUCharIndex++);
   4966        value = fromUSectionValues.get(fromUSectionValuesIndex++);
   4967        if(value!=0 && (FROM_U_IS_ROUNDTRIP(value) || useFallback) && FROM_U_GET_LENGTH(value)>=minLength) {
   4968            if(c>=0){
   4969                setFillIn.add(c);
   4970            } else {
   4971                StringBuilder normalizedStringBuilder = new StringBuilder();
   4972                for(int j=0; j<length;j++){
   4973                    normalizedStringBuilder.append(s[j]);
   4974                }
   4975                String normalizedString = normalizedStringBuilder.toString();
   4976                for(int j=0;j<length;j++){
   4977                    setFillIn.add(normalizedString);
   4978                }
   4979              }
   4980        }
   4981 
   4982        for(i=0; i<count; ++i){
   4983            s[length] = fromUSectionUChar.get(fromUSectionUCharIndex + i);
   4984            value = fromUSectionValues.get(fromUSectionValuesIndex + i);
   4985 
   4986            if(value==0) {
   4987                /* no mapping, do nothing */
   4988            } else if (FROM_U_IS_PARTIAL(value)) {
   4989                extGetUnicodeSetString( cx, setFillIn, useFallback, minLength, UConverterConstants.U_SENTINEL, s, length+1,
   4990                        FROM_U_GET_PARTIAL_INDEX(value));
   4991            } else if ((useFallback ? (value&FROM_U_RESERVED_MASK)==0:((value&(FROM_U_ROUNDTRIP_FLAG|FROM_U_RESERVED_MASK))==FROM_U_ROUNDTRIP_FLAG))
   4992                    && FROM_U_GET_LENGTH(value)>=minLength) {
   4993                StringBuilder normalizedStringBuilder = new StringBuilder(); // String for composite characters
   4994                for(int j=0; j<(length+1);j++){
   4995                    normalizedStringBuilder.append(s[j]);
   4996                }
   4997              setFillIn.add(normalizedStringBuilder.toString());
   4998            }
   4999        }
   5000 
   5001     }
   5002 
   5003 
   5004     static void extGetUnicodeSet(UnicodeSet setFillIn, int which, int filter, UConverterSharedData Data){
   5005         int st1, stage1Length, st2, st3, minLength;
   5006         int ps2, ps3;
   5007 
   5008         CharBuffer stage12, stage3;
   5009         int value, length;
   5010         IntBuffer stage3b;
   5011         boolean useFallback;
   5012         char s[] = new char[MAX_UCHARS];
   5013         int c;
   5014         ByteBuffer cx = Data.mbcs.extIndexes;
   5015         if(cx == null){
   5016             return;
   5017         }
   5018         stage12 = (CharBuffer)ARRAY(cx, EXT_FROM_U_STAGE_12_INDEX,char.class );
   5019         stage3 = (CharBuffer)ARRAY(cx, EXT_FROM_U_STAGE_3_INDEX,char.class );
   5020         stage3b = (IntBuffer)ARRAY(cx, EXT_FROM_U_STAGE_3B_INDEX,int.class );
   5021 
   5022         stage1Length = cx.asIntBuffer().get(EXT_FROM_U_STAGE_1_LENGTH);
   5023         useFallback = (which==ROUNDTRIP_AND_FALLBACK_SET);
   5024 
   5025         c = 0;
   5026         if(filter == UCNV_SET_FILTER_2022_CN) {
   5027             minLength = 3;
   5028         } else if (Data.mbcs.outputType == MBCS_OUTPUT_DBCS_ONLY || filter != UCNV_SET_FILTER_NONE) {
   5029             /* DBCS-only, ignore single-byte results */
   5030             minLength = 2;
   5031         } else {
   5032             minLength = 1;
   5033         }
   5034 
   5035         for(st1=0; st1< stage1Length; ++st1){
   5036             st2 = stage12.get(st1);
   5037             if(st2>stage1Length) {
   5038                 ps2 = st2;
   5039                 for(st2=0;st2<64;++st2){
   5040                     st3=(stage12.get(ps2+st2))<<STAGE_2_LEFT_SHIFT;
   5041                     if(st3!= 0){
   5042                         ps3 = st3;
   5043                         do {
   5044                             value = stage3b.get(stage3.get(ps3++));
   5045                             if(value==0){
   5046                                 /* no mapping do nothing */
   5047                             }else if (FROM_U_IS_PARTIAL(value)){
   5048                                 length = 0;
   5049                                 length=UTF16.append(s, length, c);
   5050                                 extGetUnicodeSetString(cx,setFillIn,useFallback,minLength,c,s,length,FROM_U_GET_PARTIAL_INDEX(value));
   5051                             } else if ((useFallback ?  (value&FROM_U_RESERVED_MASK)==0 :((value&(FROM_U_ROUNDTRIP_FLAG|FROM_U_RESERVED_MASK))== FROM_U_ROUNDTRIP_FLAG)) &&
   5052                                     FROM_U_GET_LENGTH(value)>=minLength){
   5053 
   5054                                 switch(filter) {
   5055                                 case UCNV_SET_FILTER_2022_CN:
   5056                                     if(!(FROM_U_GET_LENGTH(value)==3 && FROM_U_GET_DATA(value)<=0x82ffff)){
   5057                                         continue;
   5058                                     }
   5059                                     break;
   5060                                 case UCNV_SET_FILTER_SJIS:
   5061                                     if(!(FROM_U_GET_LENGTH(value)==2 && (value=FROM_U_GET_DATA(value))>=0x8140 && value<=0xeffc)){
   5062                                         continue;
   5063                                     }
   5064                                     break;
   5065                                 case UCNV_SET_FILTER_GR94DBCS:
   5066                                     if(!(FROM_U_GET_LENGTH(value)==2 && ((value=FROM_U_GET_DATA(value)) - 0xa1a1)<=(0xfefe - 0xa1a1)
   5067                                             && (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1))<= (0xfe - 0xa1))){
   5068                                         continue;
   5069                                     }
   5070                                     break;
   5071                                 case UCNV_SET_FILTER_HZ:
   5072                                     if(!(FROM_U_GET_LENGTH(value)==2 && ((value=FROM_U_GET_DATA(value)) - 0xa1a1)<=(0xfdfe - 0xa1a1)
   5073                                             && (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1))<= (0xfe - 0xa1))){
   5074                                         continue;
   5075                                     }
   5076                                     break;
   5077                                 default:
   5078                                     /*
   5079                                      * UCNV_SET_FILTER_NONE,
   5080                                      * or UCNV_SET_FILTER_DBCS_ONLY which is handled via minLength
   5081                                      */
   5082                                     break;
   5083                                 }
   5084                                 setFillIn.add(c);
   5085 
   5086                             }
   5087                         }while((++c&0xf) != 0);
   5088 
   5089                     } else {
   5090                         c+=16;   /* emplty stage3 block */
   5091                     }
   5092                 }
   5093             } else {
   5094                 c+=1024;  /* empty stage 2 block*/
   5095             }
   5096         }
   5097     }
   5098 
   5099     void MBCSGetUnicodeSetForUnicode(UConverterSharedData data, UnicodeSet setFillIn, int which){
   5100         MBCSGetFilteredUnicodeSetForUnicode(data, setFillIn, which,
   5101                 this.sharedData.mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ? UCNV_SET_FILTER_DBCS_ONLY : UCNV_SET_FILTER_NONE );
   5102     }
   5103 
   5104     @Override
   5105     void getUnicodeSetImpl( UnicodeSet setFillIn, int which){
   5106         if((options & MBCS_OPTION_GB18030)!=0){
   5107             setFillIn.add(0, 0xd7ff);
   5108             setFillIn.add(0xe000, 0x10ffff);
   5109         }
   5110         else {
   5111             this.MBCSGetUnicodeSetForUnicode(sharedData, setFillIn, which);
   5112         }
   5113     }
   5114 
   5115 }
   5116