Home | History | Annotate | Download | only in charset
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /**
      4  *******************************************************************************
      5  * Copyright (C) 2006-2013, International Business Machines Corporation and    *
      6  * others. All Rights Reserved.                                                *
      7  *******************************************************************************
      8  *
      9  *******************************************************************************
     10  */
     11 
     12 package com.ibm.icu.charset;
     13 
     14 import java.nio.BufferOverflowException;
     15 import java.nio.ByteBuffer;
     16 import java.nio.CharBuffer;
     17 import java.nio.IntBuffer;
     18 import java.nio.charset.CharsetEncoder;
     19 import java.nio.charset.CoderResult;
     20 import java.nio.charset.CodingErrorAction;
     21 
     22 import com.ibm.icu.impl.Assert;
     23 import com.ibm.icu.lang.UCharacter;
     24 import com.ibm.icu.text.UTF16;
     25 
     26 /**
 * An abstract class that provides framework methods of encoding operations for concrete
 * subclasses.
     29  * In the future this class will contain API that will implement converter semantics of ICU4C.
     30  * @stable ICU 3.6
     31  */
     32 public abstract class CharsetEncoderICU extends CharsetEncoder {
     33 
    /* this is used in fromUnicode DBCS tables as an "unassigned" marker */
    static final char MISSING_CHAR_MARKER = '\uFFFF';

    /*
     * Output bytes that did not fit into the target buffer.
     * fromUWriteBytes() stores them here and encode() copies them to the
     * target before any new input is converted.
     */
    byte[] errorBuffer = new byte[30];

    /* number of pending bytes currently held in errorBuffer (0 = none) */
    int errorBufferLength = 0;

    /** these are for encodeLoopICU */
    /* cleared to 0 by implReset(); otherwise maintained outside this part of the file */
    int fromUnicodeStatus;

    /*
     * Code point handed to the error callback by fromUnicodeWithCallback().
     * A non-zero value at end of input with flush set is treated there as a
     * truncated (malformed) sequence.
     */
    int fromUChar32;

    /* not referenced in this part of the file; presumably read by subclasses - TODO confirm */
    boolean useSubChar1;

    /* whether fallback mappings may be used; see isFallbackUsed()/setFallbackUsed() */
    boolean useFallback;

    /* maximum number of indexed UChars */
    static final int EXT_MAX_UCHARS = 19;

    /* store previous UChars/chars to continue partial matches */
    int preFromUFirstCP; /* >=0: partial match */

    char[] preFromUArray = new char[EXT_MAX_UCHARS];

    int preFromUBegin;

    int preFromULength; /* negative: replay */

    /* UTF-16 form of the code point that caused the current error; passed to the callback */
    char[] invalidUCharBuffer = new char[2];

    /* number of chars of invalidUCharBuffer in use (set from UTF16.append) */
    int invalidUCharLength;

    /* opaque context object passed to the error callbacks; see setFromUContext() */
    Object fromUContext;

    /* callback used for unmappable input; defaults to the STOP callback */
    private CharsetCallback.Encoder onUnmappableInput = CharsetCallback.FROM_U_CALLBACK_STOP;

    /* callback used for malformed input; defaults to the STOP callback */
    private CharsetCallback.Encoder onMalformedInput = CharsetCallback.FROM_U_CALLBACK_STOP;
     71 
     72     CharsetCallback.Encoder fromCharErrorBehaviour = new CharsetCallback.Encoder() {
     73         @Override
     74         public CoderResult call(CharsetEncoderICU encoder, Object context,
     75                 CharBuffer source, ByteBuffer target, IntBuffer offsets,
     76                 char[] buffer, int length, int cp, CoderResult cr) {
     77             if (cr.isUnmappable()) {
     78                 return onUnmappableInput.call(encoder, context, source, target,
     79                         offsets, buffer, length, cp, cr);
     80             } else /* if (cr.isMalformed()) */ {
     81                 return onMalformedInput.call(encoder, context, source, target,
     82                         offsets, buffer, length, cp, cr);
     83             }
     84             // return CharsetCallback.FROM_U_CALLBACK_STOP.call(encoder, context, source, target, offsets, buffer, length, cp, cr);
     85 
     86         }
     87     };
     88 
    /*
     * Constructs a new encoder for the given charset.
     * The average bytes-per-char value passed to the superclass is the mean of
     * the charset's minBytesPerChar and maxBytesPerChar.
     *
     * @param cs
     *            the charset for which the encoder is created
     * @param replacement
     *            the substitution bytes
     */
    CharsetEncoderICU(CharsetICU cs, byte[] replacement) {
        super(cs, (cs.minBytesPerChar + cs.maxBytesPerChar) / 2,
                cs.maxBytesPerChar, replacement);
    }
    101 
    102     /**
    103      * Is this Encoder allowed to use fallbacks? A fallback mapping is a mapping
    104      * that will convert a Unicode codepoint sequence to a byte sequence, but
    105      * the encoded byte sequence will round trip convert to a different
    106      * Unicode codepoint sequence.
    107      * @return true if the converter uses fallback, false otherwise.
    108      * @stable ICU 3.8
    109      */
    110     public boolean isFallbackUsed() {
    111         return useFallback;
    112     }
    113 
    114     /**
    115      * Sets whether this Encoder can use fallbacks?
    116      * @param usesFallback true if the user wants the converter to take
    117      *  advantage of the fallback mapping, false otherwise.
    118      * @stable ICU 3.8
    119      */
    120     public void setFallbackUsed(boolean usesFallback) {
    121         useFallback = usesFallback;
    122     }
    123 
    124     /*
    125      * Use fallbacks from Unicode to codepage when useFallback or for private-use code points
    126      * @param c A codepoint
    127      */
    128     final boolean isFromUUseFallback(int c) {
    129         return (useFallback) || isUnicodePrivateUse(c);
    130     }
    131 
    132     /**
    133      * Use fallbacks from Unicode to codepage when useFallback or for private-use code points
    134      */
    135     static final boolean isFromUUseFallback(boolean iUseFallback, int c) {
    136         return (iUseFallback) || isUnicodePrivateUse(c);
    137     }
    138 
    139     private static final boolean isUnicodePrivateUse(int c) {
    140         // First test for U+E000 to optimize for the most common characters.
    141         return c >= 0xE000 && (c <= 0xF8FF ||
    142                 c >= 0xF0000 && (c <= 0xFFFFD ||
    143                 (c >= 0x100000 && c <= 0x10FFFD)));
    144     }
    145 
    146     /**
    147      * Sets the action to be taken if an illegal sequence is encountered
    148      *
    149      * @param newAction
    150      *            action to be taken
    151      * @exception IllegalArgumentException
    152      * @stable ICU 3.6
    153      */
    154     @Override
    155     protected void implOnMalformedInput(CodingErrorAction newAction) {
    156         onMalformedInput = getCallback(newAction);
    157     }
    158 
    159     /**
    160      * Sets the action to be taken if an illegal sequence is encountered
    161      *
    162      * @param newAction
    163      *            action to be taken
    164      * @exception IllegalArgumentException
    165      * @stable ICU 3.6
    166      */
    167     @Override
    168     protected void implOnUnmappableCharacter(CodingErrorAction newAction) {
    169         onUnmappableInput = getCallback(newAction);
    170     }
    171 
    172     /**
    173      * Sets the callback encoder method and context to be used if an illegal sequence is encountered.
    174      * You would normally call this twice to set both the malform and unmappable error. In this case,
    175      * newContext should remain the same since using a different newContext each time will negate the last
    176      * one used.
    177      * @param err CoderResult
    178      * @param newCallback CharsetCallback.Encoder
    179      * @param newContext Object
    180      * @stable ICU 4.0
    181      */
    182     public final void setFromUCallback(CoderResult err, CharsetCallback.Encoder newCallback, Object newContext) {
    183         if (err.isMalformed()) {
    184             onMalformedInput = newCallback;
    185         } else if (err.isUnmappable()) {
    186             onUnmappableInput = newCallback;
    187         } else {
    188             /* Error: Only malformed and unmappable are handled. */
    189         }
    190 
    191         if (fromUContext == null || !fromUContext.equals(newContext)) {
    192             setFromUContext(newContext);
    193         }
    194     }
    195 
    196     /**
    197      * Sets fromUContext used in callbacks.
    198      *
    199      * @param newContext Object
    200      * @exception IllegalArgumentException The object is an illegal argument for UContext.
    201      * @stable ICU 4.0
    202      */
    203     public final void setFromUContext(Object newContext) {
    204         fromUContext = newContext;
    205     }
    206 
    207     private static CharsetCallback.Encoder getCallback(CodingErrorAction action) {
    208         if (action == CodingErrorAction.REPLACE) {
    209             return CharsetCallback.FROM_U_CALLBACK_SUBSTITUTE;
    210         } else if (action == CodingErrorAction.IGNORE) {
    211             return CharsetCallback.FROM_U_CALLBACK_SKIP;
    212         } else /* if (action == CodingErrorAction.REPORT) */ {
    213             return CharsetCallback.FROM_U_CALLBACK_STOP;
    214         }
    215     }
    216 
    217     private static final CharBuffer EMPTY = CharBuffer.allocate(0);
    218 
    219     /**
    220      * Flushes any characters saved in the converter's internal buffer and
    221      * resets the converter.
    222      * @param out action to be taken
    223      * @return result of flushing action and completes the decoding all input.
    224      *         Returns CoderResult.UNDERFLOW if the action succeeds.
    225      * @stable ICU 3.6
    226      */
    227     @Override
    228     protected CoderResult implFlush(ByteBuffer out) {
    229         return encode(EMPTY, out, null, true);
    230     }
    231 
    232     /**
    233      * Resets the from Unicode mode of converter
    234      * @stable ICU 3.6
    235      */
    236     @Override
    237     protected void implReset() {
    238         errorBufferLength = 0;
    239         fromUnicodeStatus = 0;
    240         fromUChar32 = 0;
    241         fromUnicodeReset();
    242     }
    243 
    244     private void fromUnicodeReset() {
    245         preFromUBegin = 0;
    246         preFromUFirstCP = UConverterConstants.U_SENTINEL;
    247         preFromULength = 0;
    248     }
    249 
    250     /**
    251      * Encodes one or more chars. The default behaviour of the
    252      * converter is stop and report if an error in input stream is encountered.
    253      * To set different behaviour use @see CharsetEncoder.onMalformedInput()
    254      * @param in buffer to decode
    255      * @param out buffer to populate with decoded result
    256      * @return result of decoding action. Returns CoderResult.UNDERFLOW if the decoding
    257      *         action succeeds or more input is needed for completing the decoding action.
    258      * @stable ICU 3.6
    259      */
    260     @Override
    261     protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
    262         if (!in.hasRemaining() && this.errorBufferLength == 0) { // make sure the errorBuffer is empty
    263             // The Java framework should have already substituted what was left.
    264             fromUChar32 = 0;
    265             //fromUnicodeReset();
    266             return CoderResult.UNDERFLOW;
    267         }
    268         in.position(in.position() + fromUCountPending());
    269         /* do the conversion */
    270         CoderResult ret = encode(in, out, null, false);
    271         setSourcePosition(in);
    272         /* No need to reset to keep the proper state of the encoder.
    273          if (ret.isUnderflow() && in.hasRemaining()) {
    274             // The Java framework is going to substitute what is left.
    275             //fromUnicodeReset();
    276         } */
    277         return ret;
    278     }
    279 
    /*
     * Implements ICU semantics of buffer management.
     * This is the conversion function supplied by concrete subclasses; it is
     * invoked repeatedly by fromUnicodeWithCallback().
     * @param source The input character buffer
     * @param target The output byte buffer
     * @param offsets optional offsets buffer; callers in this class may pass null
     * @param flush true if no further input will follow the given source
     * @return A CoderResult object that contains the error result when an error occurs.
     */
    abstract CoderResult encodeLoop(CharBuffer source, ByteBuffer target,
            IntBuffer offsets, boolean flush);
    289 
    290     /*
    291      * Implements ICU semantics for encoding the buffer
    292      * @param source The input character buffer
    293      * @param target The output byte buffer
    294      * @param offsets
    295      * @param flush true if, and only if, the invoker can provide no
    296      *  additional input bytes beyond those in the given buffer.
    297      * @return A CoderResult object that contains the error result when an error occurs.
    298      */
    299     final CoderResult encode(CharBuffer source, ByteBuffer target,
    300             IntBuffer offsets, boolean flush) {
    301 
    302         /* check parameters */
    303         if (target == null || source == null) {
    304             throw new IllegalArgumentException();
    305         }
    306 
    307         /*
    308          * Make sure that the buffer sizes do not exceed the number range for
    309          * int32_t because some functions use the size (in units or bytes)
    310          * rather than comparing pointers, and because offsets are int32_t values.
    311          *
    312          * size_t is guaranteed to be unsigned and large enough for the job.
    313          *
    314          * Return with an error instead of adjusting the limits because we would
    315          * not be able to maintain the semantics that either the source must be
    316          * consumed or the target filled (unless an error occurs).
    317          * An adjustment would be targetLimit=t+0x7fffffff; for example.
    318          */
    319 
    320         /* flush the target overflow buffer */
    321         if (errorBufferLength > 0) {
    322             byte[] overflowArray;
    323             int i, length;
    324 
    325             overflowArray = errorBuffer;
    326             length = errorBufferLength;
    327             i = 0;
    328             do {
    329                 if (target.remaining() == 0) {
    330                     /* the overflow buffer contains too much, keep the rest */
    331                     int j = 0;
    332 
    333                     do {
    334                         overflowArray[j++] = overflowArray[i++];
    335                     } while (i < length);
    336 
    337                     errorBufferLength = (byte) j;
    338                     return CoderResult.OVERFLOW;
    339                 }
    340 
    341                 /* copy the overflow contents to the target */
    342                 target.put(overflowArray[i++]);
    343                 if (offsets != null) {
    344                     offsets.put(-1); /* no source index available for old output */
    345                 }
    346             } while (i < length);
    347 
    348             /* the overflow buffer is completely copied to the target */
    349             errorBufferLength = 0;
    350         }
    351 
    352         if (!flush && source.remaining() == 0 && preFromULength >= 0) {
    353             /* the overflow buffer is emptied and there is no new input: we are done */
    354             return CoderResult.UNDERFLOW;
    355         }
    356 
    357         /*
    358          * Do not simply return with a buffer overflow error if
    359          * !flush && t==targetLimit
    360          * because it is possible that the source will not generate any output.
    361          * For example, the skip callback may be called;
    362          * it does not output anything.
    363          */
    364 
    365         return fromUnicodeWithCallback(source, target, offsets, flush);
    366 
    367     }
    368 
    369     /*
    370      * Implementation note for m:n conversions
    371      *
    372      * While collecting source units to find the longest match for m:n conversion,
    373      * some source units may need to be stored for a partial match.
    374      * When a second buffer does not yield a match on all of the previously stored
    375      * source units, then they must be "replayed", i.e., fed back into the converter.
    376      *
    377      * The code relies on the fact that replaying will not nest -
    378      * converting a replay buffer will not result in a replay.
    379      * This is because a replay is necessary only after the _continuation_ of a
    380      * partial match failed, but a replay buffer is converted as a whole.
    381      * It may result in some of its units being stored again for a partial match,
    382      * but there will not be a continuation _during_ the replay which could fail.
    383      *
    384      * It is conceivable that a callback function could call the converter
    385      * recursively in a way that causes another replay to be stored, but that
    386      * would be an error in the callback function.
    387      * Such violations will cause assertion failures in a debug build,
    388      * and wrong output, but they will not cause a crash.
    389      */
    390     final CoderResult fromUnicodeWithCallback(CharBuffer source,
    391             ByteBuffer target, IntBuffer offsets, boolean flush) {
    392         int sBufferIndex;
    393         int sourceIndex;
    394         int errorInputLength;
    395         boolean converterSawEndOfInput, calledCallback;
    396 
    397         /* variables for m:n conversion */
    398         CharBuffer replayArray = CharBuffer.allocate(EXT_MAX_UCHARS);
    399         int replayArrayIndex = 0;
    400         CharBuffer realSource;
    401         boolean realFlush;
    402 
    403         CoderResult cr = CoderResult.UNDERFLOW;
    404 
    405         /* get the converter implementation function */
    406         sourceIndex = 0;
    407 
    408         if (preFromULength >= 0) {
    409             /* normal mode */
    410             realSource = null;
    411             realFlush = false;
    412         } else {
    413             /*
    414              * Previous m:n conversion stored source units from a partial match
    415              * and failed to consume all of them.
    416              * We need to "replay" them from a temporary buffer and convert them first.
    417              */
    418             realSource = source;
    419             realFlush = flush;
    420 
    421             //UConverterUtility.uprv_memcpy(replayArray, replayArrayIndex, preFromUArray, 0, -preFromULength*UMachine.U_SIZEOF_UCHAR);
    422             replayArray.put(preFromUArray, 0, -preFromULength);
    423             source = replayArray;
    424             source.position(replayArrayIndex);
    425             source.limit(replayArrayIndex - preFromULength); //preFromULength is negative, see declaration
    426             flush = false;
    427 
    428             preFromULength = 0;
    429         }
    430 
    431         /*
    432          * loop for conversion and error handling
    433          *
    434          * loop {
    435          *   convert
    436          *   loop {
    437          *     update offsets
    438          *     handle end of input
    439          *     handle errors/call callback
    440          *   }
    441          * }
    442          */
    443         for (;;) {
    444             /* convert */
    445             cr = encodeLoop(source, target, offsets, flush);
    446             /*
    447              * set a flag for whether the converter
    448              * successfully processed the end of the input
    449              *
    450              * need not check cnv.preFromULength==0 because a replay (<0) will cause
    451              * s<sourceLimit before converterSawEndOfInput is checked
    452              */
    453             converterSawEndOfInput = (cr.isUnderflow() && flush
    454                     && source.remaining() == 0 && fromUChar32 == 0);
    455 
    456             /* no callback called yet for this iteration */
    457             calledCallback = false;
    458 
    459             /* no sourceIndex adjustment for conversion, only for callback output */
    460             errorInputLength = 0;
    461 
    462             /*
    463              * loop for offsets and error handling
    464              *
    465              * iterates at most 3 times:
    466              * 1. to clean up after the conversion function
    467              * 2. after the callback
    468              * 3. after the callback again if there was truncated input
    469              */
    470             for (;;) {
    471                 /* update offsets if we write any */
    472                 /* Currently offsets are not being used in ICU4J */
    473                 /* if (offsets != null) {
    474                     int length = target.remaining();
    475                     if (length > 0) {
    476 
    477                         /*
    478                          * if a converter handles offsets and updates the offsets
    479                          * pointer at the end, then offset should not change
    480                          * here;
    481                          * however, some converters do not handle offsets at all
    482                          * (sourceIndex<0) or may not update the offsets pointer
    483                          */
    484                  /*       offsets.position(offsets.position() + length);
    485                     }
    486 
    487                     if (sourceIndex >= 0) {
    488                         sourceIndex += (int) (source.position());
    489                     }
    490                 } */
    491 
    492                 if (preFromULength < 0) {
    493                     /*
    494                      * switch the source to new replay units (cannot occur while replaying)
    495                      * after offset handling and before end-of-input and callback handling
    496                      */
    497                     if (realSource == null) {
    498                         realSource = source;
    499                         realFlush = flush;
    500 
    501                         //UConverterUtility.uprv_memcpy(replayArray, replayArrayIndex, preFromUArray, 0, -preFromULength*UMachine.U_SIZEOF_UCHAR);
    502                         replayArray.put(preFromUArray, 0, -preFromULength);
    503 
    504                         source = replayArray;
    505                         source.position(replayArrayIndex);
    506                         source.limit(replayArrayIndex - preFromULength);
    507                         flush = false;
    508                         if ((sourceIndex += preFromULength) < 0) {
    509                             sourceIndex = -1;
    510                         }
    511 
    512                         preFromULength = 0;
    513                     } else {
    514                         /* see implementation note before _fromUnicodeWithCallback() */
    515                         //agljport:todo U_ASSERT(realSource==NULL);
    516                         Assert.assrt(realSource == null);
    517                     }
    518                 }
    519 
    520                 /* update pointers */
    521                 sBufferIndex = source.position();
    522                 if (cr.isUnderflow()) {
    523                     if (sBufferIndex < source.limit()) {
    524                         /*
    525                          * continue with the conversion loop while there is still input left
    526                          * (continue converting by breaking out of only the inner loop)
    527                          */
    528                         break;
    529                     } else if (realSource != null) {
    530                         /* switch back from replaying to the real source and continue */
    531                         source = realSource;
    532                         flush = realFlush;
    533                         sourceIndex = source.position();
    534                         realSource = null;
    535                         break;
    536                     } else if (flush && fromUChar32 != 0) {
    537                         /*
    538                          * the entire input stream is consumed
    539                          * and there is a partial, truncated input sequence left
    540                          */
    541 
    542                         /* inject an error and continue with callback handling */
    543                         //err[0]=ErrorCode.U_TRUNCATED_CHAR_FOUND;
    544                         cr = CoderResult.malformedForLength(1);
    545                         calledCallback = false; /* new error condition */
    546                     } else {
    547                         /* input consumed */
    548                         if (flush) {
    549                             /*
    550                              * return to the conversion loop once more if the flush
    551                              * flag is set and the conversion function has not
    552                              * successfully processed the end of the input yet
    553                              *
    554                              * (continue converting by breaking out of only the inner loop)
    555                              */
    556                             if (!converterSawEndOfInput) {
    557                                 break;
    558                             }
    559 
    560                             /* reset the converter without calling the callback function */
    561                             implReset();
    562                         }
    563 
    564                         /* done successfully */
    565                         return cr;
    566                     }
    567                 }
    568 
    569                 /*U_FAILURE(*err) */
    570                 {
    571 
    572                     if (calledCallback || cr.isOverflow()
    573                             || (!cr.isMalformed() && !cr.isUnmappable())) {
    574                         /*
    575                          * the callback did not or cannot resolve the error:
    576                          * set output pointers and return
    577                          *
    578                          * the check for buffer overflow is redundant but it is
    579                          * a high-runner case and hopefully documents the intent
    580                          * well
    581                          *
    582                          * if we were replaying, then the replay buffer must be
    583                          * copied back into the UConverter
    584                          * and the real arguments must be restored
    585                          */
    586                         if (realSource != null) {
    587                             int length;
    588 
    589                             //agljport:todo U_ASSERT(cnv.preFromULength==0);
    590 
    591                             length = source.remaining();
    592                             if (length > 0) {
    593                                 //UConverterUtility.uprv_memcpy(preFromUArray, 0, sourceArray, pArgs.sourceBegin, length*UMachine.U_SIZEOF_UCHAR);
    594                                 source.get(preFromUArray, 0, length);
    595                                 preFromULength = (byte) -length;
    596                             }
    597                         }
    598                         return cr;
    599                     }
    600                 }
    601 
    602                 /* callback handling */
    603                 {
    604                     int codePoint;
    605 
    606                     /* get and write the code point */
    607                     codePoint = fromUChar32;
    608                     errorInputLength = UTF16.append(invalidUCharBuffer, 0,
    609                             fromUChar32);
    610                     invalidUCharLength = errorInputLength;
    611 
    612                     /* set the converter state to deal with the next character */
    613                     fromUChar32 = 0;
    614 
    615                     /* call the callback function */
    616                     cr = fromCharErrorBehaviour.call(this, fromUContext,
    617                             source, target, offsets, invalidUCharBuffer,
    618                             invalidUCharLength, codePoint, cr);
    619                 }
    620 
    621                 /*
    622                  * loop back to the offset handling
    623                  *
    624                  * this flag will indicate after offset handling
    625                  * that a callback was called;
    626                  * if the callback did not resolve the error, then we return
    627                  */
    628                 calledCallback = true;
    629             }
    630         }
    631     }
    632 
    633     /*
    634      * Ascertains if a given Unicode code point (32bit value for handling surrogates)
    635      * can be converted to the target encoding. If the caller wants to test if a
    636      * surrogate pair can be converted to target encoding then the
    637      * responsibility of assembling the int value lies with the caller.
    638      * For assembling a code point the caller can use UTF16 class of ICU4J and do something like:
    639      * <pre>
    640      *  while(i<mySource.length){
    641      *      if(UTF16.isLeadSurrogate(mySource[i])&& i+1< mySource.length){
    642      *          if(UTF16.isTrailSurrogate(mySource[i+1])){
    643      *              int temp = UTF16.charAt(mySource,i,i+1,0);
    644      *              if(!((CharsetEncoderICU) myConv).canEncode(temp)){
    645      *                  passed=false;
    646      *              }
    647      *              i++;
    648      *              i++;
    649      *          }
    650      *      }
    651      *  }
    652      * </pre>
    653      * or
    654      * <pre>
    655      *  String src = new String(mySource);
    656      *  int i,codepoint;
    657      *  boolean passed = false;
    658      *  while(i<src.length()){
    659      *      codepoint = UTF16.charAt(src,i);
     *      i+= (codepoint>0xffff)? 2:1;
     *      if(!((CharsetEncoderICU) myConv).canEncode(codepoint)){
    662      *          passed = false;
    663      *      }
    664      *  }
    665      * </pre>
    666      *
    667      * @param codepoint Unicode code point as int value
    668      * @return true if a character can be converted
    669      */
    670     /* TODO This is different from Java's canEncode(char) API.
    671      * ICU's API should implement getUnicodeSet,
    672      * and override canEncode(char) which queries getUnicodeSet.
    673      * The getUnicodeSet should return a frozen UnicodeSet or use a fillin parameter, like ICU4C.
    674      */
    675     /*public boolean canEncode(int codepoint) {
    676         return true;
    677     }*/
    /**
     * Overrides super class method.
     * This implementation performs no validation: every replacement byte
     * sequence is reported as legal.
     * @param repl the proposed replacement bytes (not inspected)
     * @return always true
     * @stable ICU 3.6
     */
    @Override
    public boolean isLegalReplacement(byte[] repl) {
        return true;
    }
    686 
    687     /*
    688      * Writes out the specified output bytes to the target byte buffer or to converter internal buffers.
    689      * @param cnv
    690      * @param bytesArray
    691      * @param bytesBegin
    692      * @param bytesLength
    693      * @param out
    694      * @param offsets
    695      * @param sourceIndex
    696      * @return A CoderResult object that contains the error result when an error occurs.
    697      */
    698     static final CoderResult fromUWriteBytes(CharsetEncoderICU cnv,
    699             byte[] bytesArray, int bytesBegin, int bytesLength, ByteBuffer out,
    700             IntBuffer offsets, int sourceIndex) {
    701 
    702         //write bytes
    703         int obl = bytesLength;
    704         CoderResult cr = CoderResult.UNDERFLOW;
    705         int bytesLimit = bytesBegin + bytesLength;
    706         try {
    707             for (; bytesBegin < bytesLimit;) {
    708                 out.put(bytesArray[bytesBegin]);
    709                 bytesBegin++;
    710             }
    711             // success
    712             bytesLength = 0;
    713         } catch (BufferOverflowException ex) {
    714             cr = CoderResult.OVERFLOW;
    715         }
    716 
    717         if (offsets != null) {
    718             while (obl > bytesLength) {
    719                 offsets.put(sourceIndex);
    720                 --obl;
    721             }
    722         }
    723         //write overflow
    724         cnv.errorBufferLength = bytesLimit - bytesBegin;
    725         if (cnv.errorBufferLength > 0) {
    726             int index = 0;
    727             while (bytesBegin < bytesLimit) {
    728                 cnv.errorBuffer[index++] = bytesArray[bytesBegin++];
    729             }
    730             cr = CoderResult.OVERFLOW;
    731         }
    732         return cr;
    733     }
    734 
    735     /*
    736      * Returns the number of chars held in the converter's internal state
    737      * because more input is needed for completing the conversion. This function is
    738      * useful for mapping semantics of ICU's converter interface to those of iconv,
    739      * and this information is not needed for normal conversion.
    740      * @return The number of chars in the state. -1 if an error is encountered.
    741      */
    742     /*public*/int fromUCountPending() {
    743         if (preFromULength > 0) {
    744             return UTF16.getCharCount(preFromUFirstCP) + preFromULength;
    745         } else if (preFromULength < 0) {
    746             return -preFromULength;
    747         } else if (fromUChar32 > 0) {
    748             return 1;
    749         } else if (preFromUFirstCP > 0) {
    750             return UTF16.getCharCount(preFromUFirstCP);
    751         }
    752         return 0;
    753     }
    754 
    755     /**
    756      *
    757      * @param source
    758      */
    759     private final void setSourcePosition(CharBuffer source) {
    760 
    761         // ok was there input held in the previous invocation of encodeLoop
    762         // that resulted in output in this invocation?
    763         source.position(source.position() - fromUCountPending());
    764     }
    765 
    766     /*
    767      * Write the codepage substitution character.
    768      * Subclasses to override this method.
    769      * For stateful converters, it is typically necessary to handle this
    770      * specificially for the converter in order to properly maintain the state.
    771      * @param source The input character buffer
    772      * @param target The output byte buffer
    773      * @param offsets
    774      * @return A CoderResult object that contains the error result when an error occurs.
    775      */
    776     CoderResult cbFromUWriteSub(CharsetEncoderICU encoder, CharBuffer source,
    777             ByteBuffer target, IntBuffer offsets) {
    778         CharsetICU cs = (CharsetICU) encoder.charset();
    779         byte[] sub = encoder.replacement();
    780         if (cs.subChar1 != 0 && encoder.invalidUCharBuffer[0] <= 0xff) {
    781             return CharsetEncoderICU.fromUWriteBytes(encoder,
    782                     new byte[] { cs.subChar1 }, 0, 1, target, offsets, source
    783                             .position());
    784         } else {
    785             return CharsetEncoderICU.fromUWriteBytes(encoder, sub, 0,
    786                     sub.length, target, offsets, source.position());
    787         }
    788     }
    789 
    790     /*
    791      * Write the characters to target.
    792      * @param source The input character buffer
    793      * @param target The output byte buffer
    794      * @param offsets
    795      * @return A CoderResult object that contains the error result when an error occurs.
    796      */
    797     CoderResult cbFromUWriteUChars(CharsetEncoderICU encoder,
    798             CharBuffer source, ByteBuffer target, IntBuffer offsets) {
    799         CoderResult cr = CoderResult.UNDERFLOW;
    800 
    801         /* This is a fun one.  Recursion can occur - we're basically going to
    802          * just retry shoving data through the same converter. Note, if you got
    803          * here through some kind of invalid sequence, you maybe should emit a
    804          * reset sequence of some kind. Since this IS an actual conversion,
    805          * take care that you've changed the callback or the data, or you'll
    806          * get an infinite loop.
    807          */
    808 
    809         int oldTargetPosition = target.position();
    810         int offsetIndex = source.position();
    811 
    812         cr = encoder.encode(source, target, null, false); /* no offsets and no flush */
    813 
    814         if (offsets != null) {
    815             while (target.position() != oldTargetPosition) {
    816                 offsets.put(offsetIndex);
    817                 oldTargetPosition++;
    818             }
    819         }
    820 
    821         /* Note, if you did something like used a stop subcallback, things would get interesting.
    822          * In fact, here's where we want to return the partially consumed in-source!
    823          */
    824         if (cr.isOverflow()) {
    825             /* Overflowed target. Now, we'll write into the charErrorBuffer.
    826              * It's a fixed size. If we overflow it...Hm
    827              */
    828 
    829             /* start the new target at the first free slot in the error buffer */
    830             int errBuffLen = encoder.errorBufferLength;
    831             ByteBuffer newTarget = ByteBuffer.wrap(encoder.errorBuffer);
    832             newTarget.position(errBuffLen); /* set the position at the end of the error buffer */
    833             encoder.errorBufferLength = 0;
    834 
    835             encoder.encode(source, newTarget, null, false);
    836 
    837             encoder.errorBuffer = newTarget.array();
    838             encoder.errorBufferLength = newTarget.position();
    839         }
    840 
    841         return cr;
    842     }
    843 
    844     /**
    845      * <p>
    846      * Handles a common situation where a character has been read and it may be
    847      * a lead surrogate followed by a trail surrogate. This method can change
    848      * the source position and will modify fromUChar32.
    849      * </p>
    850      *
    851      * <p>
    852      * If <code>null</code> is returned, then there was success in reading a
    853      * surrogate pair, the codepoint is stored in <code>fromUChar32</code> and
    854      * <code>fromUChar32</code> should be reset (to 0) after being read.
    855      * </p>
    856      *
    857      * @param source
    858      *            The encoding source.
    859      * @param lead
    860      *            A character that may be the first in a surrogate pair.
    861      * @return <code>CoderResult.malformedForLength(1)</code> or
    862      *         <code>CoderResult.UNDERFLOW</code> if there is a problem, or
    863      *         <code>null</code> if there isn't.
    864      * @see #handleSurrogates(CharBuffer, char)
    865      * @see #handleSurrogates(char[], int, int, char)
    866      */
    867     final CoderResult handleSurrogates(CharBuffer source, char lead) {
    868         if (!UTF16.isLeadSurrogate(lead)) {
    869             fromUChar32 = lead;
    870             return CoderResult.malformedForLength(1);
    871         }
    872 
    873         if (!source.hasRemaining()) {
    874             fromUChar32 = lead;
    875             return CoderResult.UNDERFLOW;
    876         }
    877 
    878         char trail = source.get();
    879 
    880         if (!UTF16.isTrailSurrogate(trail)) {
    881             fromUChar32 = lead;
    882             source.position(source.position() - 1);
    883             return CoderResult.malformedForLength(1);
    884         }
    885 
    886         fromUChar32 = UCharacter.getCodePoint(lead, trail);
    887         return null;
    888     }
    889 
    890     /**
    891      * <p>
    892      * Same as <code>handleSurrogates(CharBuffer, char)</code>, but with arrays. As an added
    893      * requirement, the calling method must also increment the index if this method returns
    894      * <code>null</code>.
    895      * </p>
    896      *
    897      *
    898      * @param source
    899      *            The encoding source.
    900      * @param lead
    901      *            A character that may be the first in a surrogate pair.
    902      * @return <code>CoderResult.malformedForLength(1)</code> or
    903      *         <code>CoderResult.UNDERFLOW</code> if there is a problem, or <code>null</code> if
    904      *         there isn't.
    905      * @see #handleSurrogates(CharBuffer, char)
    906      * @see #handleSurrogates(char[], int, int, char)
    907      */
    908     final CoderResult handleSurrogates(char[] sourceArray, int sourceIndex,
    909             int sourceLimit, char lead) {
    910         if (!UTF16.isLeadSurrogate(lead)) {
    911             fromUChar32 = lead;
    912             return CoderResult.malformedForLength(1);
    913         }
    914 
    915         if (sourceIndex >= sourceLimit) {
    916             fromUChar32 = lead;
    917             return CoderResult.UNDERFLOW;
    918         }
    919 
    920         char trail = sourceArray[sourceIndex];
    921 
    922         if (!UTF16.isTrailSurrogate(trail)) {
    923             fromUChar32 = lead;
    924             return CoderResult.malformedForLength(1);
    925         }
    926 
    927         fromUChar32 = UCharacter.getCodePoint(lead, trail);
    928         return null;
    929     }
    930 
    931     /**
    932      * Returns the maxCharsPerByte value for the Charset that created this encoder.
    933      * @return maxCharsPerByte
    934      * @stable ICU 4.8
    935      */
    936     public final float maxCharsPerByte() {
    937         return ((CharsetICU)(this.charset())).maxCharsPerByte;
    938     }
    939 
    940     /**
    941      * Calculates the size of a buffer for conversion from Unicode to a charset.
    942      * The calculated size is guaranteed to be sufficient for this conversion.
    943      *
    944      * It takes into account initial and final non-character bytes that are output
    945      * by some converters.
    946      * It does not take into account callbacks which output more than one charset
    947      * character sequence per call, like escape callbacks.
    948      * The default (substitution) callback only outputs one charset character sequence.
    949      *
    950      * @param length Number of chars to be converted.
    951      * @param maxCharSize Return value from maxBytesPerChar for the converter
    952      *                    that will be used.
    953      * @return Size of a buffer that will be large enough to hold the output of bytes
    954      *
    955      * @stable ICU 49
    956      */
    957     public static int getMaxBytesForString(int length, int maxCharSize) {
    958         return ((length + 10) * maxCharSize);
    959     }
    960 
    961 }
    962