Home | History | Annotate | Download | only in charset
      1 /**
      2  *******************************************************************************
      3  * Copyright (C) 2006-2013, International Business Machines Corporation and    *
      4  * others. All Rights Reserved.                                                *
      5  *******************************************************************************
      6  *
      7  *******************************************************************************
      8  */
      9 
     10 package com.ibm.icu.charset;
     11 
     12 import java.nio.BufferOverflowException;
     13 import java.nio.ByteBuffer;
     14 import java.nio.CharBuffer;
     15 import java.nio.IntBuffer;
     16 import java.nio.charset.CharsetEncoder;
     17 import java.nio.charset.CoderResult;
     18 import java.nio.charset.CodingErrorAction;
     19 
     20 import com.ibm.icu.impl.Assert;
     21 import com.ibm.icu.lang.UCharacter;
     22 import com.ibm.icu.text.UTF16;
     23 
/**
 * An abstract class that provides framework methods of encoding operations for concrete
 * subclasses.
 * In the future this class will contain API that will implement converter semantics of ICU4C.
 * @stable ICU 3.6
 */
     30 public abstract class CharsetEncoderICU extends CharsetEncoder {
     31 
    /* this is used in fromUnicode DBCS tables as an "unassigned" marker */
    static final char MISSING_CHAR_MARKER = '\uFFFF';

    /*
     * Overflow buffer: output bytes that did not fit into the target buffer are parked here
     * (see fromUWriteBytes) and copied out first on the next encode() call.
     */
    byte[] errorBuffer = new byte[30];

    /* number of valid bytes currently held in errorBuffer; 0 means the overflow buffer is empty */
    int errorBufferLength = 0;

    /*
     * these are for encodeLoopICU
     * fromUnicodeStatus is cleared to 0 by implReset(); its meaning is presumably defined by
     * the concrete converter -- TODO confirm against subclasses.
     */
    int fromUnicodeStatus;

    /*
     * Code point pending in the converter. A non-zero value once all input is consumed with
     * flush set is treated as a truncated input sequence; the value is also what gets
     * reported to the error callback.
     */
    int fromUChar32;

    /* not referenced in the visible part of this class; presumably used by subclasses -- TODO confirm */
    boolean useSubChar1;

    /* whether fallback mappings may be used; see isFallbackUsed()/setFallbackUsed() */
    boolean useFallback;

    /* maximum number of indexed UChars */
    static final int EXT_MAX_UCHARS = 19;

    /* store previous UChars/chars to continue partial matches */
    int preFromUFirstCP; /* >=0: partial match */

    /* stored source units of a partial match; also the source of a replay */
    char[] preFromUArray = new char[EXT_MAX_UCHARS];

    /* reset to 0 by fromUnicodeReset(); not otherwise used in the visible part of this class */
    int preFromUBegin;

    int preFromULength; /* negative: replay */

    /* UTF-16 form (1 or 2 chars) of the code point handed to the error callback */
    char[] invalidUCharBuffer = new char[2];

    /* number of valid chars in invalidUCharBuffer */
    int invalidUCharLength;

    /* opaque context object passed through to the error callbacks */
    Object fromUContext;

    /* callback invoked for unmappable input; defaults to "stop" */
    private CharsetCallback.Encoder onUnmappableInput = CharsetCallback.FROM_U_CALLBACK_STOP;

    /* callback invoked for malformed input; defaults to "stop" */
    private CharsetCallback.Encoder onMalformedInput = CharsetCallback.FROM_U_CALLBACK_STOP;
     69 
     70     CharsetCallback.Encoder fromCharErrorBehaviour = new CharsetCallback.Encoder() {
     71         public CoderResult call(CharsetEncoderICU encoder, Object context,
     72                 CharBuffer source, ByteBuffer target, IntBuffer offsets,
     73                 char[] buffer, int length, int cp, CoderResult cr) {
     74             if (cr.isUnmappable()) {
     75                 return onUnmappableInput.call(encoder, context, source, target,
     76                         offsets, buffer, length, cp, cr);
     77             } else /* if (cr.isMalformed()) */ {
     78                 return onMalformedInput.call(encoder, context, source, target,
     79                         offsets, buffer, length, cp, cr);
     80             }
     81             // return CharsetCallback.FROM_U_CALLBACK_STOP.call(encoder, context, source, target, offsets, buffer, length, cp, cr);
     82 
     83         }
     84     };
     85 
    /*
     * Constructs a new encoder for the given charset.
     * The average bytes-per-char handed to the superclass is derived from the charset's
     * minBytesPerChar and maxBytesPerChar; the maximum is the charset's maxBytesPerChar.
     *
     * @param cs
     *            the charset for which the encoder is created
     * @param replacement
     *            the substitution bytes
     */
    CharsetEncoderICU(CharsetICU cs, byte[] replacement) {
        // NOTE(review): if minBytesPerChar/maxBytesPerChar are declared as int, the division
        // below truncates (e.g. 1 and 2 yield 1, not 1.5) -- confirm against CharsetICU.
        super(cs, (cs.minBytesPerChar + cs.maxBytesPerChar) / 2,
                cs.maxBytesPerChar, replacement);
    }
     98 
     99     /**
    100      * Is this Encoder allowed to use fallbacks? A fallback mapping is a mapping
    101      * that will convert a Unicode codepoint sequence to a byte sequence, but
    102      * the encoded byte sequence will round trip convert to a different
    103      * Unicode codepoint sequence.
    104      * @return true if the converter uses fallback, false otherwise.
    105      * @stable ICU 3.8
    106      */
    107     public boolean isFallbackUsed() {
    108         return useFallback;
    109     }
    110 
    111     /**
    112      * Sets whether this Encoder can use fallbacks?
    113      * @param usesFallback true if the user wants the converter to take
    114      *  advantage of the fallback mapping, false otherwise.
    115      * @stable ICU 3.8
    116      */
    117     public void setFallbackUsed(boolean usesFallback) {
    118         useFallback = usesFallback;
    119     }
    120 
    121     /*
    122      * Use fallbacks from Unicode to codepage when useFallback or for private-use code points
    123      * @param c A codepoint
    124      */
    125     final boolean isFromUUseFallback(int c) {
    126         return (useFallback) || isUnicodePrivateUse(c);
    127     }
    128 
    129     /**
    130      * Use fallbacks from Unicode to codepage when useFallback or for private-use code points
    131      */
    132     static final boolean isFromUUseFallback(boolean iUseFallback, int c) {
    133         return (iUseFallback) || isUnicodePrivateUse(c);
    134     }
    135 
    136     private static final boolean isUnicodePrivateUse(int c) {
    137         // First test for U+E000 to optimize for the most common characters.
    138         return c >= 0xE000 && (c <= 0xF8FF ||
    139                 c >= 0xF0000 && (c <= 0xFFFFD ||
    140                 (c >= 0x100000 && c <= 0x10FFFD)));
    141     }
    142 
    143     /**
    144      * Sets the action to be taken if an illegal sequence is encountered
    145      *
    146      * @param newAction
    147      *            action to be taken
    148      * @exception IllegalArgumentException
    149      * @stable ICU 3.6
    150      */
    151     protected void implOnMalformedInput(CodingErrorAction newAction) {
    152         onMalformedInput = getCallback(newAction);
    153     }
    154 
    155     /**
    156      * Sets the action to be taken if an illegal sequence is encountered
    157      *
    158      * @param newAction
    159      *            action to be taken
    160      * @exception IllegalArgumentException
    161      * @stable ICU 3.6
    162      */
    163     protected void implOnUnmappableCharacter(CodingErrorAction newAction) {
    164         onUnmappableInput = getCallback(newAction);
    165     }
    166 
    167     /**
    168      * Sets the callback encoder method and context to be used if an illegal sequence is encountered.
    169      * You would normally call this twice to set both the malform and unmappable error. In this case,
    170      * newContext should remain the same since using a different newContext each time will negate the last
    171      * one used.
    172      * @param err CoderResult
    173      * @param newCallback CharsetCallback.Encoder
    174      * @param newContext Object
    175      * @stable ICU 4.0
    176      */
    177     public final void setFromUCallback(CoderResult err, CharsetCallback.Encoder newCallback, Object newContext) {
    178         if (err.isMalformed()) {
    179             onMalformedInput = newCallback;
    180         } else if (err.isUnmappable()) {
    181             onUnmappableInput = newCallback;
    182         } else {
    183             /* Error: Only malformed and unmappable are handled. */
    184         }
    185 
    186         if (fromUContext == null || !fromUContext.equals(newContext)) {
    187             setFromUContext(newContext);
    188         }
    189     }
    190 
    191     /**
    192      * Sets fromUContext used in callbacks.
    193      *
    194      * @param newContext Object
    195      * @exception IllegalArgumentException The object is an illegal argument for UContext.
    196      * @stable ICU 4.0
    197      */
    198     public final void setFromUContext(Object newContext) {
    199         fromUContext = newContext;
    200     }
    201 
    202     private static CharsetCallback.Encoder getCallback(CodingErrorAction action) {
    203         if (action == CodingErrorAction.REPLACE) {
    204             return CharsetCallback.FROM_U_CALLBACK_SUBSTITUTE;
    205         } else if (action == CodingErrorAction.IGNORE) {
    206             return CharsetCallback.FROM_U_CALLBACK_SKIP;
    207         } else /* if (action == CodingErrorAction.REPORT) */ {
    208             return CharsetCallback.FROM_U_CALLBACK_STOP;
    209         }
    210     }
    211 
    212     private static final CharBuffer EMPTY = CharBuffer.allocate(0);
    213 
    214     /**
    215      * Flushes any characters saved in the converter's internal buffer and
    216      * resets the converter.
    217      * @param out action to be taken
    218      * @return result of flushing action and completes the decoding all input.
    219      *         Returns CoderResult.UNDERFLOW if the action succeeds.
    220      * @stable ICU 3.6
    221      */
    222     protected CoderResult implFlush(ByteBuffer out) {
    223         return encode(EMPTY, out, null, true);
    224     }
    225 
    226     /**
    227      * Resets the from Unicode mode of converter
    228      * @stable ICU 3.6
    229      */
    230     protected void implReset() {
    231         errorBufferLength = 0;
    232         fromUnicodeStatus = 0;
    233         fromUChar32 = 0;
    234         fromUnicodeReset();
    235     }
    236 
    237     private void fromUnicodeReset() {
    238         preFromUBegin = 0;
    239         preFromUFirstCP = UConverterConstants.U_SENTINEL;
    240         preFromULength = 0;
    241     }
    242 
    243     /**
    244      * Encodes one or more chars. The default behaviour of the
    245      * converter is stop and report if an error in input stream is encountered.
    246      * To set different behaviour use @see CharsetEncoder.onMalformedInput()
    247      * @param in buffer to decode
    248      * @param out buffer to populate with decoded result
    249      * @return result of decoding action. Returns CoderResult.UNDERFLOW if the decoding
    250      *         action succeeds or more input is needed for completing the decoding action.
    251      * @stable ICU 3.6
    252      */
    253     protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
    254         if (!in.hasRemaining() && this.errorBufferLength == 0) { // make sure the errorBuffer is empty
    255             // The Java framework should have already substituted what was left.
    256             fromUChar32 = 0;
    257             //fromUnicodeReset();
    258             return CoderResult.UNDERFLOW;
    259         }
    260         in.position(in.position() + fromUCountPending());
    261         /* do the conversion */
    262         CoderResult ret = encode(in, out, null, false);
    263         setSourcePosition(in);
    264         /* No need to reset to keep the proper state of the encoder.
    265          if (ret.isUnderflow() && in.hasRemaining()) {
    266             // The Java framework is going to substitute what is left.
    267             //fromUnicodeReset();
    268         } */
    269         return ret;
    270     }
    271 
    /*
     * Implements ICU semantics of buffer management. This is the conversion function
     * supplied by the concrete converter; fromUnicodeWithCallback() calls it repeatedly
     * and handles the errors it reports.
     * @param source The input character buffer
     * @param target The output byte buffer
     * @param offsets Optional buffer of source offsets; callers in this class pass null
     * @param flush true if no additional input will follow the given source buffer
     * @return A CoderResult object that contains the error result when an error occurs.
     */
    abstract CoderResult encodeLoop(CharBuffer source, ByteBuffer target,
            IntBuffer offsets, boolean flush);
    281 
    282     /*
    283      * Implements ICU semantics for encoding the buffer
    284      * @param source The input character buffer
    285      * @param target The output byte buffer
    286      * @param offsets
    287      * @param flush true if, and only if, the invoker can provide no
    288      *  additional input bytes beyond those in the given buffer.
    289      * @return A CoderResult object that contains the error result when an error occurs.
    290      */
    291     final CoderResult encode(CharBuffer source, ByteBuffer target,
    292             IntBuffer offsets, boolean flush) {
    293 
    294         /* check parameters */
    295         if (target == null || source == null) {
    296             throw new IllegalArgumentException();
    297         }
    298 
    299         /*
    300          * Make sure that the buffer sizes do not exceed the number range for
    301          * int32_t because some functions use the size (in units or bytes)
    302          * rather than comparing pointers, and because offsets are int32_t values.
    303          *
    304          * size_t is guaranteed to be unsigned and large enough for the job.
    305          *
    306          * Return with an error instead of adjusting the limits because we would
    307          * not be able to maintain the semantics that either the source must be
    308          * consumed or the target filled (unless an error occurs).
    309          * An adjustment would be targetLimit=t+0x7fffffff; for example.
    310          */
    311 
    312         /* flush the target overflow buffer */
    313         if (errorBufferLength > 0) {
    314             byte[] overflowArray;
    315             int i, length;
    316 
    317             overflowArray = errorBuffer;
    318             length = errorBufferLength;
    319             i = 0;
    320             do {
    321                 if (target.remaining() == 0) {
    322                     /* the overflow buffer contains too much, keep the rest */
    323                     int j = 0;
    324 
    325                     do {
    326                         overflowArray[j++] = overflowArray[i++];
    327                     } while (i < length);
    328 
    329                     errorBufferLength = (byte) j;
    330                     return CoderResult.OVERFLOW;
    331                 }
    332 
    333                 /* copy the overflow contents to the target */
    334                 target.put(overflowArray[i++]);
    335                 if (offsets != null) {
    336                     offsets.put(-1); /* no source index available for old output */
    337                 }
    338             } while (i < length);
    339 
    340             /* the overflow buffer is completely copied to the target */
    341             errorBufferLength = 0;
    342         }
    343 
    344         if (!flush && source.remaining() == 0 && preFromULength >= 0) {
    345             /* the overflow buffer is emptied and there is no new input: we are done */
    346             return CoderResult.UNDERFLOW;
    347         }
    348 
    349         /*
    350          * Do not simply return with a buffer overflow error if
    351          * !flush && t==targetLimit
    352          * because it is possible that the source will not generate any output.
    353          * For example, the skip callback may be called;
    354          * it does not output anything.
    355          */
    356 
    357         return fromUnicodeWithCallback(source, target, offsets, flush);
    358 
    359     }
    360 
    361     /*
    362      * Implementation note for m:n conversions
    363      *
    364      * While collecting source units to find the longest match for m:n conversion,
    365      * some source units may need to be stored for a partial match.
    366      * When a second buffer does not yield a match on all of the previously stored
    367      * source units, then they must be "replayed", i.e., fed back into the converter.
    368      *
    369      * The code relies on the fact that replaying will not nest -
    370      * converting a replay buffer will not result in a replay.
    371      * This is because a replay is necessary only after the _continuation_ of a
    372      * partial match failed, but a replay buffer is converted as a whole.
    373      * It may result in some of its units being stored again for a partial match,
    374      * but there will not be a continuation _during_ the replay which could fail.
    375      *
    376      * It is conceivable that a callback function could call the converter
    377      * recursively in a way that causes another replay to be stored, but that
    378      * would be an error in the callback function.
    379      * Such violations will cause assertion failures in a debug build,
    380      * and wrong output, but they will not cause a crash.
    381      */
    382     final CoderResult fromUnicodeWithCallback(CharBuffer source,
    383             ByteBuffer target, IntBuffer offsets, boolean flush) {
    384         int sBufferIndex;
    385         int sourceIndex;
    386         int errorInputLength;
    387         boolean converterSawEndOfInput, calledCallback;
    388 
    389         /* variables for m:n conversion */
    390         CharBuffer replayArray = CharBuffer.allocate(EXT_MAX_UCHARS);
    391         int replayArrayIndex = 0;
    392         CharBuffer realSource;
    393         boolean realFlush;
    394 
    395         CoderResult cr = CoderResult.UNDERFLOW;
    396 
    397         /* get the converter implementation function */
    398         sourceIndex = 0;
    399 
    400         if (preFromULength >= 0) {
    401             /* normal mode */
    402             realSource = null;
    403             realFlush = false;
    404         } else {
    405             /*
    406              * Previous m:n conversion stored source units from a partial match
    407              * and failed to consume all of them.
    408              * We need to "replay" them from a temporary buffer and convert them first.
    409              */
    410             realSource = source;
    411             realFlush = flush;
    412 
    413             //UConverterUtility.uprv_memcpy(replayArray, replayArrayIndex, preFromUArray, 0, -preFromULength*UMachine.U_SIZEOF_UCHAR);
    414             replayArray.put(preFromUArray, 0, -preFromULength);
    415             source = replayArray;
    416             source.position(replayArrayIndex);
    417             source.limit(replayArrayIndex - preFromULength); //preFromULength is negative, see declaration
    418             flush = false;
    419 
    420             preFromULength = 0;
    421         }
    422 
    423         /*
    424          * loop for conversion and error handling
    425          *
    426          * loop {
    427          *   convert
    428          *   loop {
    429          *     update offsets
    430          *     handle end of input
    431          *     handle errors/call callback
    432          *   }
    433          * }
    434          */
    435         for (;;) {
    436             /* convert */
    437             cr = encodeLoop(source, target, offsets, flush);
    438             /*
    439              * set a flag for whether the converter
    440              * successfully processed the end of the input
    441              *
    442              * need not check cnv.preFromULength==0 because a replay (<0) will cause
    443              * s<sourceLimit before converterSawEndOfInput is checked
    444              */
    445             converterSawEndOfInput = (cr.isUnderflow() && flush
    446                     && source.remaining() == 0 && fromUChar32 == 0);
    447 
    448             /* no callback called yet for this iteration */
    449             calledCallback = false;
    450 
    451             /* no sourceIndex adjustment for conversion, only for callback output */
    452             errorInputLength = 0;
    453 
    454             /*
    455              * loop for offsets and error handling
    456              *
    457              * iterates at most 3 times:
    458              * 1. to clean up after the conversion function
    459              * 2. after the callback
    460              * 3. after the callback again if there was truncated input
    461              */
    462             for (;;) {
    463                 /* update offsets if we write any */
    464                 /* Currently offsets are not being used in ICU4J */
    465                 /* if (offsets != null) {
    466                     int length = target.remaining();
    467                     if (length > 0) {
    468 
    469                         /*
    470                          * if a converter handles offsets and updates the offsets
    471                          * pointer at the end, then offset should not change
    472                          * here;
    473                          * however, some converters do not handle offsets at all
    474                          * (sourceIndex<0) or may not update the offsets pointer
    475                          */
    476                  /*       offsets.position(offsets.position() + length);
    477                     }
    478 
    479                     if (sourceIndex >= 0) {
    480                         sourceIndex += (int) (source.position());
    481                     }
    482                 } */
    483 
    484                 if (preFromULength < 0) {
    485                     /*
    486                      * switch the source to new replay units (cannot occur while replaying)
    487                      * after offset handling and before end-of-input and callback handling
    488                      */
    489                     if (realSource == null) {
    490                         realSource = source;
    491                         realFlush = flush;
    492 
    493                         //UConverterUtility.uprv_memcpy(replayArray, replayArrayIndex, preFromUArray, 0, -preFromULength*UMachine.U_SIZEOF_UCHAR);
    494                         replayArray.put(preFromUArray, 0, -preFromULength);
    495 
    496                         source = replayArray;
    497                         source.position(replayArrayIndex);
    498                         source.limit(replayArrayIndex - preFromULength);
    499                         flush = false;
    500                         if ((sourceIndex += preFromULength) < 0) {
    501                             sourceIndex = -1;
    502                         }
    503 
    504                         preFromULength = 0;
    505                     } else {
    506                         /* see implementation note before _fromUnicodeWithCallback() */
    507                         //agljport:todo U_ASSERT(realSource==NULL);
    508                         Assert.assrt(realSource == null);
    509                     }
    510                 }
    511 
    512                 /* update pointers */
    513                 sBufferIndex = source.position();
    514                 if (cr.isUnderflow()) {
    515                     if (sBufferIndex < source.limit()) {
    516                         /*
    517                          * continue with the conversion loop while there is still input left
    518                          * (continue converting by breaking out of only the inner loop)
    519                          */
    520                         break;
    521                     } else if (realSource != null) {
    522                         /* switch back from replaying to the real source and continue */
    523                         source = realSource;
    524                         flush = realFlush;
    525                         sourceIndex = source.position();
    526                         realSource = null;
    527                         break;
    528                     } else if (flush && fromUChar32 != 0) {
    529                         /*
    530                          * the entire input stream is consumed
    531                          * and there is a partial, truncated input sequence left
    532                          */
    533 
    534                         /* inject an error and continue with callback handling */
    535                         //err[0]=ErrorCode.U_TRUNCATED_CHAR_FOUND;
    536                         cr = CoderResult.malformedForLength(1);
    537                         calledCallback = false; /* new error condition */
    538                     } else {
    539                         /* input consumed */
    540                         if (flush) {
    541                             /*
    542                              * return to the conversion loop once more if the flush
    543                              * flag is set and the conversion function has not
    544                              * successfully processed the end of the input yet
    545                              *
    546                              * (continue converting by breaking out of only the inner loop)
    547                              */
    548                             if (!converterSawEndOfInput) {
    549                                 break;
    550                             }
    551 
    552                             /* reset the converter without calling the callback function */
    553                             implReset();
    554                         }
    555 
    556                         /* done successfully */
    557                         return cr;
    558                     }
    559                 }
    560 
    561                 /*U_FAILURE(*err) */
    562                 {
    563 
    564                     if (calledCallback || cr.isOverflow()
    565                             || (!cr.isMalformed() && !cr.isUnmappable())) {
    566                         /*
    567                          * the callback did not or cannot resolve the error:
    568                          * set output pointers and return
    569                          *
    570                          * the check for buffer overflow is redundant but it is
    571                          * a high-runner case and hopefully documents the intent
    572                          * well
    573                          *
    574                          * if we were replaying, then the replay buffer must be
    575                          * copied back into the UConverter
    576                          * and the real arguments must be restored
    577                          */
    578                         if (realSource != null) {
    579                             int length;
    580 
    581                             //agljport:todo U_ASSERT(cnv.preFromULength==0);
    582 
    583                             length = source.remaining();
    584                             if (length > 0) {
    585                                 //UConverterUtility.uprv_memcpy(preFromUArray, 0, sourceArray, pArgs.sourceBegin, length*UMachine.U_SIZEOF_UCHAR);
    586                                 source.get(preFromUArray, 0, length);
    587                                 preFromULength = (byte) -length;
    588                             }
    589                         }
    590                         return cr;
    591                     }
    592                 }
    593 
    594                 /* callback handling */
    595                 {
    596                     int codePoint;
    597 
    598                     /* get and write the code point */
    599                     codePoint = fromUChar32;
    600                     errorInputLength = UTF16.append(invalidUCharBuffer, 0,
    601                             fromUChar32);
    602                     invalidUCharLength = errorInputLength;
    603 
    604                     /* set the converter state to deal with the next character */
    605                     fromUChar32 = 0;
    606 
    607                     /* call the callback function */
    608                     cr = fromCharErrorBehaviour.call(this, fromUContext,
    609                             source, target, offsets, invalidUCharBuffer,
    610                             invalidUCharLength, codePoint, cr);
    611                 }
    612 
    613                 /*
    614                  * loop back to the offset handling
    615                  *
    616                  * this flag will indicate after offset handling
    617                  * that a callback was called;
    618                  * if the callback did not resolve the error, then we return
    619                  */
    620                 calledCallback = true;
    621             }
    622         }
    623     }
    624 
    625     /*
    626      * Ascertains if a given Unicode code point (32bit value for handling surrogates)
    627      * can be converted to the target encoding. If the caller wants to test if a
    628      * surrogate pair can be converted to target encoding then the
    629      * responsibility of assembling the int value lies with the caller.
    630      * For assembling a code point the caller can use UTF16 class of ICU4J and do something like:
    631      * <pre>
    632      *  while(i<mySource.length){
    633      *      if(UTF16.isLeadSurrogate(mySource[i])&& i+1< mySource.length){
    634      *          if(UTF16.isTrailSurrogate(mySource[i+1])){
    635      *              int temp = UTF16.charAt(mySource,i,i+1,0);
    636      *              if(!((CharsetEncoderICU) myConv).canEncode(temp)){
    637      *                  passed=false;
    638      *              }
    639      *              i++;
    640      *              i++;
    641      *          }
    642      *      }
    643      *  }
    644      * </pre>
    645      * or
     * <pre>
     *  String src = new String(mySource);
     *  int i = 0, codepoint;
     *  boolean passed = true;
     *  while(i<src.length()){
     *      codepoint = UTF16.charAt(src,i);
     *      i+= (codepoint>0xffff)? 2:1;
     *      if(!((CharsetEncoderICU) myConv).canEncode(codepoint)){
     *          passed = false;
     *      }
     *  }
     * </pre>
    658      *
    659      * @param codepoint Unicode code point as int value
    660      * @return true if a character can be converted
    661      */
    662     /* TODO This is different from Java's canEncode(char) API.
    663      * ICU's API should implement getUnicodeSet,
    664      * and override canEncode(char) which queries getUnicodeSet.
    665      * The getUnicodeSet should return a frozen UnicodeSet or use a fillin parameter, like ICU4C.
    666      */
    667     /*public boolean canEncode(int codepoint) {
    668         return true;
    669     }*/
    670     /**
    671      * Overrides super class method
    672      * @stable ICU 3.6
    673      */
    674     public boolean isLegalReplacement(byte[] repl) {
    675         return true;
    676     }
    677 
    678     /*
    679      * Writes out the specified output bytes to the target byte buffer or to converter internal buffers.
    680      * @param cnv
    681      * @param bytesArray
    682      * @param bytesBegin
    683      * @param bytesLength
    684      * @param out
    685      * @param offsets
    686      * @param sourceIndex
    687      * @return A CoderResult object that contains the error result when an error occurs.
    688      */
    689     static final CoderResult fromUWriteBytes(CharsetEncoderICU cnv,
    690             byte[] bytesArray, int bytesBegin, int bytesLength, ByteBuffer out,
    691             IntBuffer offsets, int sourceIndex) {
    692 
    693         //write bytes
    694         int obl = bytesLength;
    695         CoderResult cr = CoderResult.UNDERFLOW;
    696         int bytesLimit = bytesBegin + bytesLength;
    697         try {
    698             for (; bytesBegin < bytesLimit;) {
    699                 out.put(bytesArray[bytesBegin]);
    700                 bytesBegin++;
    701             }
    702             // success
    703             bytesLength = 0;
    704         } catch (BufferOverflowException ex) {
    705             cr = CoderResult.OVERFLOW;
    706         }
    707 
    708         if (offsets != null) {
    709             while (obl > bytesLength) {
    710                 offsets.put(sourceIndex);
    711                 --obl;
    712             }
    713         }
    714         //write overflow
    715         cnv.errorBufferLength = bytesLimit - bytesBegin;
    716         if (cnv.errorBufferLength > 0) {
    717             int index = 0;
    718             while (bytesBegin < bytesLimit) {
    719                 cnv.errorBuffer[index++] = bytesArray[bytesBegin++];
    720             }
    721             cr = CoderResult.OVERFLOW;
    722         }
    723         return cr;
    724     }
    725 
    726     /*
    727      * Returns the number of chars held in the converter's internal state
    728      * because more input is needed for completing the conversion. This function is
    729      * useful for mapping semantics of ICU's converter interface to those of iconv,
    730      * and this information is not needed for normal conversion.
    731      * @return The number of chars in the state. -1 if an error is encountered.
    732      */
    733     /*public*/int fromUCountPending() {
    734         if (preFromULength > 0) {
    735             return UTF16.getCharCount(preFromUFirstCP) + preFromULength;
    736         } else if (preFromULength < 0) {
    737             return -preFromULength;
    738         } else if (fromUChar32 > 0) {
    739             return 1;
    740         } else if (preFromUFirstCP > 0) {
    741             return UTF16.getCharCount(preFromUFirstCP);
    742         }
    743         return 0;
    744     }
    745 
    746     /**
    747      *
    748      * @param source
    749      */
    750     private final void setSourcePosition(CharBuffer source) {
    751 
    752         // ok was there input held in the previous invocation of encodeLoop
    753         // that resulted in output in this invocation?
    754         source.position(source.position() - fromUCountPending());
    755     }
    756 
    757     /*
    758      * Write the codepage substitution character.
    759      * Subclasses to override this method.
    760      * For stateful converters, it is typically necessary to handle this
    761      * specificially for the converter in order to properly maintain the state.
    762      * @param source The input character buffer
    763      * @param target The output byte buffer
    764      * @param offsets
    765      * @return A CoderResult object that contains the error result when an error occurs.
    766      */
    767     CoderResult cbFromUWriteSub(CharsetEncoderICU encoder, CharBuffer source,
    768             ByteBuffer target, IntBuffer offsets) {
    769         CharsetICU cs = (CharsetICU) encoder.charset();
    770         byte[] sub = encoder.replacement();
    771         if (cs.subChar1 != 0 && encoder.invalidUCharBuffer[0] <= 0xff) {
    772             return CharsetEncoderICU.fromUWriteBytes(encoder,
    773                     new byte[] { cs.subChar1 }, 0, 1, target, offsets, source
    774                             .position());
    775         } else {
    776             return CharsetEncoderICU.fromUWriteBytes(encoder, sub, 0,
    777                     sub.length, target, offsets, source.position());
    778         }
    779     }
    780 
    781     /*
    782      * Write the characters to target.
    783      * @param source The input character buffer
    784      * @param target The output byte buffer
    785      * @param offsets
    786      * @return A CoderResult object that contains the error result when an error occurs.
    787      */
    788     CoderResult cbFromUWriteUChars(CharsetEncoderICU encoder,
    789             CharBuffer source, ByteBuffer target, IntBuffer offsets) {
    790         CoderResult cr = CoderResult.UNDERFLOW;
    791 
    792         /* This is a fun one.  Recursion can occur - we're basically going to
    793          * just retry shoving data through the same converter. Note, if you got
    794          * here through some kind of invalid sequence, you maybe should emit a
    795          * reset sequence of some kind. Since this IS an actual conversion,
    796          * take care that you've changed the callback or the data, or you'll
    797          * get an infinite loop.
    798          */
    799 
    800         int oldTargetPosition = target.position();
    801         int offsetIndex = source.position();
    802 
    803         cr = encoder.encode(source, target, null, false); /* no offsets and no flush */
    804 
    805         if (offsets != null) {
    806             while (target.position() != oldTargetPosition) {
    807                 offsets.put(offsetIndex);
    808                 oldTargetPosition++;
    809             }
    810         }
    811 
    812         /* Note, if you did something like used a stop subcallback, things would get interesting.
    813          * In fact, here's where we want to return the partially consumed in-source!
    814          */
    815         if (cr.isOverflow()) {
    816             /* Overflowed target. Now, we'll write into the charErrorBuffer.
    817              * It's a fixed size. If we overflow it...Hm
    818              */
    819 
    820             /* start the new target at the first free slot in the error buffer */
    821             int errBuffLen = encoder.errorBufferLength;
    822             ByteBuffer newTarget = ByteBuffer.wrap(encoder.errorBuffer);
    823             newTarget.position(errBuffLen); /* set the position at the end of the error buffer */
    824             encoder.errorBufferLength = 0;
    825 
    826             encoder.encode(source, newTarget, null, false);
    827 
    828             encoder.errorBuffer = newTarget.array();
    829             encoder.errorBufferLength = newTarget.position();
    830         }
    831 
    832         return cr;
    833     }
    834 
    835     /**
    836      * <p>
    837      * Handles a common situation where a character has been read and it may be
    838      * a lead surrogate followed by a trail surrogate. This method can change
    839      * the source position and will modify fromUChar32.
    840      * </p>
    841      *
    842      * <p>
    843      * If <code>null</code> is returned, then there was success in reading a
    844      * surrogate pair, the codepoint is stored in <code>fromUChar32</code> and
    845      * <code>fromUChar32</code> should be reset (to 0) after being read.
    846      * </p>
    847      *
    848      * @param source
    849      *            The encoding source.
    850      * @param lead
    851      *            A character that may be the first in a surrogate pair.
    852      * @return <code>CoderResult.malformedForLength(1)</code> or
    853      *         <code>CoderResult.UNDERFLOW</code> if there is a problem, or
    854      *         <code>null</code> if there isn't.
    855      * @see #handleSurrogates(CharBuffer, char)
    856      * @see #handleSurrogates(char[], int, int, char)
    857      */
    858     final CoderResult handleSurrogates(CharBuffer source, char lead) {
    859         if (!UTF16.isLeadSurrogate(lead)) {
    860             fromUChar32 = lead;
    861             return CoderResult.malformedForLength(1);
    862         }
    863 
    864         if (!source.hasRemaining()) {
    865             fromUChar32 = lead;
    866             return CoderResult.UNDERFLOW;
    867         }
    868 
    869         char trail = source.get();
    870 
    871         if (!UTF16.isTrailSurrogate(trail)) {
    872             fromUChar32 = lead;
    873             source.position(source.position() - 1);
    874             return CoderResult.malformedForLength(1);
    875         }
    876 
    877         fromUChar32 = UCharacter.getCodePoint(lead, trail);
    878         return null;
    879     }
    880 
    881     /**
    882      * <p>
    883      * Same as <code>handleSurrogates(CharBuffer, char)</code>, but with arrays. As an added
    884      * requirement, the calling method must also increment the index if this method returns
    885      * <code>null</code>.
    886      * </p>
    887      *
    888      *
    889      * @param source
    890      *            The encoding source.
    891      * @param lead
    892      *            A character that may be the first in a surrogate pair.
    893      * @return <code>CoderResult.malformedForLength(1)</code> or
    894      *         <code>CoderResult.UNDERFLOW</code> if there is a problem, or <code>null</code> if
    895      *         there isn't.
    896      * @see #handleSurrogates(CharBuffer, char)
    897      * @see #handleSurrogates(char[], int, int, char)
    898      */
    899     final CoderResult handleSurrogates(char[] sourceArray, int sourceIndex,
    900             int sourceLimit, char lead) {
    901         if (!UTF16.isLeadSurrogate(lead)) {
    902             fromUChar32 = lead;
    903             return CoderResult.malformedForLength(1);
    904         }
    905 
    906         if (sourceIndex >= sourceLimit) {
    907             fromUChar32 = lead;
    908             return CoderResult.UNDERFLOW;
    909         }
    910 
    911         char trail = sourceArray[sourceIndex];
    912 
    913         if (!UTF16.isTrailSurrogate(trail)) {
    914             fromUChar32 = lead;
    915             return CoderResult.malformedForLength(1);
    916         }
    917 
    918         fromUChar32 = UCharacter.getCodePoint(lead, trail);
    919         return null;
    920     }
    921 
    922     /**
    923      * Returns the maxCharsPerByte value for the Charset that created this encoder.
    924      * @return maxCharsPerByte
    925      * @stable ICU 4.8
    926      */
    927     public final float maxCharsPerByte() {
    928         return ((CharsetICU)(this.charset())).maxCharsPerByte;
    929     }
    930 
    931     /**
    932      * Calculates the size of a buffer for conversion from Unicode to a charset.
    933      * The calculated size is guaranteed to be sufficient for this conversion.
    934      *
    935      * It takes into account initial and final non-character bytes that are output
    936      * by some converters.
    937      * It does not take into account callbacks which output more than one charset
    938      * character sequence per call, like escape callbacks.
    939      * The default (substitution) callback only outputs one charset character sequence.
    940      *
    941      * @param length Number of chars to be converted.
    942      * @param maxCharSize Return value from maxBytesPerChar for the converter
    943      *                    that will be used.
    944      * @return Size of a buffer that will be large enough to hold the output of bytes
    945      *
    946      * @stable ICU 49
    947      */
    948     public static int getMaxBytesForString(int length, int maxCharSize) {
    949         return ((length + 10) * maxCharSize);
    950     }
    951 
    952 }
    953