Home | History | Annotate | Download | only in charset
      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
      4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
      5  *
      6  * This code is free software; you can redistribute it and/or modify it
      7  * under the terms of the GNU General Public License version 2 only, as
      8  * published by the Free Software Foundation.  Oracle designates this
      9  * particular file as subject to the "Classpath" exception as provided
     10  * by Oracle in the LICENSE file that accompanied this code.
     11  *
     12  * This code is distributed in the hope that it will be useful, but WITHOUT
     13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
     14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     15  * version 2 for more details (a copy is included in the LICENSE file that
     16  * accompanied this code).
     17  *
     18  * You should have received a copy of the GNU General Public License version
     19  * 2 along with this work; if not, write to the Free Software Foundation,
     20  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
     21  *
     22  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
     23  * or visit www.oracle.com if you need additional information or have any
     24  * questions.
     25  */
     26 
     27 // -- This file was mechanically generated: Do not edit! -- //
     28 
     29 package java.nio.charset;
     30 
     31 import java.nio.Buffer;
     32 import java.nio.ByteBuffer;
     33 import java.nio.CharBuffer;
     34 import java.nio.BufferOverflowException;
     35 import java.nio.BufferUnderflowException;
     36 import java.lang.ref.WeakReference;
     37 import java.nio.charset.CoderMalfunctionError;                  // javadoc
     38 import java.util.Arrays;
     39 
     40 
     41 /**
     42  * An engine that can transform a sequence of sixteen-bit Unicode characters into a sequence of
     43  * bytes in a specific charset.
     44  *
     45  * <a name="steps"></a>
     46  *
     47  * <p> The input character sequence is provided in a character buffer or a series
     48  * of such buffers.  The output byte sequence is written to a byte buffer
     49  * or a series of such buffers.  An encoder should always be used by making
     50  * the following sequence of method invocations, hereinafter referred to as an
     51  * <i>encoding operation</i>:
     52  *
     53  * <ol>
     54  *
     55  *   <li><p> Reset the encoder via the {@link #reset reset} method, unless it
     56  *   has not been used before; </p></li>
     57  *
     58  *   <li><p> Invoke the {@link #encode encode} method zero or more times, as
     59  *   long as additional input may be available, passing <tt>false</tt> for the
     60  *   <tt>endOfInput</tt> argument and filling the input buffer and flushing the
     61  *   output buffer between invocations; </p></li>
     62  *
     63  *   <li><p> Invoke the {@link #encode encode} method one final time, passing
     64  *   <tt>true</tt> for the <tt>endOfInput</tt> argument; and then </p></li>
     65  *
     66  *   <li><p> Invoke the {@link #flush flush} method so that the encoder can
     67  *   flush any internal state to the output buffer. </p></li>
     68  *
     69  * </ol>
     70  *
     71  * Each invocation of the {@link #encode encode} method will encode as many
     72  * characters as possible from the input buffer, writing the resulting bytes
     73  * to the output buffer.  The {@link #encode encode} method returns when more
     74  * input is required, when there is not enough room in the output buffer, or
     75  * when an encoding error has occurred.  In each case a {@link CoderResult}
     76  * object is returned to describe the reason for termination.  An invoker can
     77  * examine this object and fill the input buffer, flush the output buffer, or
     78  * attempt to recover from an encoding error, as appropriate, and try again.
     79  *
     80  * <a name="ce"></a>
     81  *
     82  * <p> There are two general types of encoding errors.  If the input character
     83  * sequence is not a legal sixteen-bit Unicode sequence then the input is considered <i>malformed</i>.  If
     84  * the input character sequence is legal but cannot be mapped to a valid
     85  * byte sequence in the given charset then an <i>unmappable character</i> has been encountered.
     86  *
     87  * <a name="cae"></a>
     88  *
     89  * <p> How an encoding error is handled depends upon the action requested for
     90  * that type of error, which is described by an instance of the {@linkplain
     91  * CodingErrorAction} class.  The possible error actions are to {@linkplain
     92  * CodingErrorAction#IGNORE ignore} the erroneous input, {@link
     93  * CodingErrorAction#REPORT report} the error to the invoker via
     94  * the returned {@link CoderResult} object, or {@linkplain CodingErrorAction#REPLACE
     95  * replace} the erroneous input with the current value of the
 * replacement byte array.  The replacement
 * is initially set to the encoder's default replacement, which often
 * (but not always) has the initial value&nbsp;<tt>{</tt>&nbsp;<tt>(byte)'?'</tt>&nbsp;<tt>}</tt>;
 * its value may be changed via the {@link #replaceWith(byte[])
 * replaceWith} method.
    108  *
    109  * <p> The default action for malformed-input and unmappable-character errors
    110  * is to {@linkplain CodingErrorAction#REPORT report} them.  The
    111  * malformed-input error action may be changed via the {@link
    112  * #onMalformedInput(CodingErrorAction) onMalformedInput} method; the
    113  * unmappable-character action may be changed via the {@link
    114  * #onUnmappableCharacter(CodingErrorAction) onUnmappableCharacter} method.
    115  *
    116  * <p> This class is designed to handle many of the details of the encoding
    117  * process, including the implementation of error actions.  An encoder for a
    118  * specific charset, which is a concrete subclass of this class, need only
    119  * implement the abstract {@link #encodeLoop encodeLoop} method, which
    120  * encapsulates the basic encoding loop.  A subclass that maintains internal
    121  * state should, additionally, override the {@link #implFlush implFlush} and
    122  * {@link #implReset implReset} methods.
    123  *
    124  * <p> Instances of this class are not safe for use by multiple concurrent
    125  * threads.  </p>
    126  *
    127  *
    128  * @author Mark Reinhold
    129  * @author JSR-51 Expert Group
    130  * @since 1.4
    131  *
    132  * @see ByteBuffer
    133  * @see CharBuffer
    134  * @see Charset
    135  * @see CharsetDecoder
    136  */
    137 
    138 public abstract class CharsetEncoder {
    139 
    140     private final Charset charset;
    141     private final float averageBytesPerChar;
    142     private final float maxBytesPerChar;
    143 
    144     private byte[] replacement;
    145     private CodingErrorAction malformedInputAction
    146         = CodingErrorAction.REPORT;
    147     private CodingErrorAction unmappableCharacterAction
    148         = CodingErrorAction.REPORT;
    149 
    150     // Internal states
    151     //
    152     private static final int ST_RESET   = 0;
    153     private static final int ST_CODING  = 1;
    154     private static final int ST_END     = 2;
    155     private static final int ST_FLUSHED = 3;
    156 
    157     private int state = ST_RESET;
    158 
    159     private static String stateNames[]
    160         = { "RESET", "CODING", "CODING_END", "FLUSHED" };
    161 
    162 
    163     /**
    164      * Initializes a new encoder.  The new encoder will have the given
    165      * bytes-per-char and replacement values.
    166      *
    167      * @param  cs
    168      *         The charset that created this encoder
    169      *
    170      * @param  averageBytesPerChar
    171      *         A positive float value indicating the expected number of
    172      *         bytes that will be produced for each input character
    173      *
    174      * @param  maxBytesPerChar
    175      *         A positive float value indicating the maximum number of
    176      *         bytes that will be produced for each input character
    177      *
    178      * @param  replacement
    179      *         The initial replacement; must not be <tt>null</tt>, must have
    180      *         non-zero length, must not be longer than maxBytesPerChar,
    181      *         and must be {@linkplain #isLegalReplacement legal}
    182      *
    183      * @throws  IllegalArgumentException
    184      *          If the preconditions on the parameters do not hold
    185      */
    protected CharsetEncoder(Charset cs, float averageBytesPerChar,
                             float maxBytesPerChar, byte[] replacement)
    // BEGIN Android-changed
    {
        // Delegates with trusted == false, so the five-argument constructor
        // runs the supplied replacement through replaceWith.
        this(cs, averageBytesPerChar, maxBytesPerChar, replacement, false);
    }
    195 
    CharsetEncoder(Charset cs, float averageBytesPerChar, float maxBytesPerChar,
                   byte[] replacement, boolean trusted)
                   // END Android-changed
    {
        this.charset = cs;

        // Both ratios must be strictly positive.
        if (averageBytesPerChar <= 0.0f) {
            throw new IllegalArgumentException("Non-positive averageBytesPerChar");
        }
        if (maxBytesPerChar <= 0.0f) {
            throw new IllegalArgumentException("Non-positive maxBytesPerChar");
        }
        // The average-versus-maximum consistency check is skipped when the
        // charset package runs at the "1.4" bug level.
        if (!Charset.atBugLevel("1.4") && averageBytesPerChar > maxBytesPerChar) {
            throw new IllegalArgumentException("averageBytesPerChar exceeds maxBytesPerChar");
        }

        // maxBytesPerChar must be assigned before replaceWith runs, because
        // replaceWith compares the replacement length against it.
        this.replacement = replacement;
        this.averageBytesPerChar = averageBytesPerChar;
        this.maxBytesPerChar = maxBytesPerChar;

        // BEGIN Android-changed
        // Trusted callers skip validation of the replacement.
        if (!trusted) {
            replaceWith(replacement);
        }
        // END Android-changed
    }
    227 
    228     /**
    229      * Initializes a new encoder.  The new encoder will have the given
    230      * bytes-per-char values and its replacement will be the
    231      * byte array <tt>{</tt>&nbsp;<tt>(byte)'?'</tt>&nbsp;<tt>}</tt>.
    232      *
    233      * @param  cs
    234      *         The charset that created this encoder
    235      *
    236      * @param  averageBytesPerChar
    237      *         A positive float value indicating the expected number of
    238      *         bytes that will be produced for each input character
    239      *
    240      * @param  maxBytesPerChar
    241      *         A positive float value indicating the maximum number of
    242      *         bytes that will be produced for each input character
    243      *
    244      * @throws  IllegalArgumentException
    245      *          If the preconditions on the parameters do not hold
    246      */
    protected CharsetEncoder(Charset cs,
                             float averageBytesPerChar,
                             float maxBytesPerChar)
    {
        // The single-byte '?' replacement is supplied to the four-argument constructor.
        this(cs, averageBytesPerChar, maxBytesPerChar, new byte[] { (byte) '?' });
    }
    255 
    256     /**
    257      * Returns the charset that created this encoder.
    258      *
    259      * @return  This encoder's charset
    260      */
    261     public final Charset charset() {
    262         return charset;
    263     }
    264 
    265     /**
    266      * Returns this encoder's replacement value.
    267      *
    268      * @return  This encoder's current replacement,
    269      *          which is never <tt>null</tt> and is never empty
    270      */
    271     public final byte[] replacement() {
    272         return Arrays.copyOf(replacement, replacement.length);
    273     }
    274 
    275     /**
    276      * Changes this encoder's replacement value.
    277      *
    278      * <p> This method invokes the {@link #implReplaceWith implReplaceWith}
    279      * method, passing the new replacement, after checking that the new
    280      * replacement is acceptable.  </p>
    281      *
     * @param  newReplacement
     *         The new replacement value; must not be <tt>null</tt>, must have
     *         non-zero length, must not be longer than the value returned by
     *         the {@link #maxBytesPerChar() maxBytesPerChar} method, and
     *         must be {@link #isLegalReplacement legal}
     *
    295      * @return  This encoder
    296      *
    297      * @throws  IllegalArgumentException
    298      *          If the preconditions on the parameter do not hold
    299      */
    300     public final CharsetEncoder replaceWith(byte[] newReplacement) {
    301         if (newReplacement == null)
    302             throw new IllegalArgumentException("Null replacement");
    303         int len = newReplacement.length;
    304         if (len == 0)
    305             throw new IllegalArgumentException("Empty replacement");
    306         if (len > maxBytesPerChar)
    307             throw new IllegalArgumentException("Replacement too long");
    308 
    309 
    310 
    311 
    312         if (!isLegalReplacement(newReplacement))
    313             throw new IllegalArgumentException("Illegal replacement");
    314         this.replacement = Arrays.copyOf(newReplacement, newReplacement.length);
    315 
    316         this.replacement = newReplacement;
    317         implReplaceWith(this.replacement);
    318         return this;
    319     }
    320 
    321     /**
    322      * Reports a change to this encoder's replacement value.
    323      *
    324      * <p> The default implementation of this method does nothing.  This method
    325      * should be overridden by encoders that require notification of changes to
    326      * the replacement.  </p>
    327      *
    328      * @param  newReplacement    The replacement value
    329      */
    protected void implReplaceWith(byte[] newReplacement) {
        // Intentionally empty: replaceWith calls this hook after storing the
        // new replacement; subclasses override it to be notified.
    }
    332 
    333 
    334 
    // Decoder reused across isLegalReplacement calls.  Held through a
    // WeakReference; isLegalReplacement creates a new decoder whenever the
    // reference is null or has been cleared.
    private WeakReference<CharsetDecoder> cachedDecoder = null;
    336 
    337     /**
    338      * Tells whether or not the given byte array is a legal replacement value
    339      * for this encoder.
    340      *
    341      * <p> A replacement is legal if, and only if, it is a legal sequence of
    342      * bytes in this encoder's charset; that is, it must be possible to decode
    343      * the replacement into one or more sixteen-bit Unicode characters.
    344      *
    345      * <p> The default implementation of this method is not very efficient; it
    346      * should generally be overridden to improve performance.  </p>
    347      *
    348      * @param  repl  The byte array to be tested
    349      *
    350      * @return  <tt>true</tt> if, and only if, the given byte array
    351      *          is a legal replacement value for this encoder
    352      */
    353     public boolean isLegalReplacement(byte[] repl) {
    354         WeakReference<CharsetDecoder> wr = cachedDecoder;
    355         CharsetDecoder dec = null;
    356         if ((wr == null) || ((dec = wr.get()) == null)) {
    357             dec = charset().newDecoder();
    358             dec.onMalformedInput(CodingErrorAction.REPORT);
    359             dec.onUnmappableCharacter(CodingErrorAction.REPORT);
    360             cachedDecoder = new WeakReference<CharsetDecoder>(dec);
    361         } else {
    362             dec.reset();
    363         }
    364         ByteBuffer bb = ByteBuffer.wrap(repl);
    365         CharBuffer cb = CharBuffer.allocate((int)(bb.remaining()
    366                                                   * dec.maxCharsPerByte()));
    367         CoderResult cr = dec.decode(bb, cb, true);
    368         return !cr.isError();
    369     }
    370 
    371 
    372 
    373     /**
    374      * Returns this encoder's current action for malformed-input errors.
    375      *
    376      * @return The current malformed-input action, which is never <tt>null</tt>
    377      */
    378     public CodingErrorAction malformedInputAction() {
    379         return malformedInputAction;
    380     }
    381 
    382     /**
    383      * Changes this encoder's action for malformed-input errors.
    384      *
    385      * <p> This method invokes the {@link #implOnMalformedInput
    386      * implOnMalformedInput} method, passing the new action.  </p>
    387      *
    388      * @param  newAction  The new action; must not be <tt>null</tt>
    389      *
    390      * @return  This encoder
    391      *
    392      * @throws IllegalArgumentException
    393      *         If the precondition on the parameter does not hold
    394      */
    395     public final CharsetEncoder onMalformedInput(CodingErrorAction newAction) {
    396         if (newAction == null)
    397             throw new IllegalArgumentException("Null action");
    398         malformedInputAction = newAction;
    399         implOnMalformedInput(newAction);
    400         return this;
    401     }
    402 
    403     /**
    404      * Reports a change to this encoder's malformed-input action.
    405      *
    406      * <p> The default implementation of this method does nothing.  This method
    407      * should be overridden by encoders that require notification of changes to
    408      * the malformed-input action.  </p>
    409      *
    410      * @param  newAction  The new action
    411      */
    protected void implOnMalformedInput(CodingErrorAction newAction) {
        // Intentionally empty: onMalformedInput calls this hook after storing
        // the new action; subclasses override it to be notified.
    }
    413 
    414     /**
    415      * Returns this encoder's current action for unmappable-character errors.
    416      *
    417      * @return The current unmappable-character action, which is never
    418      *         <tt>null</tt>
    419      */
    420     public CodingErrorAction unmappableCharacterAction() {
    421         return unmappableCharacterAction;
    422     }
    423 
    424     /**
    425      * Changes this encoder's action for unmappable-character errors.
    426      *
    427      * <p> This method invokes the {@link #implOnUnmappableCharacter
    428      * implOnUnmappableCharacter} method, passing the new action.  </p>
    429      *
    430      * @param  newAction  The new action; must not be <tt>null</tt>
    431      *
    432      * @return  This encoder
    433      *
    434      * @throws IllegalArgumentException
    435      *         If the precondition on the parameter does not hold
    436      */
    437     public final CharsetEncoder onUnmappableCharacter(CodingErrorAction
    438                                                       newAction)
    439     {
    440         if (newAction == null)
    441             throw new IllegalArgumentException("Null action");
    442         unmappableCharacterAction = newAction;
    443         implOnUnmappableCharacter(newAction);
    444         return this;
    445     }
    446 
    447     /**
    448      * Reports a change to this encoder's unmappable-character action.
    449      *
    450      * <p> The default implementation of this method does nothing.  This method
    451      * should be overridden by encoders that require notification of changes to
    452      * the unmappable-character action.  </p>
    453      *
    454      * @param  newAction  The new action
    455      */
    protected void implOnUnmappableCharacter(CodingErrorAction newAction) {
        // Intentionally empty: onUnmappableCharacter calls this hook after
        // storing the new action; subclasses override it to be notified.
    }
    457 
    458     /**
    459      * Returns the average number of bytes that will be produced for each
    460      * character of input.  This heuristic value may be used to estimate the size
    461      * of the output buffer required for a given input sequence.
    462      *
    463      * @return  The average number of bytes produced
    464      *          per character of input
    465      */
    466     public final float averageBytesPerChar() {
    467         return averageBytesPerChar;
    468     }
    469 
    470     /**
    471      * Returns the maximum number of bytes that will be produced for each
    472      * character of input.  This value may be used to compute the worst-case size
    473      * of the output buffer required for a given input sequence.
    474      *
    475      * @return  The maximum number of bytes that will be produced per
    476      *          character of input
    477      */
    478     public final float maxBytesPerChar() {
    479         return maxBytesPerChar;
    480     }
    481 
    482     /**
    483      * Encodes as many characters as possible from the given input buffer,
    484      * writing the results to the given output buffer.
    485      *
    486      * <p> The buffers are read from, and written to, starting at their current
    487      * positions.  At most {@link Buffer#remaining in.remaining()} characters
    488      * will be read and at most {@link Buffer#remaining out.remaining()}
    489      * bytes will be written.  The buffers' positions will be advanced to
    490      * reflect the characters read and the bytes written, but their marks and
    491      * limits will not be modified.
    492      *
    493      * <p> In addition to reading characters from the input buffer and writing
    494      * bytes to the output buffer, this method returns a {@link CoderResult}
    495      * object to describe its reason for termination:
    496      *
    497      * <ul>
    498      *
    499      *   <li><p> {@link CoderResult#UNDERFLOW} indicates that as much of the
    500      *   input buffer as possible has been encoded.  If there is no further
    501      *   input then the invoker can proceed to the next step of the
    502      *   <a href="#steps">encoding operation</a>.  Otherwise this method
    503      *   should be invoked again with further input.  </p></li>
    504      *
    505      *   <li><p> {@link CoderResult#OVERFLOW} indicates that there is
    506      *   insufficient space in the output buffer to encode any more characters.
    507      *   This method should be invoked again with an output buffer that has
    508      *   more {@linkplain Buffer#remaining remaining} bytes. This is
    509      *   typically done by draining any encoded bytes from the output
    510      *   buffer.  </p></li>
    511      *
    512      *   <li><p> A {@linkplain CoderResult#malformedForLength
    513      *   malformed-input} result indicates that a malformed-input
    514      *   error has been detected.  The malformed characters begin at the input
    515      *   buffer's (possibly incremented) position; the number of malformed
    516      *   characters may be determined by invoking the result object's {@link
    517      *   CoderResult#length() length} method.  This case applies only if the
    518      *   {@linkplain #onMalformedInput malformed action} of this encoder
    519      *   is {@link CodingErrorAction#REPORT}; otherwise the malformed input
    520      *   will be ignored or replaced, as requested.  </p></li>
    521      *
    522      *   <li><p> An {@linkplain CoderResult#unmappableForLength
    523      *   unmappable-character} result indicates that an
    524      *   unmappable-character error has been detected.  The characters that
    525      *   encode the unmappable character begin at the input buffer's (possibly
    526      *   incremented) position; the number of such characters may be determined
    527      *   by invoking the result object's {@link CoderResult#length() length}
    528      *   method.  This case applies only if the {@linkplain #onUnmappableCharacter
    529      *   unmappable action} of this encoder is {@link
    530      *   CodingErrorAction#REPORT}; otherwise the unmappable character will be
    531      *   ignored or replaced, as requested.  </p></li>
    532      *
    533      * </ul>
    534      *
    535      * In any case, if this method is to be reinvoked in the same encoding
    536      * operation then care should be taken to preserve any characters remaining
    537      * in the input buffer so that they are available to the next invocation.
    538      *
    539      * <p> The <tt>endOfInput</tt> parameter advises this method as to whether
    540      * the invoker can provide further input beyond that contained in the given
    541      * input buffer.  If there is a possibility of providing additional input
    542      * then the invoker should pass <tt>false</tt> for this parameter; if there
    543      * is no possibility of providing further input then the invoker should
    544      * pass <tt>true</tt>.  It is not erroneous, and in fact it is quite
    545      * common, to pass <tt>false</tt> in one invocation and later discover that
    546      * no further input was actually available.  It is critical, however, that
    547      * the final invocation of this method in a sequence of invocations always
    548      * pass <tt>true</tt> so that any remaining unencoded input will be treated
    549      * as being malformed.
    550      *
    551      * <p> This method works by invoking the {@link #encodeLoop encodeLoop}
    552      * method, interpreting its results, handling error conditions, and
    553      * reinvoking it as necessary.  </p>
    554      *
    555      *
    556      * @param  in
    557      *         The input character buffer
    558      *
    559      * @param  out
    560      *         The output byte buffer
    561      *
    562      * @param  endOfInput
    563      *         <tt>true</tt> if, and only if, the invoker can provide no
    564      *         additional input characters beyond those in the given buffer
    565      *
    566      * @return  A coder-result object describing the reason for termination
    567      *
    568      * @throws  IllegalStateException
    569      *          If an encoding operation is already in progress and the previous
    570      *          step was an invocation neither of the {@link #reset reset}
    571      *          method, nor of this method with a value of <tt>false</tt> for
    572      *          the <tt>endOfInput</tt> parameter, nor of this method with a
    573      *          value of <tt>true</tt> for the <tt>endOfInput</tt> parameter
    574      *          but a return value indicating an incomplete encoding operation
    575      *
    576      * @throws  CoderMalfunctionError
    577      *          If an invocation of the encodeLoop method threw
    578      *          an unexpected exception
    579      */
    580     public final CoderResult encode(CharBuffer in, ByteBuffer out,
    581                                     boolean endOfInput)
    582     {
    583         int newState = endOfInput ? ST_END : ST_CODING;
    584         if ((state != ST_RESET) && (state != ST_CODING)
    585             && !(endOfInput && (state == ST_END)))
    586             throwIllegalStateException(state, newState);
    587         state = newState;
    588 
    589         for (;;) {
    590 
    591             CoderResult cr;
    592             try {
    593                 cr = encodeLoop(in, out);
    594             } catch (BufferUnderflowException x) {
    595                 throw new CoderMalfunctionError(x);
    596             } catch (BufferOverflowException x) {
    597                 throw new CoderMalfunctionError(x);
    598             }
    599 
    600             if (cr.isOverflow())
    601                 return cr;
    602 
    603             if (cr.isUnderflow()) {
    604                 if (endOfInput && in.hasRemaining()) {
    605                     cr = CoderResult.malformedForLength(in.remaining());
    606                     // Fall through to malformed-input case
    607                 } else {
    608                     return cr;
    609                 }
    610             }
    611 
    612             CodingErrorAction action = null;
    613             if (cr.isMalformed())
    614                 action = malformedInputAction;
    615             else if (cr.isUnmappable())
    616                 action = unmappableCharacterAction;
    617             else
    618                 assert false : cr.toString();
    619 
    620             if (action == CodingErrorAction.REPORT)
    621                 return cr;
    622 
    623             if (action == CodingErrorAction.REPLACE) {
    624                 if (out.remaining() < replacement.length)
    625                     return CoderResult.OVERFLOW;
    626                 out.put(replacement);
    627             }
    628 
    629             if ((action == CodingErrorAction.IGNORE)
    630                 || (action == CodingErrorAction.REPLACE)) {
    631                 // Skip erroneous input either way
    632                 in.position(in.position() + cr.length());
    633                 continue;
    634             }
    635 
    636             assert false;
    637         }
    638 
    639     }
    640 
    641     /**
    642      * Flushes this encoder.
    643      *
    644      * <p> Some encoders maintain internal state and may need to write some
    645      * final bytes to the output buffer once the overall input sequence has
    646      * been read.
    647      *
    648      * <p> Any additional output is written to the output buffer beginning at
    649      * its current position.  At most {@link Buffer#remaining out.remaining()}
    650      * bytes will be written.  The buffer's position will be advanced
    651      * appropriately, but its mark and limit will not be modified.
    652      *
    653      * <p> If this method completes successfully then it returns {@link
    654      * CoderResult#UNDERFLOW}.  If there is insufficient room in the output
    655      * buffer then it returns {@link CoderResult#OVERFLOW}.  If this happens
    656      * then this method must be invoked again, with an output buffer that has
    657      * more room, in order to complete the current <a href="#steps">encoding
    658      * operation</a>.
    659      *
    660      * <p> If this encoder has already been flushed then invoking this method
    661      * has no effect.
    662      *
    663      * <p> This method invokes the {@link #implFlush implFlush} method to
    664      * perform the actual flushing operation.  </p>
    665      *
    666      * @param  out
    667      *         The output byte buffer
    668      *
    669      * @return  A coder-result object, either {@link CoderResult#UNDERFLOW} or
    670      *          {@link CoderResult#OVERFLOW}
    671      *
    672      * @throws  IllegalStateException
    673      *          If the previous step of the current encoding operation was an
    674      *          invocation neither of the {@link #flush flush} method nor of
    675      *          the three-argument {@link
    676      *          #encode(CharBuffer,ByteBuffer,boolean) encode} method
    677      *          with a value of <tt>true</tt> for the <tt>endOfInput</tt>
    678      *          parameter
    679      */
    680     public final CoderResult flush(ByteBuffer out) {
    681         if (state == ST_END) {
    682             CoderResult cr = implFlush(out);
    683             if (cr.isUnderflow())
    684                 state = ST_FLUSHED;
    685             return cr;
    686         }
    687 
    688         if (state != ST_FLUSHED)
    689             throwIllegalStateException(state, ST_FLUSHED);
    690 
    691         return CoderResult.UNDERFLOW; // Already flushed
    692     }
    693 
    694     /**
    695      * Flushes this encoder.
    696      *
    697      * <p> The default implementation of this method does nothing, and always
    698      * returns {@link CoderResult#UNDERFLOW}.  This method should be overridden
    699      * by encoders that may need to write final bytes to the output buffer
    700      * once the entire input sequence has been read. </p>
    701      *
    702      * @param  out
    703      *         The output byte buffer
    704      *
    705      * @return  A coder-result object, either {@link CoderResult#UNDERFLOW} or
    706      *          {@link CoderResult#OVERFLOW}
    707      */
    708     protected CoderResult implFlush(ByteBuffer out) {
    709         return CoderResult.UNDERFLOW;
    710     }
    711 
    712     /**
    713      * Resets this encoder, clearing any internal state.
    714      *
    715      * <p> This method resets charset-independent state and also invokes the
    716      * {@link #implReset() implReset} method in order to perform any
    717      * charset-specific reset actions.  </p>
    718      *
    719      * @return  This encoder
    720      *
    721      */
    722     public final CharsetEncoder reset() {
    723         implReset();
    724         state = ST_RESET;
    725         return this;
    726     }
    727 
    728     /**
    729      * Resets this encoder, clearing any charset-specific internal state.
    730      *
    731      * <p> The default implementation of this method does nothing.  This method
    732      * should be overridden by encoders that maintain internal state.  </p>
    733      */
    734     protected void implReset() { }
    735 
    736     /**
    737      * Encodes one or more characters into one or more bytes.
    738      *
    739      * <p> This method encapsulates the basic encoding loop, encoding as many
    740      * characters as possible until it either runs out of input, runs out of room
    741      * in the output buffer, or encounters an encoding error.  This method is
    742      * invoked by the {@link #encode encode} method, which handles result
    743      * interpretation and error recovery.
    744      *
    745      * <p> The buffers are read from, and written to, starting at their current
    746      * positions.  At most {@link Buffer#remaining in.remaining()} characters
    747      * will be read, and at most {@link Buffer#remaining out.remaining()}
    748      * bytes will be written.  The buffers' positions will be advanced to
    749      * reflect the characters read and the bytes written, but their marks and
    750      * limits will not be modified.
    751      *
    752      * <p> This method returns a {@link CoderResult} object to describe its
    753      * reason for termination, in the same manner as the {@link #encode encode}
    754      * method.  Most implementations of this method will handle encoding errors
    755      * by returning an appropriate result object for interpretation by the
    756      * {@link #encode encode} method.  An optimized implementation may instead
    757      * examine the relevant error action and implement that action itself.
    758      *
    759      * <p> An implementation of this method may perform arbitrary lookahead by
    760      * returning {@link CoderResult#UNDERFLOW} until it receives sufficient
    761      * input.  </p>
    762      *
    763      * @param  in
    764      *         The input character buffer
    765      *
    766      * @param  out
    767      *         The output byte buffer
    768      *
    769      * @return  A coder-result object describing the reason for termination
    770      */
    771     protected abstract CoderResult encodeLoop(CharBuffer in,
    772                                               ByteBuffer out);
    773 
    774     /**
    775      * Convenience method that encodes the remaining content of a single input
    776      * character buffer into a newly-allocated byte buffer.
    777      *
    778      * <p> This method implements an entire <a href="#steps">encoding
    779      * operation</a>; that is, it resets this encoder, then it encodes the
    780      * characters in the given character buffer, and finally it flushes this
    781      * encoder.  This method should therefore not be invoked if an encoding
    782      * operation is already in progress.  </p>
    783      *
    784      * @param  in
    785      *         The input character buffer
    786      *
    787      * @return A newly-allocated byte buffer containing the result of the
    788      *         encoding operation.  The buffer's position will be zero and its
    789      *         limit will follow the last byte written.
    790      *
    791      * @throws  IllegalStateException
    792      *          If an encoding operation is already in progress
    793      *
    794      * @throws  MalformedInputException
    795      *          If the character sequence starting at the input buffer's current
    796      *          position is not a legal sixteen-bit Unicode sequence and the current malformed-input action
    797      *          is {@link CodingErrorAction#REPORT}
    798      *
    799      * @throws  UnmappableCharacterException
    800      *          If the character sequence starting at the input buffer's current
    801      *          position cannot be mapped to an equivalent byte sequence and
    802      *          the current unmappable-character action is {@link
    803      *          CodingErrorAction#REPORT}
    804      */
    805     public final ByteBuffer encode(CharBuffer in)
    806         throws CharacterCodingException
    807     {
    808         int n = (int)(in.remaining() * averageBytesPerChar());
    809         ByteBuffer out = ByteBuffer.allocate(n);
    810 
    811         if ((n == 0) && (in.remaining() == 0))
    812             return out;
    813         reset();
    814         for (;;) {
    815             CoderResult cr = in.hasRemaining() ?
    816                 encode(in, out, true) : CoderResult.UNDERFLOW;
    817             if (cr.isUnderflow())
    818                 cr = flush(out);
    819 
    820             if (cr.isUnderflow())
    821                 break;
    822             if (cr.isOverflow()) {
    823                 n = 2*n + 1;    // Ensure progress; n might be 0!
    824                 ByteBuffer o = ByteBuffer.allocate(n);
    825                 out.flip();
    826                 o.put(out);
    827                 out = o;
    828                 continue;
    829             }
    830             cr.throwException();
    831         }
    832         out.flip();
    833         return out;
    834     }
    835 
    836 
    837 
    838 
    839 
    840 
    841 
    842 
    843 
    844 
    845 
    846 
    847 
    848 
    849 
    850 
    851 
    852 
    853 
    854 
    855 
    856 
    857 
    858 
    859 
    860 
    861 
    862 
    863 
    864 
    865 
    866 
    867 
    868 
    869 
    870 
    871 
    872 
    873 
    874 
    875 
    876 
    877 
    878 
    879 
    880 
    881 
    882 
    883 
    884 
    885 
    886 
    887 
    888 
    889 
    890 
    891 
    892 
    893 
    894 
    895 
    896 
    897 
    898 
    899 
    900 
    901 
    902 
    903 
    904 
    905 
    906 
    907 
    908 
    909 
    910 
    911 
    912 
    913 
    914     private boolean canEncode(CharBuffer cb) {
    915         // Empty buffers or char-sequences are always encodable by definition.
    916         if (!cb.hasRemaining()) {
    917             return true;
    918         }
    919 
    920         if (state == ST_FLUSHED)
    921             reset();
    922         else if (state != ST_RESET)
    923             throwIllegalStateException(state, ST_CODING);
    924         CodingErrorAction ma = malformedInputAction();
    925         CodingErrorAction ua = unmappableCharacterAction();
    926         try {
    927             onMalformedInput(CodingErrorAction.REPORT);
    928             onUnmappableCharacter(CodingErrorAction.REPORT);
    929             // Android-changed: Account for ignorable codepoints. ICU doesn't report
    930             // an error, but will return an empty buffer.
    931             ByteBuffer buf = encode(cb);
    932             return buf.hasRemaining();
    933         } catch (CharacterCodingException x) {
    934             // fall through to return false.
    935         } finally {
    936             onMalformedInput(ma);
    937             onUnmappableCharacter(ua);
    938             reset();
    939         }
    940         return false;
    941     }
    942 
    943     /**
    944      * Tells whether or not this encoder can encode the given character.
    945      *
    946      * <p> This method returns <tt>false</tt> if the given character is a
    947      * surrogate character; such characters can be interpreted only when they
    948      * are members of a pair consisting of a high surrogate followed by a low
    949      * surrogate.  The {@link #canEncode(java.lang.CharSequence)
    950      * canEncode(CharSequence)} method may be used to test whether or not a
    951      * character sequence can be encoded.
    952      *
    953      * <p> This method may modify this encoder's state; it should therefore not
    954      * be invoked if an <a href="#steps">encoding operation</a> is already in
    955      * progress.
    956      *
    957      * <p> The default implementation of this method is not very efficient; it
    958      * should generally be overridden to improve performance.  </p>
    959      *
    960      * @param   c
    961      *          The given character
    962      *
    963      * @return  <tt>true</tt> if, and only if, this encoder can encode
    964      *          the given character
    965      *
    966      * @throws  IllegalStateException
    967      *          If an encoding operation is already in progress
    968      */
    969     public boolean canEncode(char c) {
    970         CharBuffer cb = CharBuffer.allocate(1);
    971         cb.put(c);
    972         cb.flip();
    973         return canEncode(cb);
    974     }
    975 
    976     /**
    977      * Tells whether or not this encoder can encode the given character
    978      * sequence.
    979      *
    980      * <p> If this method returns <tt>false</tt> for a particular character
    981      * sequence then more information about why the sequence cannot be encoded
    982      * may be obtained by performing a full <a href="#steps">encoding
    983      * operation</a>.
    984      *
    985      * <p> This method may modify this encoder's state; it should therefore not
    986      * be invoked if an encoding operation is already in progress.
    987      *
    988      * <p> The default implementation of this method is not very efficient; it
    989      * should generally be overridden to improve performance.  </p>
    990      *
    991      * @param   cs
    992      *          The given character sequence
    993      *
    994      * @return  <tt>true</tt> if, and only if, this encoder can encode
    995      *          the given character without throwing any exceptions and without
    996      *          performing any replacements
    997      *
    998      * @throws  IllegalStateException
    999      *          If an encoding operation is already in progress
   1000      */
   1001     public boolean canEncode(CharSequence cs) {
   1002         CharBuffer cb;
   1003         if (cs instanceof CharBuffer)
   1004             cb = ((CharBuffer)cs).duplicate();
   1005         else
   1006             cb = CharBuffer.wrap(cs);
   1007         return canEncode(cb);
   1008     }
   1009 
   1010 
   1011 
   1012 
   1013     private void throwIllegalStateException(int from, int to) {
   1014         throw new IllegalStateException("Current state = " + stateNames[from]
   1015                                         + ", new state = " + stateNames[to]);
   1016     }
   1017 
   1018 }
   1019