Home | History | Annotate | Download | only in jutf7
      1 /* ====================================================================
      2  * Copyright (c) 2006 J.T. Beetstra
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining
      5  * a copy of this software and associated documentation files (the
      6  * "Software"), to deal in the Software without restriction, including
      7  * without limitation the rights to use, copy, modify, merge, publish,
      8  * distribute, sublicense, and/or sell copies of the Software, and to
      9  * permit persons to whom the Software is furnished to do so, subject to
     10  * the following conditions:
     11  *
     12  * The above copyright notice and this permission notice shall be
     13  * included in all copies or substantial portions of the Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
     18  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
     19  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     20  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     21  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  * ====================================================================
     23  */
     24 
     25 package com.beetstra.jutf7;
     26 
     27 import java.nio.ByteBuffer;
     28 import java.nio.CharBuffer;
     29 import java.nio.charset.CharsetEncoder;
     30 import java.nio.charset.CoderResult;
     31 
     32 /**
     33  * <p>
     34  * The CharsetEncoder used to encode both variants of the UTF-7 charset and the
     35  * modified-UTF-7 charset.
     36  * </p>
     37  * <p>
     38  * <strong>Please note this class does not behave strictly according to the
     39  * specification in Sun Java VMs before 1.6.</strong> This is done to get around
     40  * a bug in the implementation of
     41  * {@link java.nio.charset.CharsetEncoder#encode(CharBuffer)}. Unfortunately,
     42  * that method cannot be overridden.
     43  * </p>
     44  *
     45  * @see <a href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6221056">JDK
     46  *      bug 6221056< /a>
     47  * @author Jaap Beetstra
     48  */
     49 class UTF7StyleCharsetEncoder extends CharsetEncoder {
     50     private static final float AVG_BYTES_PER_CHAR = 1.5f;
     51     private static final float MAX_BYTES_PER_CHAR = 5.0f;
     52     private final UTF7StyleCharset cs;
     53     private final Base64Util base64;
     54     private final byte shift;
     55     private final byte unshift;
     56     private final boolean strict;
     57     private boolean base64mode;
     58     private int bitsToOutput;
     59     private int sextet;
     60     static boolean useUglyHackToForceCallToFlushInJava5;
     61     static {
     62         String version = System.getProperty("java.specification.version");
     63         String vendor = System.getProperty("java.vm.vendor");
     64         useUglyHackToForceCallToFlushInJava5 = "1.4".equals(version) || "1.5".equals(version);
     65         useUglyHackToForceCallToFlushInJava5 &= "Sun Microsystems Inc.".equals(vendor);
     66     }
     67 
     68     UTF7StyleCharsetEncoder(UTF7StyleCharset cs, Base64Util base64, boolean strict) {
     69         super(cs, AVG_BYTES_PER_CHAR, MAX_BYTES_PER_CHAR);
     70         this.cs = cs;
     71         this.base64 = base64;
     72         this.strict = strict;
     73         this.shift = cs.shift();
     74         this.unshift = cs.unshift();
     75     }
     76 
     77     /*
     78      * (non-Javadoc)
     79      * @see java.nio.charset.CharsetEncoder#implReset()
     80      */
     81     protected void implReset() {
     82         base64mode = false;
     83         sextet = 0;
     84         bitsToOutput = 0;
     85     }
     86 
     87     /**
     88      * {@inheritDoc}
     89      * <p>
     90      * Note that this method might return <code>CoderResult.OVERFLOW</code> (as
     91      * is required by the specification) if insufficient space is available in
     92      * the output buffer. However, calling it again on JDKs before Java 6
     93      * triggers a bug in
     94      * {@link java.nio.charset.CharsetEncoder#flush(ByteBuffer)} causing it to
     95      * throw an IllegalStateException (the buggy method is <code>final</code>,
     96      * thus cannot be overridden).
     97      * </p>
     98      *
     99      * @see <a
    100      *      href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6227608">
    101      *      JDK bug 6227608< /a>
    102      * @param out The output byte buffer
    103      * @return A coder-result object describing the reason for termination
    104      */
    105     protected CoderResult implFlush(ByteBuffer out) {
    106         if (base64mode) {
    107             if (out.remaining() < 2)
    108                 return CoderResult.OVERFLOW;
    109             if (bitsToOutput != 0)
    110                 out.put(base64.getChar(sextet));
    111             out.put(unshift);
    112         }
    113         return CoderResult.UNDERFLOW;
    114     }
    115 
    116     /**
    117      * {@inheritDoc}
    118      * <p>
    119      * Note that this method might return <code>CoderResult.OVERFLOW</code>,
    120      * even though there is sufficient space available in the output buffer.
    121      * This is done to force the broken implementation of
    122      * {@link java.nio.charset.CharsetEncoder#encode(CharBuffer)} to call flush
    123      * (the buggy method is <code>final</code>, thus cannot be overridden).
    124      * </p>
    125      * <p>
    126      * However, String.getBytes() fails if CoderResult.OVERFLOW is returned,
    127      * since this assumes it always allocates sufficient bytes (maxBytesPerChar
    128      * * nr_of_chars). Thus, as an extra check, the size of the input buffer is
    129      * compared against the size of the output buffer. A static variable is used
    130      * to indicate if a broken java version is used.
    131      * </p>
    132      * <p>
    133      * It is not possible to directly write the last few bytes, since more bytes
    134      * might be waiting to be encoded then those available in the input buffer.
    135      * </p>
    136      *
    137      * @see <a
    138      *      href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6221056">
    139      *      JDK bug 6221056< /a>
    140      * @param in The input character buffer
    141      * @param out The output byte buffer
    142      * @return A coder-result object describing the reason for termination
    143      */
    144     protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
    145         while (in.hasRemaining()) {
    146             if (out.remaining() < 4)
    147                 return CoderResult.OVERFLOW;
    148             char ch = in.get();
    149             if (cs.canEncodeDirectly(ch)) {
    150                 unshift(out, ch);
    151                 out.put((byte)ch);
    152             } else if (!base64mode && ch == shift) {
    153                 out.put(shift);
    154                 out.put(unshift);
    155             } else
    156                 encodeBase64(ch, out);
    157         }
    158         /*
    159          * <HACK type="ugly"> These lines are required to trick JDK 1.5 and
    160          * earlier into flushing when using Charset.encode(String),
    161          * Charset.encode(CharBuffer) or CharsetEncoder.encode(CharBuffer)
    162          * Without them, the last few bytes may be missing.
    163          */
    164         if (base64mode && useUglyHackToForceCallToFlushInJava5
    165                 && out.limit() != MAX_BYTES_PER_CHAR * in.limit())
    166             return CoderResult.OVERFLOW;
    167         /* </HACK> */
    168         return CoderResult.UNDERFLOW;
    169     }
    170 
    171     /**
    172      * <p>
    173      * Writes the bytes necessary to leave <i>base 64 mode</i>. This might
    174      * include an unshift character.
    175      * </p>
    176      *
    177      * @param out
    178      * @param ch
    179      */
    180     private void unshift(ByteBuffer out, char ch) {
    181         if (!base64mode)
    182             return;
    183         if (bitsToOutput != 0)
    184             out.put(base64.getChar(sextet));
    185         if (base64.contains(ch) || ch == unshift || strict)
    186             out.put(unshift);
    187         base64mode = false;
    188         sextet = 0;
    189         bitsToOutput = 0;
    190     }
    191 
    192     /**
    193      * <p>
    194      * Writes the bytes necessary to encode a character in <i>base 64 mode</i>.
    195      * All bytes which are fully determined will be written. The fields
    196      * <code>bitsToOutput</code> and <code>sextet</code> are used to remember
    197      * the bytes not yet fully determined.
    198      * </p>
    199      *
    200      * @param out
    201      * @param ch
    202      */
    203     private void encodeBase64(char ch, ByteBuffer out) {
    204         if (!base64mode)
    205             out.put(shift);
    206         base64mode = true;
    207         bitsToOutput += 16;
    208         while (bitsToOutput >= 6) {
    209             bitsToOutput -= 6;
    210             sextet += (ch >> bitsToOutput);
    211             sextet &= 0x3F;
    212             out.put(base64.getChar(sextet));
    213             sextet = 0;
    214         }
    215         sextet = (ch << (6 - bitsToOutput)) & 0x3F;
    216     }
    217 }
    218