/* ====================================================================
 * Copyright (c) 2006 J.T. Beetstra
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 * ====================================================================
 */

package com.beetstra.jutf7;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;

/**
 * <p>
 * The CharsetEncoder used to encode both variants of the UTF-7 charset and the
 * modified-UTF-7 charset.
 * </p>
 * <p>
 * <strong>Please note this class does not behave strictly according to the
 * specification in Sun Java VMs before 1.6.</strong> This is done to get around
 * a bug in the implementation of
 * {@link java.nio.charset.CharsetEncoder#encode(CharBuffer)}. Unfortunately,
 * that method cannot be overridden.
 * </p>
 *
 * @see <a href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6221056">JDK
 *      bug 6221056</a>
 * @author Jaap Beetstra
 */
class UTF7StyleCharsetEncoder extends CharsetEncoder {
    private static final float AVG_BYTES_PER_CHAR = 1.5f;
    private static final float MAX_BYTES_PER_CHAR = 5.0f;
    private final UTF7StyleCharset cs;
    private final Base64Util base64;
    private final byte shift;
    private final byte unshift;
    private final boolean strict;
    /** True while the output is inside a shifted (base 64) section. */
    private boolean base64mode;
    /** Number of input bits collected in {@link #sextet} but not yet written. */
    private int bitsToOutput;
    /** The partially filled 6-bit group waiting for more input bits. */
    private int sextet;
    /**
     * Enables the work-around in {@link #encodeLoop(CharBuffer, ByteBuffer)}.
     * Set by the static initializer for specification versions 1.4 and 1.5 on
     * Sun VMs; deliberately neither private nor final.
     */
    static boolean useUglyHackToForceCallToFlushInJava5;
    static {
        String version = System.getProperty("java.specification.version");
        String vendor = System.getProperty("java.vm.vendor");
        useUglyHackToForceCallToFlushInJava5 = "1.4".equals(version) || "1.5".equals(version);
        useUglyHackToForceCallToFlushInJava5 &= "Sun Microsystems Inc.".equals(vendor);
    }

    /**
     * Creates an encoder for the given charset.
     *
     * @param cs The charset this encoder belongs to; it supplies the shift and
     *            unshift bytes and decides which characters are written
     *            directly
     * @param base64 The helper used to map sextets to output bytes
     * @param strict If <code>true</code>, an unshift byte is always written
     *            when leaving base 64 mode, even where it could be omitted
     */
    UTF7StyleCharsetEncoder(UTF7StyleCharset cs, Base64Util base64, boolean strict) {
        super(cs, AVG_BYTES_PER_CHAR, MAX_BYTES_PER_CHAR);
        this.cs = cs;
        this.base64 = base64;
        this.strict = strict;
        this.shift = cs.shift();
        this.unshift = cs.unshift();
    }

    /*
     * (non-Javadoc)
     * @see java.nio.charset.CharsetEncoder#implReset()
     */
    protected void implReset() {
        base64mode = false;
        sextet = 0;
        bitsToOutput = 0;
    }

    /**
     * {@inheritDoc}
     * <p>
     * When the encoder is in base 64 mode this writes the pending partial
     * sextet (if any bits are waiting) followed by the unshift byte. Outside
     * base 64 mode nothing is written.
     * </p>
     * <p>
     * Note that this method might return <code>CoderResult.OVERFLOW</code> (as
     * is required by the specification) if insufficient space is available in
     * the output buffer. However, calling it again on JDKs before Java 6
     * triggers a bug in
     * {@link java.nio.charset.CharsetEncoder#flush(ByteBuffer)} causing it to
     * throw an IllegalStateException (the buggy method is <code>final</code>,
     * thus cannot be overridden).
     * </p>
     *
     * @see <a
     *      href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6227608">
     *      JDK bug 6227608</a>
     * @param out The output byte buffer
     * @return A coder-result object describing the reason for termination
     */
    protected CoderResult implFlush(ByteBuffer out) {
        if (!base64mode) {
            return CoderResult.UNDERFLOW;
        }
        final boolean hasPendingBits = bitsToOutput != 0;
        // Only ask for the bytes that will really be written: the unshift
        // byte, plus one more when a partial sextet is still pending.
        final int required = hasPendingBits ? 2 : 1;
        if (out.remaining() < required) {
            return CoderResult.OVERFLOW;
        }
        if (hasPendingBits) {
            out.put(base64.getChar(sextet));
        }
        out.put(unshift);
        return CoderResult.UNDERFLOW;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Note that this method might return <code>CoderResult.OVERFLOW</code>,
     * even though there is sufficient space available in the output buffer.
     * This is done to force the broken implementation of
     * {@link java.nio.charset.CharsetEncoder#encode(CharBuffer)} to call flush
     * (the buggy method is <code>final</code>, thus cannot be overridden).
     * </p>
     * <p>
     * However, String.getBytes() fails if CoderResult.OVERFLOW is returned,
     * since this assumes it always allocates sufficient bytes (maxBytesPerChar
     * * nr_of_chars). Thus, as an extra check, the size of the input buffer is
     * compared against the size of the output buffer. A static variable is used
     * to indicate if a broken java version is used.
     * </p>
     * <p>
     * It is not possible to directly write the last few bytes, since more bytes
     * might be waiting to be encoded than those available in the input buffer.
     * </p>
     *
     * @see <a
     *      href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6221056">
     *      JDK bug 6221056</a>
     * @param in The input character buffer
     * @param out The output byte buffer
     * @return A coder-result object describing the reason for termination
     */
    protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
        while (in.hasRemaining()) {
            // Reserve room before consuming a character, so that the helpers
            // below can write without further checks and no input is lost
            // when the output buffer is full.
            if (out.remaining() < 4) {
                return CoderResult.OVERFLOW;
            }
            char ch = in.get();
            if (cs.canEncodeDirectly(ch)) {
                unshift(out, ch);
                out.put((byte)ch);
            } else if (!base64mode && ch == shift) {
                // A literal shift character outside base 64 mode is written
                // as the shift byte immediately followed by the unshift byte.
                out.put(shift);
                out.put(unshift);
            } else {
                encodeBase64(ch, out);
            }
        }
        /*
         * <HACK type="ugly"> These lines are required to trick JDK 1.5 and
         * earlier into flushing when using Charset.encode(String),
         * Charset.encode(CharBuffer) or CharsetEncoder.encode(CharBuffer)
         * Without them, the last few bytes may be missing.
         */
        if (base64mode && useUglyHackToForceCallToFlushInJava5
                && out.limit() != MAX_BYTES_PER_CHAR * in.limit()) {
            return CoderResult.OVERFLOW;
        }
        /* </HACK> */
        return CoderResult.UNDERFLOW;
    }

    /**
     * <p>
     * Writes the bytes necessary to leave <i>base 64 mode</i>. This might
     * include an unshift character. Does nothing when the encoder is not in
     * base 64 mode.
     * </p>
     *
     * @param out The output byte buffer; the caller must have ensured there
     *            is room for up to two bytes
     * @param ch The character that will be written directly after leaving
     *            base 64 mode; used to decide whether the unshift byte may be
     *            omitted
     */
    private void unshift(ByteBuffer out, char ch) {
        if (!base64mode) {
            return;
        }
        if (bitsToOutput != 0) {
            out.put(base64.getChar(sextet));
        }
        // The unshift byte is written when the next character belongs to the
        // base 64 alphabet, is the unshift character itself, or when strict
        // mode is enabled; otherwise it is left out.
        if (base64.contains(ch) || ch == unshift || strict) {
            out.put(unshift);
        }
        base64mode = false;
        sextet = 0;
        bitsToOutput = 0;
    }

    /**
     * <p>
     * Writes the bytes necessary to encode a character in <i>base 64 mode</i>.
     * All bytes which are fully determined will be written. The fields
     * <code>bitsToOutput</code> and <code>sextet</code> are used to remember
     * the bytes not yet fully determined.
     * </p>
     *
     * @param ch The character to encode
     * @param out The output byte buffer; the caller must have ensured there
     *            is room for the shift byte and the completed sextets
     */
    private void encodeBase64(char ch, ByteBuffer out) {
        if (!base64mode) {
            out.put(shift);
        }
        base64mode = true;
        // The 16 bits of this character join whatever bits are still pending.
        bitsToOutput += 16;
        while (bitsToOutput >= 6) {
            bitsToOutput -= 6;
            // Combine the pending high bits with the next bits of ch and keep
            // only the low six bits.
            sextet += (ch >> bitsToOutput);
            sextet &= 0x3F;
            out.put(base64.getChar(sextet));
            sextet = 0;
        }
        // Keep the leftover low bits of ch, left-aligned within a sextet.
        sextet = (ch << (6 - bitsToOutput)) & 0x3F;
    }
}