1 /**************************************************************** 2 * Licensed to the Apache Software Foundation (ASF) under one * 3 * or more contributor license agreements. See the NOTICE file * 4 * distributed with this work for additional information * 5 * regarding copyright ownership. The ASF licenses this file * 6 * to you under the Apache License, Version 2.0 (the * 7 * "License"); you may not use this file except in compliance * 8 * with the License. You may obtain a copy of the License at * 9 * * 10 * http://www.apache.org/licenses/LICENSE-2.0 * 11 * * 12 * Unless required by applicable law or agreed to in writing, * 13 * software distributed under the License is distributed on an * 14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * 15 * KIND, either express or implied. See the License for the * 16 * specific language governing permissions and limitations * 17 * under the License. * 18 ****************************************************************/ 19 20 package org.apache.james.mime4j.decoder; 21 22 //BEGIN android-changed: Stubbing out logging 23 import org.apache.james.mime4j.Log; 24 import org.apache.james.mime4j.LogFactory; 25 //END android-changed 26 import org.apache.james.mime4j.util.CharsetUtil; 27 28 import java.io.ByteArrayInputStream; 29 import java.io.ByteArrayOutputStream; 30 import java.io.IOException; 31 import java.io.UnsupportedEncodingException; 32 33 /** 34 * Static methods for decoding strings, byte arrays and encoded words. 35 * 36 * 37 * @version $Id: DecoderUtil.java,v 1.3 2005/02/07 15:33:59 ntherning Exp $ 38 */ 39 public class DecoderUtil { 40 private static Log log = LogFactory.getLog(DecoderUtil.class); 41 42 /** 43 * Decodes a string containing quoted-printable encoded data. 44 * 45 * @param s the string to decode. 46 * @return the decoded bytes. 47 */ 48 public static byte[] decodeBaseQuotedPrintable(String s) { 49 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 50 51 try { 52 byte[] bytes = s.getBytes("US-ASCII"); 53 54 QuotedPrintableInputStream is = new QuotedPrintableInputStream( 55 new ByteArrayInputStream(bytes)); 56 57 int b = 0; 58 while ((b = is.read()) != -1) { 59 baos.write(b); 60 } 61 } catch (IOException e) { 62 /* 63 * This should never happen! 64 */ 65 log.error(e); 66 } 67 68 return baos.toByteArray(); 69 } 70 71 /** 72 * Decodes a string containing base64 encoded data. 73 * 74 * @param s the string to decode. 75 * @return the decoded bytes. 76 */ 77 public static byte[] decodeBase64(String s) { 78 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 79 80 try { 81 byte[] bytes = s.getBytes("US-ASCII"); 82 83 Base64InputStream is = new Base64InputStream( 84 new ByteArrayInputStream(bytes)); 85 86 int b = 0; 87 while ((b = is.read()) != -1) { 88 baos.write(b); 89 } 90 } catch (IOException e) { 91 /* 92 * This should never happen! 93 */ 94 log.error(e); 95 } 96 97 return baos.toByteArray(); 98 } 99 100 /** 101 * Decodes an encoded word encoded with the 'B' encoding (described in 102 * RFC 2047) found in a header field body. 103 * 104 * @param encodedWord the encoded word to decode. 105 * @param charset the Java charset to use. 106 * @return the decoded string. 107 * @throws UnsupportedEncodingException if the given Java charset isn't 108 * supported. 109 */ 110 public static String decodeB(String encodedWord, String charset) 111 throws UnsupportedEncodingException { 112 113 return new String(decodeBase64(encodedWord), charset); 114 } 115 116 /** 117 * Decodes an encoded word encoded with the 'Q' encoding (described in 118 * RFC 2047) found in a header field body. 119 * 120 * @param encodedWord the encoded word to decode. 121 * @param charset the Java charset to use. 122 * @return the decoded string. 123 * @throws UnsupportedEncodingException if the given Java charset isn't 124 * supported. 125 */ 126 public static String decodeQ(String encodedWord, String charset) 127 throws UnsupportedEncodingException { 128 129 /* 130 * Replace _ with =20 131 */ 132 StringBuffer sb = new StringBuffer(); 133 for (int i = 0; i < encodedWord.length(); i++) { 134 char c = encodedWord.charAt(i); 135 if (c == '_') { 136 sb.append("=20"); 137 } else { 138 sb.append(c); 139 } 140 } 141 142 return new String(decodeBaseQuotedPrintable(sb.toString()), charset); 143 } 144 145 /** 146 * Decodes a string containing encoded words as defined by RFC 2047. 147 * Encoded words in have the form 148 * =?charset?enc?Encoded word?= where enc is either 'Q' or 'q' for 149 * quoted-printable and 'B' or 'b' for Base64. 150 * 151 * ANDROID: COPIED FROM A NEWER VERSION OF MIME4J 152 * 153 * @param body the string to decode. 154 * @return the decoded string. 155 */ 156 public static String decodeEncodedWords(String body) { 157 158 // ANDROID: Most strings will not include "=?" so a quick test can prevent unneeded 159 // object creation. This could also be handled via lazy creation of the StringBuilder. 160 if (body.indexOf("=?") == -1) { 161 return body; 162 } 163 164 int previousEnd = 0; 165 boolean previousWasEncoded = false; 166 167 StringBuilder sb = new StringBuilder(); 168 169 while (true) { 170 int begin = body.indexOf("=?", previousEnd); 171 172 // ANDROID: The mime4j original version has an error here. It gets confused if 173 // the encoded string begins with an '=' (just after "?Q?"). This patch seeks forward 174 // to find the two '?' in the "header", before looking for the final "?=". 175 if (begin == -1) { 176 break; 177 } 178 int qm1 = body.indexOf('?', begin + 2); 179 if (qm1 == -1) { 180 break; 181 } 182 int qm2 = body.indexOf('?', qm1 + 1); 183 if (qm2 == -1) { 184 break; 185 } 186 int end = body.indexOf("?=", qm2 + 1); 187 if (end == -1) { 188 break; 189 } 190 end += 2; 191 192 String sep = body.substring(previousEnd, begin); 193 194 String decoded = decodeEncodedWord(body, begin, end); 195 if (decoded == null) { 196 sb.append(sep); 197 sb.append(body.substring(begin, end)); 198 } else { 199 if (!previousWasEncoded || !CharsetUtil.isWhitespace(sep)) { 200 sb.append(sep); 201 } 202 sb.append(decoded); 203 } 204 205 previousEnd = end; 206 previousWasEncoded = decoded != null; 207 } 208 209 if (previousEnd == 0) 210 return body; 211 212 sb.append(body.substring(previousEnd)); 213 return sb.toString(); 214 } 215 216 // return null on error. Begin is index of '=?' in body. 217 public static String decodeEncodedWord(String body, int begin, int end) { 218 // Skip the '?=' chars in body and scan forward from there for next '?' 219 int qm1 = body.indexOf('?', begin + 2); 220 if (qm1 == -1 || qm1 == end - 2) 221 return null; 222 223 int qm2 = body.indexOf('?', qm1 + 1); 224 if (qm2 == -1 || qm2 == end - 2) 225 return null; 226 227 String mimeCharset = body.substring(begin + 2, qm1); 228 String encoding = body.substring(qm1 + 1, qm2); 229 String encodedText = body.substring(qm2 + 1, end - 2); 230 231 String charset = CharsetUtil.toJavaCharset(mimeCharset); 232 if (charset == null) { 233 if (log.isWarnEnabled()) { 234 log.warn("MIME charset '" + mimeCharset + "' in encoded word '" 235 + body.substring(begin, end) + "' doesn't have a " 236 + "corresponding Java charset"); 237 } 238 return null; 239 } else if (!CharsetUtil.isDecodingSupported(charset)) { 240 if (log.isWarnEnabled()) { 241 log.warn("Current JDK doesn't support decoding of charset '" 242 + charset + "' (MIME charset '" + mimeCharset 243 + "' in encoded word '" + body.substring(begin, end) 244 + "')"); 245 } 246 return null; 247 } 248 249 if (encodedText.length() == 0) { 250 if (log.isWarnEnabled()) { 251 log.warn("Missing encoded text in encoded word: '" 252 + body.substring(begin, end) + "'"); 253 } 254 return null; 255 } 256 257 try { 258 if (encoding.equalsIgnoreCase("Q")) { 259 return DecoderUtil.decodeQ(encodedText, charset); 260 } else if (encoding.equalsIgnoreCase("B")) { 261 return DecoderUtil.decodeB(encodedText, charset); 262 } else { 263 if (log.isWarnEnabled()) { 264 log.warn("Warning: Unknown encoding in encoded word '" 265 + body.substring(begin, end) + "'"); 266 } 267 return null; 268 } 269 } catch (UnsupportedEncodingException e) { 270 // should not happen because of isDecodingSupported check above 271 if (log.isWarnEnabled()) { 272 log.warn("Unsupported encoding in encoded word '" 273 + body.substring(begin, end) + "'", e); 274 } 275 return null; 276 } catch (RuntimeException e) { 277 if (log.isWarnEnabled()) { 278 log.warn("Could not decode encoded word '" 279 + body.substring(begin, end) + "'", e); 280 } 281 return null; 282 } 283 } 284 } 285