1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.emailcommon.internet; 18 19 import android.text.TextUtils; 20 import android.util.Base64; 21 import android.util.Base64DataException; 22 import android.util.Base64InputStream; 23 import android.util.Log; 24 25 import com.android.emailcommon.mail.Body; 26 import com.android.emailcommon.mail.BodyPart; 27 import com.android.emailcommon.mail.Message; 28 import com.android.emailcommon.mail.MessagingException; 29 import com.android.emailcommon.mail.Multipart; 30 import com.android.emailcommon.mail.Part; 31 32 import org.apache.commons.io.IOUtils; 33 import org.apache.james.mime4j.codec.EncoderUtil; 34 import org.apache.james.mime4j.decoder.DecoderUtil; 35 import org.apache.james.mime4j.decoder.QuotedPrintableInputStream; 36 import org.apache.james.mime4j.util.CharsetUtil; 37 38 import java.io.ByteArrayOutputStream; 39 import java.io.IOException; 40 import java.io.InputStream; 41 import java.io.OutputStream; 42 import java.util.ArrayList; 43 import java.util.regex.Matcher; 44 import java.util.regex.Pattern; 45 46 public class MimeUtility { 47 private static final String LOG_TAG = "Email"; 48 49 public static final String MIME_TYPE_RFC822 = "message/rfc822"; 50 private final static Pattern PATTERN_CR_OR_LF = Pattern.compile("\r|\n"); 51 52 /** 53 * Replace sequences of CRLF+WSP with WSP. Tries to preserve original string 54 * object whenever possible. 55 */ 56 public static String unfold(String s) { 57 if (s == null) { 58 return null; 59 } 60 Matcher patternMatcher = PATTERN_CR_OR_LF.matcher(s); 61 if (patternMatcher.find()) { 62 patternMatcher.reset(); 63 s = patternMatcher.replaceAll(""); 64 } 65 return s; 66 } 67 68 public static String decode(String s) { 69 if (s == null) { 70 return null; 71 } 72 return DecoderUtil.decodeEncodedWords(s); 73 } 74 75 public static String unfoldAndDecode(String s) { 76 return decode(unfold(s)); 77 } 78 79 // TODO implement proper foldAndEncode 80 // NOTE: When this really works, we *must* remove all calls to foldAndEncode2() to prevent 81 // duplication of encoding. 82 public static String foldAndEncode(String s) { 83 return s; 84 } 85 86 /** 87 * INTERIM version of foldAndEncode that will be used only by Subject: headers. 88 * This is safer than implementing foldAndEncode() (see above) and risking unknown damage 89 * to other headers. 90 * 91 * TODO: Copy this code to foldAndEncode(), get rid of this function, confirm all working OK. 92 * 93 * @param s original string to encode and fold 94 * @param usedCharacters number of characters already used up by header name 95 96 * @return the String ready to be transmitted 97 */ 98 public static String foldAndEncode2(String s, int usedCharacters) { 99 // james.mime4j.codec.EncoderUtil.java 100 // encode: encodeIfNecessary(text, usage, numUsedInHeaderName) 101 // Usage.TEXT_TOKENlooks like the right thing for subjects 102 // use WORD_ENTITY for address/names 103 104 String encoded = EncoderUtil.encodeIfNecessary(s, EncoderUtil.Usage.TEXT_TOKEN, 105 usedCharacters); 106 107 return fold(encoded, usedCharacters); 108 } 109 110 /** 111 * INTERIM: From newer version of org.apache.james (but we don't want to import 112 * the entire MimeUtil class). 113 * 114 * Splits the specified string into a multiple-line representation with 115 * lines no longer than 76 characters (because the line might contain 116 * encoded words; see <a href='http://www.faqs.org/rfcs/rfc2047.html'>RFC 117 * 2047</a> section 2). If the string contains non-whitespace sequences 118 * longer than 76 characters a line break is inserted at the whitespace 119 * character following the sequence resulting in a line longer than 76 120 * characters. 121 * 122 * @param s 123 * string to split. 124 * @param usedCharacters 125 * number of characters already used up. Usually the number of 126 * characters for header field name plus colon and one space. 127 * @return a multiple-line representation of the given string. 128 */ 129 public static String fold(String s, int usedCharacters) { 130 final int maxCharacters = 76; 131 132 final int length = s.length(); 133 if (usedCharacters + length <= maxCharacters) 134 return s; 135 136 StringBuilder sb = new StringBuilder(); 137 138 int lastLineBreak = -usedCharacters; 139 int wspIdx = indexOfWsp(s, 0); 140 while (true) { 141 if (wspIdx == length) { 142 sb.append(s.substring(Math.max(0, lastLineBreak))); 143 return sb.toString(); 144 } 145 146 int nextWspIdx = indexOfWsp(s, wspIdx + 1); 147 148 if (nextWspIdx - lastLineBreak > maxCharacters) { 149 sb.append(s.substring(Math.max(0, lastLineBreak), wspIdx)); 150 sb.append("\r\n"); 151 lastLineBreak = wspIdx; 152 } 153 154 wspIdx = nextWspIdx; 155 } 156 } 157 158 /** 159 * INTERIM: From newer version of org.apache.james (but we don't want to import 160 * the entire MimeUtil class). 161 * 162 * Search for whitespace. 163 */ 164 private static int indexOfWsp(String s, int fromIndex) { 165 final int len = s.length(); 166 for (int index = fromIndex; index < len; index++) { 167 char c = s.charAt(index); 168 if (c == ' ' || c == '\t') 169 return index; 170 } 171 return len; 172 } 173 174 /** 175 * Returns the named parameter of a header field. If name is null the first 176 * parameter is returned, or if there are no additional parameters in the 177 * field the entire field is returned. Otherwise the named parameter is 178 * searched for in a case insensitive fashion and returned. If the parameter 179 * cannot be found the method returns null. 180 * 181 * TODO: quite inefficient with the inner trimming & splitting. 182 * TODO: Also has a latent bug: uses "startsWith" to match the name, which can false-positive. 183 * TODO: The doc says that for a null name you get the first param, but you get the header. 184 * Should probably just fix the doc, but if other code assumes that behavior, fix the code. 185 * TODO: Need to decode %-escaped strings, as in: filename="ab%22d". 186 * ('+' -> ' ' conversion too? check RFC) 187 * 188 * @param header 189 * @param name 190 * @return the entire header (if name=null), the found parameter, or null 191 */ 192 public static String getHeaderParameter(String header, String name) { 193 if (header == null) { 194 return null; 195 } 196 String[] parts = unfold(header).split(";"); 197 if (name == null) { 198 return parts[0].trim(); 199 } 200 String lowerCaseName = name.toLowerCase(); 201 for (String part : parts) { 202 if (part.trim().toLowerCase().startsWith(lowerCaseName)) { 203 String[] parameterParts = part.split("=", 2); 204 if (parameterParts.length < 2) { 205 return null; 206 } 207 String parameter = parameterParts[1].trim(); 208 if (parameter.startsWith("\"") && parameter.endsWith("\"")) { 209 return parameter.substring(1, parameter.length() - 1); 210 } else { 211 return parameter; 212 } 213 } 214 } 215 return null; 216 } 217 218 public static Part findFirstPartByMimeType(Part part, String mimeType) 219 throws MessagingException { 220 if (part.getBody() instanceof Multipart) { 221 Multipart multipart = (Multipart)part.getBody(); 222 for (int i = 0, count = multipart.getCount(); i < count; i++) { 223 BodyPart bodyPart = multipart.getBodyPart(i); 224 Part ret = findFirstPartByMimeType(bodyPart, mimeType); 225 if (ret != null) { 226 return ret; 227 } 228 } 229 } 230 else if (part.getMimeType().equalsIgnoreCase(mimeType)) { 231 return part; 232 } 233 return null; 234 } 235 236 public static Part findPartByContentId(Part part, String contentId) throws Exception { 237 if (part.getBody() instanceof Multipart) { 238 Multipart multipart = (Multipart)part.getBody(); 239 for (int i = 0, count = multipart.getCount(); i < count; i++) { 240 BodyPart bodyPart = multipart.getBodyPart(i); 241 Part ret = findPartByContentId(bodyPart, contentId); 242 if (ret != null) { 243 return ret; 244 } 245 } 246 } 247 String cid = part.getContentId(); 248 if (contentId.equals(cid)) { 249 return part; 250 } 251 return null; 252 } 253 254 /** 255 * Reads the Part's body and returns a String based on any charset conversion that needed 256 * to be done. 257 * @param part The part containing a body 258 * @return a String containing the converted text in the body, or null if there was no text 259 * or an error during conversion. 260 */ 261 public static String getTextFromPart(Part part) { 262 try { 263 if (part != null && part.getBody() != null) { 264 InputStream in = part.getBody().getInputStream(); 265 String mimeType = part.getMimeType(); 266 if (mimeType != null && MimeUtility.mimeTypeMatches(mimeType, "text/*")) { 267 /* 268 * Now we read the part into a buffer for further processing. Because 269 * the stream is now wrapped we'll remove any transfer encoding at this point. 270 */ 271 ByteArrayOutputStream out = new ByteArrayOutputStream(); 272 IOUtils.copy(in, out); 273 in.close(); 274 in = null; // we want all of our memory back, and close might not release 275 276 /* 277 * We've got a text part, so let's see if it needs to be processed further. 278 */ 279 String charset = getHeaderParameter(part.getContentType(), "charset"); 280 if (charset != null) { 281 /* 282 * See if there is conversion from the MIME charset to the Java one. 283 */ 284 charset = CharsetUtil.toJavaCharset(charset); 285 } 286 /* 287 * No encoding, so use us-ascii, which is the standard. 288 */ 289 if (charset == null) { 290 charset = "ASCII"; 291 } 292 /* 293 * Convert and return as new String 294 */ 295 String result = out.toString(charset); 296 out.close(); 297 return result; 298 } 299 } 300 301 } 302 catch (OutOfMemoryError oom) { 303 /* 304 * If we are not able to process the body there's nothing we can do about it. Return 305 * null and let the upper layers handle the missing content. 306 */ 307 Log.e(LOG_TAG, "Unable to getTextFromPart " + oom.toString()); 308 } 309 catch (Exception e) { 310 /* 311 * If we are not able to process the body there's nothing we can do about it. Return 312 * null and let the upper layers handle the missing content. 313 */ 314 Log.e(LOG_TAG, "Unable to getTextFromPart " + e.toString()); 315 } 316 return null; 317 } 318 319 /** 320 * Returns true if the given mimeType matches the matchAgainst specification. The comparison 321 * ignores case and the matchAgainst string may include "*" for a wildcard (e.g. "image/*"). 322 * 323 * @param mimeType A MIME type to check. 324 * @param matchAgainst A MIME type to check against. May include wildcards. 325 * @return true if the mimeType matches 326 */ 327 public static boolean mimeTypeMatches(String mimeType, String matchAgainst) { 328 Pattern p = Pattern.compile(matchAgainst.replaceAll("\\*", "\\.\\*"), 329 Pattern.CASE_INSENSITIVE); 330 return p.matcher(mimeType).matches(); 331 } 332 333 /** 334 * Returns true if the given mimeType matches any of the matchAgainst specifications. The 335 * comparison ignores case and the matchAgainst strings may include "*" for a wildcard 336 * (e.g. "image/*"). 337 * 338 * @param mimeType A MIME type to check. 339 * @param matchAgainst An array of MIME types to check against. May include wildcards. 340 * @return true if the mimeType matches any of the matchAgainst strings 341 */ 342 public static boolean mimeTypeMatches(String mimeType, String[] matchAgainst) { 343 for (String matchType : matchAgainst) { 344 if (mimeTypeMatches(mimeType, matchType)) { 345 return true; 346 } 347 } 348 return false; 349 } 350 351 /** 352 * Given an input stream and a transfer encoding, return a wrapped input stream for that 353 * encoding (or the original if none is required) 354 * @param in the input stream 355 * @param contentTransferEncoding the content transfer encoding 356 * @return a properly wrapped stream 357 */ 358 public static InputStream getInputStreamForContentTransferEncoding(InputStream in, 359 String contentTransferEncoding) { 360 if (contentTransferEncoding != null) { 361 contentTransferEncoding = 362 MimeUtility.getHeaderParameter(contentTransferEncoding, null); 363 if ("quoted-printable".equalsIgnoreCase(contentTransferEncoding)) { 364 in = new QuotedPrintableInputStream(in); 365 } 366 else if ("base64".equalsIgnoreCase(contentTransferEncoding)) { 367 in = new Base64InputStream(in, Base64.DEFAULT); 368 } 369 } 370 return in; 371 } 372 373 /** 374 * Removes any content transfer encoding from the stream and returns a Body. 375 */ 376 public static Body decodeBody(InputStream in, String contentTransferEncoding) 377 throws IOException { 378 /* 379 * We'll remove any transfer encoding by wrapping the stream. 380 */ 381 in = getInputStreamForContentTransferEncoding(in, contentTransferEncoding); 382 BinaryTempFileBody tempBody = new BinaryTempFileBody(); 383 OutputStream out = tempBody.getOutputStream(); 384 try { 385 IOUtils.copy(in, out); 386 } catch (Base64DataException bde) { 387 // TODO Need to fix this somehow 388 //String warning = "\n\n" + Email.getMessageDecodeErrorString(); 389 //out.write(warning.getBytes()); 390 } finally { 391 out.close(); 392 } 393 return tempBody; 394 } 395 396 /** 397 * Recursively scan a Part (usually a Message) and sort out which of its children will be 398 * "viewable" and which will be attachments. 399 * 400 * @param part The part to be broken down 401 * @param viewables This arraylist will be populated with all parts that appear to be 402 * the "message" (e.g. text/plain & text/html) 403 * @param attachments This arraylist will be populated with all parts that appear to be 404 * attachments (including inlines) 405 * @throws MessagingException 406 */ 407 public static void collectParts(Part part, ArrayList<Part> viewables, 408 ArrayList<Part> attachments) throws MessagingException { 409 String disposition = part.getDisposition(); 410 String dispositionType = MimeUtility.getHeaderParameter(disposition, null); 411 // If a disposition is not specified, default to "inline" 412 boolean inline = 413 TextUtils.isEmpty(dispositionType) || "inline".equalsIgnoreCase(dispositionType); 414 // The lower-case mime type 415 String mimeType = part.getMimeType().toLowerCase(); 416 417 if (part.getBody() instanceof Multipart) { 418 // If the part is Multipart but not alternative it's either mixed or 419 // something we don't know about, which means we treat it as mixed 420 // per the spec. We just process its pieces recursively. 421 MimeMultipart mp = (MimeMultipart)part.getBody(); 422 boolean foundHtml = false; 423 if (mp.getSubTypeForTest().equals("alternative")) { 424 for (int i = 0; i < mp.getCount(); i++) { 425 if (mp.getBodyPart(i).isMimeType("text/html")) { 426 foundHtml = true; 427 break; 428 } 429 } 430 } 431 for (int i = 0; i < mp.getCount(); i++) { 432 // See if we have text and html 433 BodyPart bp = mp.getBodyPart(i); 434 // If there's html, don't bother loading text 435 if (foundHtml && bp.isMimeType("text/plain")) { 436 continue; 437 } 438 collectParts(bp, viewables, attachments); 439 } 440 } else if (part.getBody() instanceof Message) { 441 // If the part is an embedded message we just continue to process 442 // it, pulling any viewables or attachments into the running list. 443 Message message = (Message)part.getBody(); 444 collectParts(message, viewables, attachments); 445 } else if (inline && (mimeType.startsWith("text") || (mimeType.startsWith("image")))) { 446 // We'll treat text and images as viewables 447 viewables.add(part); 448 } else { 449 // Everything else is an attachment. 450 attachments.add(part); 451 } 452 } 453 } 454