1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.email.mail.internet; 18 19 import com.android.email.Email; 20 import com.android.email.mail.Body; 21 import com.android.email.mail.BodyPart; 22 import com.android.email.mail.Message; 23 import com.android.email.mail.MessagingException; 24 import com.android.email.mail.Multipart; 25 import com.android.email.mail.Part; 26 27 import org.apache.commons.io.IOUtils; 28 import org.apache.james.mime4j.codec.EncoderUtil; 29 import org.apache.james.mime4j.decoder.Base64InputStream; 30 import org.apache.james.mime4j.decoder.DecoderUtil; 31 import org.apache.james.mime4j.decoder.QuotedPrintableInputStream; 32 import org.apache.james.mime4j.util.CharsetUtil; 33 34 import android.util.Log; 35 36 import java.io.ByteArrayOutputStream; 37 import java.io.IOException; 38 import java.io.InputStream; 39 import java.io.OutputStream; 40 import java.util.ArrayList; 41 import java.util.regex.Matcher; 42 import java.util.regex.Pattern; 43 44 public class MimeUtility { 45 46 public static final String MIME_TYPE_RFC822 = "message/rfc822"; 47 private final static Pattern PATTERN_CR_OR_LF = Pattern.compile("\r|\n"); 48 49 /** 50 * Replace sequences of CRLF+WSP with WSP. Tries to preserve original string 51 * object whenever possible. 52 */ 53 public static String unfold(String s) { 54 if (s == null) { 55 return null; 56 } 57 Matcher patternMatcher = PATTERN_CR_OR_LF.matcher(s); 58 if (patternMatcher.find()) { 59 patternMatcher.reset(); 60 s = patternMatcher.replaceAll(""); 61 } 62 return s; 63 } 64 65 public static String decode(String s) { 66 if (s == null) { 67 return null; 68 } 69 return DecoderUtil.decodeEncodedWords(s); 70 } 71 72 public static String unfoldAndDecode(String s) { 73 return decode(unfold(s)); 74 } 75 76 // TODO implement proper foldAndEncode 77 // NOTE: When this really works, we *must* remove all calls to foldAndEncode2() to prevent 78 // duplication of encoding. 79 public static String foldAndEncode(String s) { 80 return s; 81 } 82 83 /** 84 * INTERIM version of foldAndEncode that will be used only by Subject: headers. 85 * This is safer than implementing foldAndEncode() (see above) and risking unknown damage 86 * to other headers. 87 * 88 * TODO: Copy this code to foldAndEncode(), get rid of this function, confirm all working OK. 89 * 90 * @param s original string to encode and fold 91 * @param usedCharacters number of characters already used up by header name 92 93 * @return the String ready to be transmitted 94 */ 95 public static String foldAndEncode2(String s, int usedCharacters) { 96 // james.mime4j.codec.EncoderUtil.java 97 // encode: encodeIfNecessary(text, usage, numUsedInHeaderName) 98 // Usage.TEXT_TOKENlooks like the right thing for subjects 99 // use WORD_ENTITY for address/names 100 101 String encoded = EncoderUtil.encodeIfNecessary(s, EncoderUtil.Usage.TEXT_TOKEN, 102 usedCharacters); 103 104 return fold(encoded, usedCharacters); 105 } 106 107 /** 108 * INTERIM: From newer version of org.apache.james (but we don't want to import 109 * the entire MimeUtil class). 110 * 111 * Splits the specified string into a multiple-line representation with 112 * lines no longer than 76 characters (because the line might contain 113 * encoded words; see <a href='http://www.faqs.org/rfcs/rfc2047.html'>RFC 114 * 2047</a> section 2). If the string contains non-whitespace sequences 115 * longer than 76 characters a line break is inserted at the whitespace 116 * character following the sequence resulting in a line longer than 76 117 * characters. 118 * 119 * @param s 120 * string to split. 121 * @param usedCharacters 122 * number of characters already used up. Usually the number of 123 * characters for header field name plus colon and one space. 124 * @return a multiple-line representation of the given string. 125 */ 126 public static String fold(String s, int usedCharacters) { 127 final int maxCharacters = 76; 128 129 final int length = s.length(); 130 if (usedCharacters + length <= maxCharacters) 131 return s; 132 133 StringBuilder sb = new StringBuilder(); 134 135 int lastLineBreak = -usedCharacters; 136 int wspIdx = indexOfWsp(s, 0); 137 while (true) { 138 if (wspIdx == length) { 139 sb.append(s.substring(Math.max(0, lastLineBreak))); 140 return sb.toString(); 141 } 142 143 int nextWspIdx = indexOfWsp(s, wspIdx + 1); 144 145 if (nextWspIdx - lastLineBreak > maxCharacters) { 146 sb.append(s.substring(Math.max(0, lastLineBreak), wspIdx)); 147 sb.append("\r\n"); 148 lastLineBreak = wspIdx; 149 } 150 151 wspIdx = nextWspIdx; 152 } 153 } 154 155 /** 156 * INTERIM: From newer version of org.apache.james (but we don't want to import 157 * the entire MimeUtil class). 158 * 159 * Search for whitespace. 160 */ 161 private static int indexOfWsp(String s, int fromIndex) { 162 final int len = s.length(); 163 for (int index = fromIndex; index < len; index++) { 164 char c = s.charAt(index); 165 if (c == ' ' || c == '\t') 166 return index; 167 } 168 return len; 169 } 170 171 /** 172 * Returns the named parameter of a header field. If name is null the first 173 * parameter is returned, or if there are no additional parameters in the 174 * field the entire field is returned. Otherwise the named parameter is 175 * searched for in a case insensitive fashion and returned. If the parameter 176 * cannot be found the method returns null. 177 * 178 * TODO: quite inefficient with the inner trimming & splitting. 179 * TODO: Also has a latent bug: uses "startsWith" to match the name, which can false-positive. 180 * TODO: The doc says that for a null name you get the first param, but you get the header. 181 * Should probably just fix the doc, but if other code assumes that behavior, fix the code. 182 * TODO: Need to decode %-escaped strings, as in: filename="ab%22d". 183 * ('+' -> ' ' conversion too? check RFC) 184 * 185 * @param header 186 * @param name 187 * @return the entire header (if name=null), the found parameter, or null 188 */ 189 public static String getHeaderParameter(String header, String name) { 190 if (header == null) { 191 return null; 192 } 193 String[] parts = unfold(header).split(";"); 194 if (name == null) { 195 return parts[0].trim(); 196 } 197 String lowerCaseName = name.toLowerCase(); 198 for (String part : parts) { 199 if (part.trim().toLowerCase().startsWith(lowerCaseName)) { 200 String[] parameterParts = part.split("=", 2); 201 if (parameterParts.length < 2) { 202 return null; 203 } 204 String parameter = parameterParts[1].trim(); 205 if (parameter.startsWith("\"") && parameter.endsWith("\"")) { 206 return parameter.substring(1, parameter.length() - 1); 207 } else { 208 return parameter; 209 } 210 } 211 } 212 return null; 213 } 214 215 public static Part findFirstPartByMimeType(Part part, String mimeType) 216 throws MessagingException { 217 if (part.getBody() instanceof Multipart) { 218 Multipart multipart = (Multipart)part.getBody(); 219 for (int i = 0, count = multipart.getCount(); i < count; i++) { 220 BodyPart bodyPart = multipart.getBodyPart(i); 221 Part ret = findFirstPartByMimeType(bodyPart, mimeType); 222 if (ret != null) { 223 return ret; 224 } 225 } 226 } 227 else if (part.getMimeType().equalsIgnoreCase(mimeType)) { 228 return part; 229 } 230 return null; 231 } 232 233 public static Part findPartByContentId(Part part, String contentId) throws Exception { 234 if (part.getBody() instanceof Multipart) { 235 Multipart multipart = (Multipart)part.getBody(); 236 for (int i = 0, count = multipart.getCount(); i < count; i++) { 237 BodyPart bodyPart = multipart.getBodyPart(i); 238 Part ret = findPartByContentId(bodyPart, contentId); 239 if (ret != null) { 240 return ret; 241 } 242 } 243 } 244 String cid = part.getContentId(); 245 if (contentId.equals(cid)) { 246 return part; 247 } 248 return null; 249 } 250 251 /** 252 * Reads the Part's body and returns a String based on any charset conversion that needed 253 * to be done. 254 * @param part The part containing a body 255 * @return a String containing the converted text in the body, or null if there was no text 256 * or an error during conversion. 257 */ 258 public static String getTextFromPart(Part part) { 259 try { 260 if (part != null && part.getBody() != null) { 261 InputStream in = part.getBody().getInputStream(); 262 String mimeType = part.getMimeType(); 263 if (mimeType != null && MimeUtility.mimeTypeMatches(mimeType, "text/*")) { 264 /* 265 * Now we read the part into a buffer for further processing. Because 266 * the stream is now wrapped we'll remove any transfer encoding at this point. 267 */ 268 ByteArrayOutputStream out = new ByteArrayOutputStream(); 269 IOUtils.copy(in, out); 270 in.close(); 271 in = null; // we want all of our memory back, and close might not release 272 273 /* 274 * We've got a text part, so let's see if it needs to be processed further. 275 */ 276 String charset = getHeaderParameter(part.getContentType(), "charset"); 277 if (charset != null) { 278 /* 279 * See if there is conversion from the MIME charset to the Java one. 280 */ 281 charset = CharsetUtil.toJavaCharset(charset); 282 } 283 /* 284 * No encoding, so use us-ascii, which is the standard. 285 */ 286 if (charset == null) { 287 charset = "ASCII"; 288 } 289 /* 290 * Convert and return as new String 291 */ 292 String result = out.toString(charset); 293 out.close(); 294 return result; 295 } 296 } 297 298 } 299 catch (OutOfMemoryError oom) { 300 /* 301 * If we are not able to process the body there's nothing we can do about it. Return 302 * null and let the upper layers handle the missing content. 303 */ 304 Log.e(Email.LOG_TAG, "Unable to getTextFromPart " + oom.toString()); 305 } 306 catch (Exception e) { 307 /* 308 * If we are not able to process the body there's nothing we can do about it. Return 309 * null and let the upper layers handle the missing content. 310 */ 311 Log.e(Email.LOG_TAG, "Unable to getTextFromPart " + e.toString()); 312 } 313 return null; 314 } 315 316 /** 317 * Returns true if the given mimeType matches the matchAgainst specification. The comparison 318 * ignores case and the matchAgainst string may include "*" for a wildcard (e.g. "image/*"). 319 * 320 * @param mimeType A MIME type to check. 321 * @param matchAgainst A MIME type to check against. May include wildcards. 322 * @return true if the mimeType matches 323 */ 324 public static boolean mimeTypeMatches(String mimeType, String matchAgainst) { 325 Pattern p = Pattern.compile(matchAgainst.replaceAll("\\*", "\\.\\*"), 326 Pattern.CASE_INSENSITIVE); 327 return p.matcher(mimeType).matches(); 328 } 329 330 /** 331 * Returns true if the given mimeType matches any of the matchAgainst specifications. The 332 * comparison ignores case and the matchAgainst strings may include "*" for a wildcard 333 * (e.g. "image/*"). 334 * 335 * @param mimeType A MIME type to check. 336 * @param matchAgainst An array of MIME types to check against. May include wildcards. 337 * @return true if the mimeType matches any of the matchAgainst strings 338 */ 339 public static boolean mimeTypeMatches(String mimeType, String[] matchAgainst) { 340 for (String matchType : matchAgainst) { 341 if (mimeTypeMatches(mimeType, matchType)) { 342 return true; 343 } 344 } 345 return false; 346 } 347 348 /** 349 * Removes any content transfer encoding from the stream and returns a Body. 350 */ 351 public static Body decodeBody(InputStream in, String contentTransferEncoding) 352 throws IOException { 353 /* 354 * We'll remove any transfer encoding by wrapping the stream. 355 */ 356 if (contentTransferEncoding != null) { 357 contentTransferEncoding = 358 MimeUtility.getHeaderParameter(contentTransferEncoding, null); 359 if ("quoted-printable".equalsIgnoreCase(contentTransferEncoding)) { 360 in = new QuotedPrintableInputStream(in); 361 } 362 else if ("base64".equalsIgnoreCase(contentTransferEncoding)) { 363 in = new Base64InputStream(in); 364 } 365 } 366 367 BinaryTempFileBody tempBody = new BinaryTempFileBody(); 368 OutputStream out = tempBody.getOutputStream(); 369 IOUtils.copy(in, out); 370 out.close(); 371 return tempBody; 372 } 373 374 /** 375 * An unfortunately named method that makes decisions about a Part (usually a Message) 376 * as to which of it's children will be "viewable" and which will be attachments. 377 * The method recursively sorts the viewables and attachments into seperate 378 * lists for further processing. 379 * @param part 380 * @param viewables 381 * @param attachments 382 * @throws MessagingException 383 */ 384 public static void collectParts(Part part, ArrayList<Part> viewables, 385 ArrayList<Part> attachments) throws MessagingException { 386 String disposition = part.getDisposition(); 387 String dispositionType = null; 388 String dispositionFilename = null; 389 if (disposition != null) { 390 dispositionType = MimeUtility.getHeaderParameter(disposition, null); 391 dispositionFilename = MimeUtility.getHeaderParameter(disposition, "filename"); 392 } 393 394 /* 395 * A best guess that this part is intended to be an attachment and not inline. 396 */ 397 boolean attachment = ("attachment".equalsIgnoreCase(dispositionType)) 398 || (dispositionFilename != null) 399 && (!"inline".equalsIgnoreCase(dispositionType)); 400 401 /* 402 * If the part is Multipart but not alternative it's either mixed or 403 * something we don't know about, which means we treat it as mixed 404 * per the spec. We just process it's pieces recursively. 405 */ 406 if (part.getBody() instanceof Multipart) { 407 Multipart mp = (Multipart)part.getBody(); 408 for (int i = 0; i < mp.getCount(); i++) { 409 collectParts(mp.getBodyPart(i), viewables, attachments); 410 } 411 } 412 /* 413 * If the part is an embedded message we just continue to process 414 * it, pulling any viewables or attachments into the running list. 415 */ 416 else if (part.getBody() instanceof Message) { 417 Message message = (Message)part.getBody(); 418 collectParts(message, viewables, attachments); 419 } 420 /* 421 * If the part is HTML and it got this far it's part of a mixed (et 422 * al) and should be rendered inline. 423 */ 424 else if ((!attachment) && (part.getMimeType().equalsIgnoreCase("text/html"))) { 425 viewables.add(part); 426 } 427 /* 428 * If the part is plain text and it got this far it's part of a 429 * mixed (et al) and should be rendered inline. 430 */ 431 else if ((!attachment) && (part.getMimeType().equalsIgnoreCase("text/plain"))) { 432 viewables.add(part); 433 } 434 /* 435 * Finally, if it's nothing else we will include it as an attachment. 436 */ 437 else { 438 attachments.add(part); 439 } 440 } 441 } 442