/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.email.mail.internet;

import com.android.email.Email;
import com.android.email.mail.Body;
import com.android.email.mail.BodyPart;
import com.android.email.mail.Message;
import com.android.email.mail.MessagingException;
import com.android.email.mail.Multipart;
import com.android.email.mail.Part;

import org.apache.commons.io.IOUtils;
import org.apache.james.mime4j.codec.EncoderUtil;
import org.apache.james.mime4j.decoder.DecoderUtil;
import org.apache.james.mime4j.decoder.QuotedPrintableInputStream;
import org.apache.james.mime4j.util.CharsetUtil;

import android.util.Log;
import android.util.Base64;
import android.util.Base64InputStream;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Static helpers for working with MIME headers, MIME types and message parts.
 */
public class MimeUtility {

    public static final String MIME_TYPE_RFC822 = "message/rfc822";
    private final static Pattern PATTERN_CR_OR_LF = Pattern.compile("\r|\n");

    /**
     * Unfolds a header value by removing every CR and LF character from it. For a header
     * folded as CRLF followed by whitespace this leaves just the whitespace behind.
     * Preserves the original string object when it contains no CR or LF.
     *
     * @param s the (possibly folded) header value, may be null
     * @return the value without CR/LF characters, or null if s was null
     */
    public static String unfold(String s) {
        if (s == null) {
            return null;
        }
        Matcher patternMatcher = PATTERN_CR_OR_LF.matcher(s);
        if (patternMatcher.find()) {
            // Only build a new string when there is actually something to strip.
            patternMatcher.reset();
            s = patternMatcher.replaceAll("");
        }
        return s;
    }

    /**
     * Decodes any encoded-words in the given string via {@link DecoderUtil}.
     *
     * @param s the string to decode, may be null
     * @return the decoded string, or null if s was null
     */
    public static String decode(String s) {
        if (s == null) {
            return null;
        }
        return DecoderUtil.decodeEncodedWords(s);
    }

    /**
     * Convenience for {@code decode(unfold(s))}.
     *
     * @param s the header value to unfold and decode, may be null
     * @return the unfolded, decoded value, or null if s was null
     */
    public static String unfoldAndDecode(String s) {
        return decode(unfold(s));
    }

    // TODO implement proper foldAndEncode
    // NOTE:  When this really works, we *must* remove all calls to foldAndEncode2() to prevent
    // duplication of encoding.
    public static String foldAndEncode(String s) {
        return s;
    }

    /**
     * INTERIM version of foldAndEncode that will be used only by Subject: headers.
     * This is safer than implementing foldAndEncode() (see above) and risking unknown damage
     * to other headers.
     *
     * TODO: Copy this code to foldAndEncode(), get rid of this function, confirm all working OK.
     *
     * @param s original string to encode and fold
     * @param usedCharacters number of characters already used up by header name
     *
     * @return the String ready to be transmitted
     */
    public static String foldAndEncode2(String s, int usedCharacters) {
        // james.mime4j.codec.EncoderUtil.java
        // encode:  encodeIfNecessary(text, usage, numUsedInHeaderName)
        // Usage.TEXT_TOKEN looks like the right thing for subjects
        // use WORD_ENTITY for address/names

        String encoded = EncoderUtil.encodeIfNecessary(s, EncoderUtil.Usage.TEXT_TOKEN,
                usedCharacters);

        return fold(encoded, usedCharacters);
    }

    /**
     * INTERIM:  From newer version of org.apache.james (but we don't want to import
     * the entire MimeUtil class).
     *
     * Splits the specified string into a multiple-line representation with
     * lines no longer than 76 characters (because the line might contain
     * encoded words; see <a href='http://www.faqs.org/rfcs/rfc2047.html'>RFC
     * 2047</a> section 2). If the string contains non-whitespace sequences
     * longer than 76 characters a line break is inserted at the whitespace
     * character following the sequence resulting in a line longer than 76
     * characters.
     *
     * @param s
     *            string to split.
     * @param usedCharacters
     *            number of characters already used up. Usually the number of
     *            characters for header field name plus colon and one space.
     * @return a multiple-line representation of the given string.
     */
    public static String fold(String s, int usedCharacters) {
        final int maxCharacters = 76;

        final int length = s.length();
        if (usedCharacters + length <= maxCharacters) {
            return s;
        }

        StringBuilder sb = new StringBuilder();

        // Negative start accounts for the characters already consumed by the header name.
        int lastLineBreak = -usedCharacters;
        int wspIdx = indexOfWsp(s, 0);
        while (true) {
            if (wspIdx == length) {
                // No more whitespace: emit the remainder and finish.
                sb.append(s.substring(Math.max(0, lastLineBreak)));
                return sb.toString();
            }

            int nextWspIdx = indexOfWsp(s, wspIdx + 1);

            if (nextWspIdx - lastLineBreak > maxCharacters) {
                // The next word would overflow the line, so break at the current whitespace.
                sb.append(s.substring(Math.max(0, lastLineBreak), wspIdx));
                sb.append("\r\n");
                lastLineBreak = wspIdx;
            }

            wspIdx = nextWspIdx;
        }
    }

    /**
     * INTERIM:  From newer version of org.apache.james (but we don't want to import
     * the entire MimeUtil class).
     *
     * Search for whitespace (space or tab).
     *
     * @param s the string to search
     * @param fromIndex the index to start searching at
     * @return the index of the first space or tab at or after fromIndex, or the length of
     *         the string if there is none
     */
    private static int indexOfWsp(String s, int fromIndex) {
        final int len = s.length();
        for (int index = fromIndex; index < len; index++) {
            char c = s.charAt(index);
            if (c == ' ' || c == '\t') {
                return index;
            }
        }
        return len;
    }

    /**
     * Returns the named parameter of a header field. If name is null the leading,
     * unnamed element of the field (everything before the first ';', trimmed) is returned.
     * Otherwise the named parameter is searched for in a case insensitive fashion and
     * returned, with one pair of surrounding double quotes removed. If the parameter
     * cannot be found the method returns null.
     *
     * TODO: quite inefficient with the inner trimming & splitting.
     * TODO: Also has a latent bug: uses a prefix match on the name, which can false-positive.
     * TODO: Need to decode %-escaped strings, as in: filename="ab%22d".
     *       ('+' -> ' ' conversion too? check RFC)
     *
     * @param header the raw header value, may be null
     * @param name the parameter name to look up, or null for the leading element
     * @return the leading element (if name=null), the found parameter, or null
     */
    public static String getHeaderParameter(String header, String name) {
        if (header == null) {
            return null;
        }
        String[] parts = unfold(header).split(";");
        if (name == null) {
            // split() drops trailing empty strings, so a header made only of ';' characters
            // yields an empty array; treat that as an empty leading element.
            return (parts.length == 0) ? "" : parts[0].trim();
        }
        final int nameLength = name.length();
        for (String part : parts) {
            // regionMatches(ignoreCase) compares per character and does not depend on the
            // default locale, unlike toLowerCase() (e.g. 'I' under a Turkish locale).
            if (part.trim().regionMatches(true, 0, name, 0, nameLength)) {
                String[] parameterParts = part.split("=", 2);
                if (parameterParts.length < 2) {
                    return null;
                }
                String parameter = parameterParts[1].trim();
                if (parameter.startsWith("\"") && parameter.endsWith("\"")) {
                    return parameter.substring(1, parameter.length() - 1);
                } else {
                    return parameter;
                }
            }
        }
        return null;
    }

    /**
     * Depth-first search for the first non-multipart part whose MIME type equals the given
     * type, ignoring case.
     *
     * @param part the part to search (recursively, if its body is a Multipart)
     * @param mimeType the MIME type to look for
     * @return the first matching part, or null if there is none
     * @throws MessagingException if a part cannot be examined
     */
    public static Part findFirstPartByMimeType(Part part, String mimeType)
            throws MessagingException {
        if (part.getBody() instanceof Multipart) {
            Multipart multipart = (Multipart)part.getBody();
            for (int i = 0, count = multipart.getCount(); i < count; i++) {
                BodyPart bodyPart = multipart.getBodyPart(i);
                Part ret = findFirstPartByMimeType(bodyPart, mimeType);
                if (ret != null) {
                    return ret;
                }
            }
        }
        else {
            // A part may not report a MIME type; treat that as "no match" rather than crash.
            String partMimeType = part.getMimeType();
            if (partMimeType != null && partMimeType.equalsIgnoreCase(mimeType)) {
                return part;
            }
        }
        return null;
    }

    /**
     * Depth-first search for the part whose Content-ID equals the given id. Children of a
     * multipart are searched before the multipart itself is checked.
     *
     * @param part the part to search (recursively, if its body is a Multipart)
     * @param contentId the Content-ID to look for; must not be null
     * @return the matching part, or null if there is none
     * @throws Exception if a part cannot be examined
     */
    public static Part findPartByContentId(Part part, String contentId) throws Exception {
        if (part.getBody() instanceof Multipart) {
            Multipart multipart = (Multipart)part.getBody();
            for (int i = 0, count = multipart.getCount(); i < count; i++) {
                BodyPart bodyPart = multipart.getBodyPart(i);
                Part ret = findPartByContentId(bodyPart, contentId);
                if (ret != null) {
                    return ret;
                }
            }
        }
        String cid = part.getContentId();
        if (contentId.equals(cid)) {
            return part;
        }
        return null;
    }

    /**
     * Reads the Part's body and returns a String based on any charset conversion that needed
     * to be done.
     * @param part The part containing a body
     * @return a String containing the converted text in the body, or null if there was no text
     * or an error during conversion.
     */
    public static String getTextFromPart(Part part) {
        try {
            if (part != null && part.getBody() != null) {
                String mimeType = part.getMimeType();
                if (mimeType != null && MimeUtility.mimeTypeMatches(mimeType, "text/*")) {
                    /*
                     * Now we read the part into a buffer for further processing. Because
                     * the stream is now wrapped we'll remove any transfer encoding at this point.
                     * The stream is only opened for text parts and is always closed, even if
                     * the copy fails.
                     */
                    ByteArrayOutputStream out = new ByteArrayOutputStream();
                    InputStream in = part.getBody().getInputStream();
                    try {
                        IOUtils.copy(in, out);
                    } finally {
                        in.close();
                    }
                    in = null;      // we want all of our memory back, and close might not release

                    /*
                     * We've got a text part, so let's see if it needs to be processed further.
                     */
                    String charset = getHeaderParameter(part.getContentType(), "charset");
                    if (charset != null) {
                        /*
                         * See if there is conversion from the MIME charset to the Java one.
                         */
                        charset = CharsetUtil.toJavaCharset(charset);
                    }
                    /*
                     * No encoding, so use us-ascii, which is the standard.
                     */
                    if (charset == null) {
                        charset = "ASCII";
                    }
                    /*
                     * Convert and return as new String
                     */
                    String result = out.toString(charset);
                    out.close();
                    return result;
                }
            }

        }
        catch (OutOfMemoryError oom) {
            /*
             * If we are not able to process the body there's nothing we can do about it. Return
             * null and let the upper layers handle the missing content.
             */
            Log.e(Email.LOG_TAG, "Unable to getTextFromPart " + oom.toString());
        }
        catch (Exception e) {
            /*
             * If we are not able to process the body there's nothing we can do about it. Return
             * null and let the upper layers handle the missing content.
             */
            Log.e(Email.LOG_TAG, "Unable to getTextFromPart " + e.toString());
        }
        return null;
    }

    /**
     * Returns true if the given mimeType matches the matchAgainst specification.  The comparison
     * ignores case and the matchAgainst string may include "*" for a wildcard (e.g. "image/*").
     * Every other character of matchAgainst is matched literally, so types containing regex
     * metacharacters such as '.' or '+' (e.g. "application/xhtml+xml") behave as expected.
     *
     * @param mimeType A MIME type to check.
     * @param matchAgainst A MIME type to check against. May include wildcards.
     * @return true if the mimeType matches
     */
    public static boolean mimeTypeMatches(String mimeType, String matchAgainst) {
        // Build a regex in which each literal run is quoted and each '*' becomes ".*".
        StringBuilder regex = new StringBuilder();
        int literalStart = 0;
        int starIdx;
        while ((starIdx = matchAgainst.indexOf('*', literalStart)) != -1) {
            regex.append(Pattern.quote(matchAgainst.substring(literalStart, starIdx)));
            regex.append(".*");
            literalStart = starIdx + 1;
        }
        regex.append(Pattern.quote(matchAgainst.substring(literalStart)));

        Pattern p = Pattern.compile(regex.toString(), Pattern.CASE_INSENSITIVE);
        return p.matcher(mimeType).matches();
    }

    /**
     * Returns true if the given mimeType matches any of the matchAgainst specifications.  The
     * comparison ignores case and the matchAgainst strings may include "*" for a wildcard
     * (e.g. "image/*").
     *
     * @param mimeType A MIME type to check.
     * @param matchAgainst An array of MIME types to check against. May include wildcards.
     * @return true if the mimeType matches any of the matchAgainst strings
     */
    public static boolean mimeTypeMatches(String mimeType, String[] matchAgainst) {
        for (String matchType : matchAgainst) {
            if (mimeTypeMatches(mimeType, matchType)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Removes any content transfer encoding from the stream and returns a Body.
     * The input stream is read to its end but is not closed by this method.
     *
     * @param in the raw (possibly transfer-encoded) body stream
     * @param contentTransferEncoding the Content-Transfer-Encoding header value, may be null
     * @return a temp-file backed Body holding the decoded content
     * @throws IOException if reading the input or writing the temp body fails
     */
    public static Body decodeBody(InputStream in, String contentTransferEncoding)
            throws IOException {
        /*
         * We'll remove any transfer encoding by wrapping the stream.
         */
        if (contentTransferEncoding != null) {
            contentTransferEncoding =
                MimeUtility.getHeaderParameter(contentTransferEncoding, null);
            if ("quoted-printable".equalsIgnoreCase(contentTransferEncoding)) {
                in = new QuotedPrintableInputStream(in);
            }
            else if ("base64".equalsIgnoreCase(contentTransferEncoding)) {
                in = new Base64InputStream(in, Base64.DEFAULT);
            }
        }

        BinaryTempFileBody tempBody = new BinaryTempFileBody();
        OutputStream out = tempBody.getOutputStream();
        try {
            IOUtils.copy(in, out);
        } finally {
            // Always release the temp-file stream, even when the copy fails part-way.
            out.close();
        }
        return tempBody;
    }

    /**
     * An unfortunately named method that makes decisions about a Part (usually a Message)
     * as to which of its children will be "viewable" and which will be attachments.
     * The method recursively sorts the viewables and attachments into separate
     * lists for further processing.
     * @param part the part to classify (recursively)
     * @param viewables receives the parts to be rendered inline
     * @param attachments receives the parts to be treated as attachments
     * @throws MessagingException if a part cannot be examined
     */
    public static void collectParts(Part part, ArrayList<Part> viewables,
            ArrayList<Part> attachments) throws MessagingException {
        String disposition = part.getDisposition();
        String dispositionType = null;
        String dispositionFilename = null;
        if (disposition != null) {
            dispositionType = MimeUtility.getHeaderParameter(disposition, null);
            dispositionFilename = MimeUtility.getHeaderParameter(disposition, "filename");
        }

        /*
         * A best guess that this part is intended to be an attachment and not inline:
         * either it says so explicitly, or it carries a filename and is not marked inline.
         */
        boolean attachment = ("attachment".equalsIgnoreCase(dispositionType))
                || ((dispositionFilename != null)
                        && (!"inline".equalsIgnoreCase(dispositionType)));

        /*
         * If the part is Multipart but not alternative it's either mixed or
         * something we don't know about, which means we treat it as mixed
         * per the spec. We just process its pieces recursively.
         */
        if (part.getBody() instanceof Multipart) {
            Multipart mp = (Multipart)part.getBody();
            for (int i = 0, count = mp.getCount(); i < count; i++) {
                collectParts(mp.getBodyPart(i), viewables, attachments);
            }
        }
        /*
         * If the part is an embedded message we just continue to process
         * it, pulling any viewables or attachments into the running list.
         */
        else if (part.getBody() instanceof Message) {
            Message message = (Message)part.getBody();
            collectParts(message, viewables, attachments);
        }
        /*
         * If the part is HTML and it got this far it's part of a mixed (et
         * al) and should be rendered inline. The comparison is written constant-first
         * so that a part without a MIME type simply falls through.
         */
        else if ((!attachment) && ("text/html".equalsIgnoreCase(part.getMimeType()))) {
            viewables.add(part);
        }
        /*
         * If the part is plain text and it got this far it's part of a
         * mixed (et al) and should be rendered inline.
         */
        else if ((!attachment) && ("text/plain".equalsIgnoreCase(part.getMimeType()))) {
            viewables.add(part);
        }
        /*
         * Finally, if it's nothing else we will include it as an attachment.
         */
        else {
            attachments.add(part);
        }
    }
}