Home | History | Annotate | Download | only in internet
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.emailcommon.internet;
     18 
     19 import android.text.TextUtils;
     20 import android.util.Base64;
     21 import android.util.Base64DataException;
     22 import android.util.Base64InputStream;
     23 import android.util.Log;
     24 
     25 import com.android.emailcommon.mail.Body;
     26 import com.android.emailcommon.mail.BodyPart;
     27 import com.android.emailcommon.mail.Message;
     28 import com.android.emailcommon.mail.MessagingException;
     29 import com.android.emailcommon.mail.Multipart;
     30 import com.android.emailcommon.mail.Part;
     31 
     32 import org.apache.commons.io.IOUtils;
     33 import org.apache.james.mime4j.codec.EncoderUtil;
     34 import org.apache.james.mime4j.decoder.DecoderUtil;
     35 import org.apache.james.mime4j.decoder.QuotedPrintableInputStream;
     36 import org.apache.james.mime4j.util.CharsetUtil;
     37 
     38 import java.io.ByteArrayOutputStream;
     39 import java.io.IOException;
     40 import java.io.InputStream;
     41 import java.io.OutputStream;
     42 import java.util.ArrayList;
     43 import java.util.regex.Matcher;
     44 import java.util.regex.Pattern;
     45 
     46 public class MimeUtility {
     47     private static final String LOG_TAG = "Email";
     48 
     49     public static final String MIME_TYPE_RFC822 = "message/rfc822";
     50     private final static Pattern PATTERN_CR_OR_LF = Pattern.compile("\r|\n");
     51 
     52     /**
     53      * Replace sequences of CRLF+WSP with WSP.  Tries to preserve original string
     54      * object whenever possible.
     55      */
     56     public static String unfold(String s) {
     57         if (s == null) {
     58             return null;
     59         }
     60         Matcher patternMatcher = PATTERN_CR_OR_LF.matcher(s);
     61         if (patternMatcher.find()) {
     62             patternMatcher.reset();
     63             s = patternMatcher.replaceAll("");
     64         }
     65         return s;
     66     }
     67 
     68     public static String decode(String s) {
     69         if (s == null) {
     70             return null;
     71         }
     72         return DecoderUtil.decodeEncodedWords(s);
     73     }
     74 
     75     public static String unfoldAndDecode(String s) {
     76         return decode(unfold(s));
     77     }
     78 
     79     // TODO implement proper foldAndEncode
     80     // NOTE: When this really works, we *must* remove all calls to foldAndEncode2() to prevent
     81     // duplication of encoding.
     82     public static String foldAndEncode(String s) {
     83         return s;
     84     }
     85 
     86     /**
     87      * INTERIM version of foldAndEncode that will be used only by Subject: headers.
     88      * This is safer than implementing foldAndEncode() (see above) and risking unknown damage
     89      * to other headers.
     90      *
     91      * TODO: Copy this code to foldAndEncode(), get rid of this function, confirm all working OK.
     92      *
     93      * @param s original string to encode and fold
     94      * @param usedCharacters number of characters already used up by header name
     95 
     96      * @return the String ready to be transmitted
     97      */
     98     public static String foldAndEncode2(String s, int usedCharacters) {
     99         // james.mime4j.codec.EncoderUtil.java
    100         // encode:  encodeIfNecessary(text, usage, numUsedInHeaderName)
    101         // Usage.TEXT_TOKENlooks like the right thing for subjects
    102         // use WORD_ENTITY for address/names
    103 
    104         String encoded = EncoderUtil.encodeIfNecessary(s, EncoderUtil.Usage.TEXT_TOKEN,
    105                 usedCharacters);
    106 
    107         return fold(encoded, usedCharacters);
    108     }
    109 
    110     /**
    111      * INTERIM:  From newer version of org.apache.james (but we don't want to import
    112      * the entire MimeUtil class).
    113      *
    114      * Splits the specified string into a multiple-line representation with
    115      * lines no longer than 76 characters (because the line might contain
    116      * encoded words; see <a href='http://www.faqs.org/rfcs/rfc2047.html'>RFC
    117      * 2047</a> section 2). If the string contains non-whitespace sequences
    118      * longer than 76 characters a line break is inserted at the whitespace
    119      * character following the sequence resulting in a line longer than 76
    120      * characters.
    121      *
    122      * @param s
    123      *            string to split.
    124      * @param usedCharacters
    125      *            number of characters already used up. Usually the number of
    126      *            characters for header field name plus colon and one space.
    127      * @return a multiple-line representation of the given string.
    128      */
    129     public static String fold(String s, int usedCharacters) {
    130         final int maxCharacters = 76;
    131 
    132         final int length = s.length();
    133         if (usedCharacters + length <= maxCharacters)
    134             return s;
    135 
    136         StringBuilder sb = new StringBuilder();
    137 
    138         int lastLineBreak = -usedCharacters;
    139         int wspIdx = indexOfWsp(s, 0);
    140         while (true) {
    141             if (wspIdx == length) {
    142                 sb.append(s.substring(Math.max(0, lastLineBreak)));
    143                 return sb.toString();
    144             }
    145 
    146             int nextWspIdx = indexOfWsp(s, wspIdx + 1);
    147 
    148             if (nextWspIdx - lastLineBreak > maxCharacters) {
    149                 sb.append(s.substring(Math.max(0, lastLineBreak), wspIdx));
    150                 sb.append("\r\n");
    151                 lastLineBreak = wspIdx;
    152             }
    153 
    154             wspIdx = nextWspIdx;
    155         }
    156     }
    157 
    158     /**
    159      * INTERIM:  From newer version of org.apache.james (but we don't want to import
    160      * the entire MimeUtil class).
    161      *
    162      * Search for whitespace.
    163      */
    164     private static int indexOfWsp(String s, int fromIndex) {
    165         final int len = s.length();
    166         for (int index = fromIndex; index < len; index++) {
    167             char c = s.charAt(index);
    168             if (c == ' ' || c == '\t')
    169                 return index;
    170         }
    171         return len;
    172     }
    173 
    174     /**
    175      * Returns the named parameter of a header field. If name is null the first
    176      * parameter is returned, or if there are no additional parameters in the
    177      * field the entire field is returned. Otherwise the named parameter is
    178      * searched for in a case insensitive fashion and returned. If the parameter
    179      * cannot be found the method returns null.
    180      *
    181      * TODO: quite inefficient with the inner trimming & splitting.
    182      * TODO: Also has a latent bug: uses "startsWith" to match the name, which can false-positive.
    183      * TODO: The doc says that for a null name you get the first param, but you get the header.
    184      *    Should probably just fix the doc, but if other code assumes that behavior, fix the code.
    185      * TODO: Need to decode %-escaped strings, as in: filename="ab%22d".
    186      *       ('+' -> ' ' conversion too? check RFC)
    187      *
    188      * @param header
    189      * @param name
    190      * @return the entire header (if name=null), the found parameter, or null
    191      */
    192     public static String getHeaderParameter(String header, String name) {
    193         if (header == null) {
    194             return null;
    195         }
    196         String[] parts = unfold(header).split(";");
    197         if (name == null) {
    198             return parts[0].trim();
    199         }
    200         String lowerCaseName = name.toLowerCase();
    201         for (String part : parts) {
    202             if (part.trim().toLowerCase().startsWith(lowerCaseName)) {
    203                 String[] parameterParts = part.split("=", 2);
    204                 if (parameterParts.length < 2) {
    205                     return null;
    206                 }
    207                 String parameter = parameterParts[1].trim();
    208                 if (parameter.startsWith("\"") && parameter.endsWith("\"")) {
    209                     return parameter.substring(1, parameter.length() - 1);
    210                 } else {
    211                     return parameter;
    212                 }
    213             }
    214         }
    215         return null;
    216     }
    217 
    218     public static Part findFirstPartByMimeType(Part part, String mimeType)
    219             throws MessagingException {
    220         if (part.getBody() instanceof Multipart) {
    221             Multipart multipart = (Multipart)part.getBody();
    222             for (int i = 0, count = multipart.getCount(); i < count; i++) {
    223                 BodyPart bodyPart = multipart.getBodyPart(i);
    224                 Part ret = findFirstPartByMimeType(bodyPart, mimeType);
    225                 if (ret != null) {
    226                     return ret;
    227                 }
    228             }
    229         }
    230         else if (part.getMimeType().equalsIgnoreCase(mimeType)) {
    231             return part;
    232         }
    233         return null;
    234     }
    235 
    236     public static Part findPartByContentId(Part part, String contentId) throws Exception {
    237         if (part.getBody() instanceof Multipart) {
    238             Multipart multipart = (Multipart)part.getBody();
    239             for (int i = 0, count = multipart.getCount(); i < count; i++) {
    240                 BodyPart bodyPart = multipart.getBodyPart(i);
    241                 Part ret = findPartByContentId(bodyPart, contentId);
    242                 if (ret != null) {
    243                     return ret;
    244                 }
    245             }
    246         }
    247         String cid = part.getContentId();
    248         if (contentId.equals(cid)) {
    249             return part;
    250         }
    251         return null;
    252     }
    253 
    254     /**
    255      * Reads the Part's body and returns a String based on any charset conversion that needed
    256      * to be done.
    257      * @param part The part containing a body
    258      * @return a String containing the converted text in the body, or null if there was no text
    259      * or an error during conversion.
    260      */
    261     public static String getTextFromPart(Part part) {
    262         try {
    263             if (part != null && part.getBody() != null) {
    264                 InputStream in = part.getBody().getInputStream();
    265                 String mimeType = part.getMimeType();
    266                 if (mimeType != null && MimeUtility.mimeTypeMatches(mimeType, "text/*")) {
    267                     /*
    268                      * Now we read the part into a buffer for further processing. Because
    269                      * the stream is now wrapped we'll remove any transfer encoding at this point.
    270                      */
    271                     ByteArrayOutputStream out = new ByteArrayOutputStream();
    272                     IOUtils.copy(in, out);
    273                     in.close();
    274                     in = null;      // we want all of our memory back, and close might not release
    275 
    276                     /*
    277                      * We've got a text part, so let's see if it needs to be processed further.
    278                      */
    279                     String charset = getHeaderParameter(part.getContentType(), "charset");
    280                     if (charset != null) {
    281                         /*
    282                          * See if there is conversion from the MIME charset to the Java one.
    283                          */
    284                         charset = CharsetUtil.toJavaCharset(charset);
    285                     }
    286                     /*
    287                      * No encoding, so use us-ascii, which is the standard.
    288                      */
    289                     if (charset == null) {
    290                         charset = "ASCII";
    291                     }
    292                     /*
    293                      * Convert and return as new String
    294                      */
    295                     String result = out.toString(charset);
    296                     out.close();
    297                     return result;
    298                 }
    299             }
    300 
    301         }
    302         catch (OutOfMemoryError oom) {
    303             /*
    304              * If we are not able to process the body there's nothing we can do about it. Return
    305              * null and let the upper layers handle the missing content.
    306              */
    307             Log.e(LOG_TAG, "Unable to getTextFromPart " + oom.toString());
    308         }
    309         catch (Exception e) {
    310             /*
    311              * If we are not able to process the body there's nothing we can do about it. Return
    312              * null and let the upper layers handle the missing content.
    313              */
    314             Log.e(LOG_TAG, "Unable to getTextFromPart " + e.toString());
    315         }
    316         return null;
    317     }
    318 
    319     /**
    320      * Returns true if the given mimeType matches the matchAgainst specification.  The comparison
    321      * ignores case and the matchAgainst string may include "*" for a wildcard (e.g. "image/*").
    322      *
    323      * @param mimeType A MIME type to check.
    324      * @param matchAgainst A MIME type to check against. May include wildcards.
    325      * @return true if the mimeType matches
    326      */
    327     public static boolean mimeTypeMatches(String mimeType, String matchAgainst) {
    328         Pattern p = Pattern.compile(matchAgainst.replaceAll("\\*", "\\.\\*"),
    329                 Pattern.CASE_INSENSITIVE);
    330         return p.matcher(mimeType).matches();
    331     }
    332 
    333     /**
    334      * Returns true if the given mimeType matches any of the matchAgainst specifications.  The
    335      * comparison ignores case and the matchAgainst strings may include "*" for a wildcard
    336      * (e.g. "image/*").
    337      *
    338      * @param mimeType A MIME type to check.
    339      * @param matchAgainst An array of MIME types to check against. May include wildcards.
    340      * @return true if the mimeType matches any of the matchAgainst strings
    341      */
    342     public static boolean mimeTypeMatches(String mimeType, String[] matchAgainst) {
    343         for (String matchType : matchAgainst) {
    344             if (mimeTypeMatches(mimeType, matchType)) {
    345                 return true;
    346             }
    347         }
    348         return false;
    349     }
    350 
    351     /**
    352      * Given an input stream and a transfer encoding, return a wrapped input stream for that
    353      * encoding (or the original if none is required)
    354      * @param in the input stream
    355      * @param contentTransferEncoding the content transfer encoding
    356      * @return a properly wrapped stream
    357      */
    358     public static InputStream getInputStreamForContentTransferEncoding(InputStream in,
    359             String contentTransferEncoding) {
    360         if (contentTransferEncoding != null) {
    361             contentTransferEncoding =
    362                 MimeUtility.getHeaderParameter(contentTransferEncoding, null);
    363             if ("quoted-printable".equalsIgnoreCase(contentTransferEncoding)) {
    364                 in = new QuotedPrintableInputStream(in);
    365             }
    366             else if ("base64".equalsIgnoreCase(contentTransferEncoding)) {
    367                 in = new Base64InputStream(in, Base64.DEFAULT);
    368             }
    369         }
    370         return in;
    371     }
    372 
    373     /**
    374      * Removes any content transfer encoding from the stream and returns a Body.
    375      */
    376     public static Body decodeBody(InputStream in, String contentTransferEncoding)
    377             throws IOException {
    378         /*
    379          * We'll remove any transfer encoding by wrapping the stream.
    380          */
    381         in = getInputStreamForContentTransferEncoding(in, contentTransferEncoding);
    382         BinaryTempFileBody tempBody = new BinaryTempFileBody();
    383         OutputStream out = tempBody.getOutputStream();
    384         try {
    385             IOUtils.copy(in, out);
    386         } catch (Base64DataException bde) {
    387             // TODO Need to fix this somehow
    388             //String warning = "\n\n" + Email.getMessageDecodeErrorString();
    389             //out.write(warning.getBytes());
    390         } finally {
    391             out.close();
    392         }
    393         return tempBody;
    394     }
    395 
    396     /**
    397      * Recursively scan a Part (usually a Message) and sort out which of its children will be
    398      * "viewable" and which will be attachments.
    399      *
    400      * @param part The part to be broken down
    401      * @param viewables This arraylist will be populated with all parts that appear to be
    402      * the "message" (e.g. text/plain & text/html)
    403      * @param attachments This arraylist will be populated with all parts that appear to be
    404      * attachments (including inlines)
    405      * @throws MessagingException
    406      */
    407     public static void collectParts(Part part, ArrayList<Part> viewables,
    408             ArrayList<Part> attachments) throws MessagingException {
    409         String disposition = part.getDisposition();
    410         String dispositionType = MimeUtility.getHeaderParameter(disposition, null);
    411         // If a disposition is not specified, default to "inline"
    412         boolean inline =
    413                 TextUtils.isEmpty(dispositionType) || "inline".equalsIgnoreCase(dispositionType);
    414         // The lower-case mime type
    415         String mimeType = part.getMimeType().toLowerCase();
    416 
    417         if (part.getBody() instanceof Multipart) {
    418             // If the part is Multipart but not alternative it's either mixed or
    419             // something we don't know about, which means we treat it as mixed
    420             // per the spec. We just process its pieces recursively.
    421             MimeMultipart mp = (MimeMultipart)part.getBody();
    422             boolean foundHtml = false;
    423             if (mp.getSubTypeForTest().equals("alternative")) {
    424                 for (int i = 0; i < mp.getCount(); i++) {
    425                     if (mp.getBodyPart(i).isMimeType("text/html")) {
    426                         foundHtml = true;
    427                         break;
    428                     }
    429                 }
    430             }
    431             for (int i = 0; i < mp.getCount(); i++) {
    432                 // See if we have text and html
    433                 BodyPart bp = mp.getBodyPart(i);
    434                 // If there's html, don't bother loading text
    435                 if (foundHtml && bp.isMimeType("text/plain")) {
    436                     continue;
    437                 }
    438                 collectParts(bp, viewables, attachments);
    439             }
    440         } else if (part.getBody() instanceof Message) {
    441             // If the part is an embedded message we just continue to process
    442             // it, pulling any viewables or attachments into the running list.
    443             Message message = (Message)part.getBody();
    444             collectParts(message, viewables, attachments);
    445         } else if (inline && (mimeType.startsWith("text") || (mimeType.startsWith("image")))) {
    446             // We'll treat text and images as viewables
    447             viewables.add(part);
    448         } else {
    449             // Everything else is an attachment.
    450             attachments.add(part);
    451         }
    452     }
    453 }
    454