// Code-browser navigation residue (not part of the source): Home | History | Annotate | Download | only in internet
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.emailcommon.internet;
     18 
     19 import android.text.TextUtils;
     20 import android.util.Base64;
     21 import android.util.Base64DataException;
     22 import android.util.Base64InputStream;
     23 import android.util.Log;
     24 
     25 import com.android.emailcommon.mail.Body;
     26 import com.android.emailcommon.mail.BodyPart;
     27 import com.android.emailcommon.mail.Message;
     28 import com.android.emailcommon.mail.MessagingException;
     29 import com.android.emailcommon.mail.Multipart;
     30 import com.android.emailcommon.mail.Part;
     31 
     32 import org.apache.commons.io.IOUtils;
     33 import org.apache.james.mime4j.codec.EncoderUtil;
     34 import org.apache.james.mime4j.decoder.DecoderUtil;
     35 import org.apache.james.mime4j.decoder.QuotedPrintableInputStream;
     36 import org.apache.james.mime4j.util.CharsetUtil;
     37 
     38 import java.io.ByteArrayOutputStream;
     39 import java.io.IOException;
     40 import java.io.InputStream;
     41 import java.io.OutputStream;
     42 import java.util.ArrayList;
     43 import java.util.regex.Matcher;
     44 import java.util.regex.Pattern;
     45 
     46 public class MimeUtility {
     47     private static final String LOG_TAG = "Email";
     48 
     49     public static final String MIME_TYPE_RFC822 = "message/rfc822";
     50     private final static Pattern PATTERN_CR_OR_LF = Pattern.compile("\r|\n");
     51 
     52     /**
     53      * Replace sequences of CRLF+WSP with WSP.  Tries to preserve original string
     54      * object whenever possible.
     55      */
     56     public static String unfold(String s) {
     57         if (s == null) {
     58             return null;
     59         }
     60         Matcher patternMatcher = PATTERN_CR_OR_LF.matcher(s);
     61         if (patternMatcher.find()) {
     62             patternMatcher.reset();
     63             s = patternMatcher.replaceAll("");
     64         }
     65         return s;
     66     }
     67 
     68     public static String decode(String s) {
     69         if (s == null) {
     70             return null;
     71         }
     72         return DecoderUtil.decodeEncodedWords(s);
     73     }
     74 
     75     public static String unfoldAndDecode(String s) {
     76         return decode(unfold(s));
     77     }
     78 
     79     // TODO implement proper foldAndEncode
     80     // NOTE: When this really works, we *must* remove all calls to foldAndEncode2() to prevent
     81     // duplication of encoding.
     82     public static String foldAndEncode(String s) {
     83         return s;
     84     }
     85 
     86     /**
     87      * INTERIM version of foldAndEncode that will be used only by Subject: headers.
     88      * This is safer than implementing foldAndEncode() (see above) and risking unknown damage
     89      * to other headers.
     90      *
     91      * TODO: Copy this code to foldAndEncode(), get rid of this function, confirm all working OK.
     92      *
     93      * @param s original string to encode and fold
     94      * @param usedCharacters number of characters already used up by header name
     95 
     96      * @return the String ready to be transmitted
     97      */
     98     public static String foldAndEncode2(String s, int usedCharacters) {
     99         // james.mime4j.codec.EncoderUtil.java
    100         // encode:  encodeIfNecessary(text, usage, numUsedInHeaderName)
    101         // Usage.TEXT_TOKENlooks like the right thing for subjects
    102         // use WORD_ENTITY for address/names
    103 
    104         String encoded = EncoderUtil.encodeIfNecessary(s, EncoderUtil.Usage.TEXT_TOKEN,
    105                 usedCharacters);
    106 
    107         return fold(encoded, usedCharacters);
    108     }
    109 
    110     /**
    111      * INTERIM:  From newer version of org.apache.james (but we don't want to import
    112      * the entire MimeUtil class).
    113      *
    114      * Splits the specified string into a multiple-line representation with
    115      * lines no longer than 76 characters (because the line might contain
    116      * encoded words; see <a href='http://www.faqs.org/rfcs/rfc2047.html'>RFC
    117      * 2047</a> section 2). If the string contains non-whitespace sequences
    118      * longer than 76 characters a line break is inserted at the whitespace
    119      * character following the sequence resulting in a line longer than 76
    120      * characters.
    121      *
    122      * @param s
    123      *            string to split.
    124      * @param usedCharacters
    125      *            number of characters already used up. Usually the number of
    126      *            characters for header field name plus colon and one space.
    127      * @return a multiple-line representation of the given string.
    128      */
    129     public static String fold(String s, int usedCharacters) {
    130         final int maxCharacters = 76;
    131 
    132         final int length = s.length();
    133         if (usedCharacters + length <= maxCharacters)
    134             return s;
    135 
    136         StringBuilder sb = new StringBuilder();
    137 
    138         int lastLineBreak = -usedCharacters;
    139         int wspIdx = indexOfWsp(s, 0);
    140         while (true) {
    141             if (wspIdx == length) {
    142                 sb.append(s.substring(Math.max(0, lastLineBreak)));
    143                 return sb.toString();
    144             }
    145 
    146             int nextWspIdx = indexOfWsp(s, wspIdx + 1);
    147 
    148             if (nextWspIdx - lastLineBreak > maxCharacters) {
    149                 sb.append(s.substring(Math.max(0, lastLineBreak), wspIdx));
    150                 sb.append("\r\n");
    151                 lastLineBreak = wspIdx;
    152             }
    153 
    154             wspIdx = nextWspIdx;
    155         }
    156     }
    157 
    158     /**
    159      * INTERIM:  From newer version of org.apache.james (but we don't want to import
    160      * the entire MimeUtil class).
    161      *
    162      * Search for whitespace.
    163      */
    164     private static int indexOfWsp(String s, int fromIndex) {
    165         final int len = s.length();
    166         for (int index = fromIndex; index < len; index++) {
    167             char c = s.charAt(index);
    168             if (c == ' ' || c == '\t')
    169                 return index;
    170         }
    171         return len;
    172     }
    173 
    174     /**
    175      * Returns the named parameter of a header field. If name is null the first
    176      * parameter is returned, or if there are no additional parameters in the
    177      * field the entire field is returned. Otherwise the named parameter is
    178      * searched for in a case insensitive fashion and returned. If the parameter
    179      * cannot be found the method returns null.
    180      *
    181      * TODO: quite inefficient with the inner trimming & splitting.
    182      * TODO: Also has a latent bug: uses "startsWith" to match the name, which can false-positive.
    183      * TODO: The doc says that for a null name you get the first param, but you get the header.
    184      *    Should probably just fix the doc, but if other code assumes that behavior, fix the code.
    185      * TODO: Need to decode %-escaped strings, as in: filename="ab%22d".
    186      *       ('+' -> ' ' conversion too? check RFC)
    187      *
    188      * @param header
    189      * @param name
    190      * @return the entire header (if name=null), the found parameter, or null
    191      */
    192     public static String getHeaderParameter(String header, String name) {
    193         if (header == null) {
    194             return null;
    195         }
    196         String[] parts = unfold(header).split(";");
    197         if (name == null) {
    198             return parts[0].trim();
    199         }
    200         String lowerCaseName = name.toLowerCase();
    201         for (String part : parts) {
    202             if (part.trim().toLowerCase().startsWith(lowerCaseName)) {
    203                 String[] parameterParts = part.split("=", 2);
    204                 if (parameterParts.length < 2) {
    205                     return null;
    206                 }
    207                 String parameter = parameterParts[1].trim();
    208                 if (parameter.startsWith("\"") && parameter.endsWith("\"")) {
    209                     return parameter.substring(1, parameter.length() - 1);
    210                 } else {
    211                     return parameter;
    212                 }
    213             }
    214         }
    215         return null;
    216     }
    217 
    218     /**
    219      * Reads the Part's body and returns a String based on any charset conversion that needed
    220      * to be done.
    221      * @param part The part containing a body
    222      * @return a String containing the converted text in the body, or null if there was no text
    223      * or an error during conversion.
    224      */
    225     public static String getTextFromPart(Part part) {
    226         try {
    227             if (part != null && part.getBody() != null) {
    228                 InputStream in = part.getBody().getInputStream();
    229                 String mimeType = part.getMimeType();
    230                 if (mimeType != null && MimeUtility.mimeTypeMatches(mimeType, "text/*")) {
    231                     /*
    232                      * Now we read the part into a buffer for further processing. Because
    233                      * the stream is now wrapped we'll remove any transfer encoding at this point.
    234                      */
    235                     ByteArrayOutputStream out = new ByteArrayOutputStream();
    236                     IOUtils.copy(in, out);
    237                     in.close();
    238                     in = null;      // we want all of our memory back, and close might not release
    239 
    240                     /*
    241                      * We've got a text part, so let's see if it needs to be processed further.
    242                      */
    243                     String charset = getHeaderParameter(part.getContentType(), "charset");
    244                     if (charset != null) {
    245                         /*
    246                          * See if there is conversion from the MIME charset to the Java one.
    247                          */
    248                         charset = CharsetUtil.toJavaCharset(charset);
    249                     }
    250                     /*
    251                      * No encoding, so use us-ascii, which is the standard.
    252                      */
    253                     if (charset == null) {
    254                         charset = "ASCII";
    255                     }
    256                     /*
    257                      * Convert and return as new String
    258                      */
    259                     String result = out.toString(charset);
    260                     out.close();
    261                     return result;
    262                 }
    263             }
    264 
    265         }
    266         catch (OutOfMemoryError oom) {
    267             /*
    268              * If we are not able to process the body there's nothing we can do about it. Return
    269              * null and let the upper layers handle the missing content.
    270              */
    271             Log.e(LOG_TAG, "Unable to getTextFromPart " + oom.toString());
    272         }
    273         catch (Exception e) {
    274             /*
    275              * If we are not able to process the body there's nothing we can do about it. Return
    276              * null and let the upper layers handle the missing content.
    277              */
    278             Log.e(LOG_TAG, "Unable to getTextFromPart " + e.toString());
    279         }
    280         return null;
    281     }
    282 
    283     /**
    284      * Returns true if the given mimeType matches the matchAgainst specification.  The comparison
    285      * ignores case and the matchAgainst string may include "*" for a wildcard (e.g. "image/*").
    286      *
    287      * @param mimeType A MIME type to check.
    288      * @param matchAgainst A MIME type to check against. May include wildcards.
    289      * @return true if the mimeType matches
    290      */
    291     public static boolean mimeTypeMatches(String mimeType, String matchAgainst) {
    292         Pattern p = Pattern.compile(matchAgainst.replaceAll("\\*", "\\.\\*"),
    293                 Pattern.CASE_INSENSITIVE);
    294         return p.matcher(mimeType).matches();
    295     }
    296 
    297     /**
    298      * Returns true if the given mimeType matches any of the matchAgainst specifications.  The
    299      * comparison ignores case and the matchAgainst strings may include "*" for a wildcard
    300      * (e.g. "image/*").
    301      *
    302      * @param mimeType A MIME type to check.
    303      * @param matchAgainst An array of MIME types to check against. May include wildcards.
    304      * @return true if the mimeType matches any of the matchAgainst strings
    305      */
    306     public static boolean mimeTypeMatches(String mimeType, String[] matchAgainst) {
    307         for (String matchType : matchAgainst) {
    308             if (mimeTypeMatches(mimeType, matchType)) {
    309                 return true;
    310             }
    311         }
    312         return false;
    313     }
    314 
    315     /**
    316      * Given an input stream and a transfer encoding, return a wrapped input stream for that
    317      * encoding (or the original if none is required)
    318      * @param in the input stream
    319      * @param contentTransferEncoding the content transfer encoding
    320      * @return a properly wrapped stream
    321      */
    322     public static InputStream getInputStreamForContentTransferEncoding(InputStream in,
    323             String contentTransferEncoding) {
    324         if (contentTransferEncoding != null) {
    325             contentTransferEncoding =
    326                 MimeUtility.getHeaderParameter(contentTransferEncoding, null);
    327             if ("quoted-printable".equalsIgnoreCase(contentTransferEncoding)) {
    328                 in = new QuotedPrintableInputStream(in);
    329             }
    330             else if ("base64".equalsIgnoreCase(contentTransferEncoding)) {
    331                 in = new Base64InputStream(in, Base64.DEFAULT);
    332             }
    333         }
    334         return in;
    335     }
    336 
    337     /**
    338      * Removes any content transfer encoding from the stream and returns a Body.
    339      */
    340     public static Body decodeBody(InputStream in, String contentTransferEncoding)
    341             throws IOException {
    342         /*
    343          * We'll remove any transfer encoding by wrapping the stream.
    344          */
    345         in = getInputStreamForContentTransferEncoding(in, contentTransferEncoding);
    346         BinaryTempFileBody tempBody = new BinaryTempFileBody();
    347         OutputStream out = tempBody.getOutputStream();
    348         try {
    349             IOUtils.copy(in, out);
    350         } catch (Base64DataException bde) {
    351             // TODO Need to fix this somehow
    352             //String warning = "\n\n" + Email.getMessageDecodeErrorString();
    353             //out.write(warning.getBytes());
    354         } finally {
    355             out.close();
    356         }
    357         return tempBody;
    358     }
    359 
    360     /**
    361      * Recursively scan a Part (usually a Message) and sort out which of its children will be
    362      * "viewable" and which will be attachments.
    363      *
    364      * @param part The part to be broken down
    365      * @param viewables This arraylist will be populated with all parts that appear to be
    366      * the "message" (e.g. text/plain & text/html)
    367      * @param attachments This arraylist will be populated with all parts that appear to be
    368      * attachments (including inlines)
    369      * @throws MessagingException
    370      */
    371     public static void collectParts(Part part, ArrayList<Part> viewables,
    372             ArrayList<Part> attachments) throws MessagingException {
    373         String disposition = part.getDisposition();
    374         String dispositionType = MimeUtility.getHeaderParameter(disposition, null);
    375         // If a disposition is not specified, default to "inline"
    376         boolean inline =
    377                 TextUtils.isEmpty(dispositionType) || "inline".equalsIgnoreCase(dispositionType);
    378         // The lower-case mime type
    379         String mimeType = part.getMimeType().toLowerCase();
    380 
    381         if (part.getBody() instanceof Multipart) {
    382             // If the part is Multipart but not alternative it's either mixed or
    383             // something we don't know about, which means we treat it as mixed
    384             // per the spec. We just process its pieces recursively.
    385             MimeMultipart mp = (MimeMultipart)part.getBody();
    386             boolean foundHtml = false;
    387             if (mp.getSubTypeForTest().equals("alternative")) {
    388                 for (int i = 0; i < mp.getCount(); i++) {
    389                     if (mp.getBodyPart(i).isMimeType("text/html")) {
    390                         foundHtml = true;
    391                         break;
    392                     }
    393                 }
    394             }
    395             for (int i = 0; i < mp.getCount(); i++) {
    396                 // See if we have text and html
    397                 BodyPart bp = mp.getBodyPart(i);
    398                 // If there's html, don't bother loading text
    399                 if (foundHtml && bp.isMimeType("text/plain")) {
    400                     continue;
    401                 }
    402                 collectParts(bp, viewables, attachments);
    403             }
    404         } else if (part.getBody() instanceof Message) {
    405             // If the part is an embedded message we just continue to process
    406             // it, pulling any viewables or attachments into the running list.
    407             Message message = (Message)part.getBody();
    408             collectParts(message, viewables, attachments);
    409         } else if (inline && (mimeType.startsWith("text") || (mimeType.startsWith("image")))) {
    410             // We'll treat text and images as viewables
    411             viewables.add(part);
    412         } else {
    413             // Everything else is an attachment.
    414             attachments.add(part);
    415         }
    416     }
    417 }
    418