Home | History | Annotate | Download | only in internet
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.emailcommon.internet;
     18 
     19 import android.util.Base64;
     20 import android.util.Base64DataException;
     21 import android.util.Base64InputStream;
     22 import android.util.Log;
     23 
     24 import com.android.emailcommon.Logging;
     25 import com.android.emailcommon.mail.Body;
     26 import com.android.emailcommon.mail.BodyPart;
     27 import com.android.emailcommon.mail.Message;
     28 import com.android.emailcommon.mail.MessagingException;
     29 import com.android.emailcommon.mail.Multipart;
     30 import com.android.emailcommon.mail.Part;
     31 
     32 import org.apache.commons.io.IOUtils;
     33 import org.apache.james.mime4j.codec.EncoderUtil;
     34 import org.apache.james.mime4j.decoder.DecoderUtil;
     35 import org.apache.james.mime4j.decoder.QuotedPrintableInputStream;
     36 import org.apache.james.mime4j.util.CharsetUtil;
     37 
     38 import java.io.ByteArrayOutputStream;
     39 import java.io.IOException;
     40 import java.io.InputStream;
     41 import java.io.OutputStream;
     42 import java.util.ArrayList;
     43 import java.util.regex.Matcher;
     44 import java.util.regex.Pattern;
     45 
     46 public class MimeUtility {
     47 
     48     public static final String MIME_TYPE_RFC822 = "message/rfc822";
     49     private final static Pattern PATTERN_CR_OR_LF = Pattern.compile("\r|\n");
     50 
     51     /**
     52      * Replace sequences of CRLF+WSP with WSP.  Tries to preserve original string
     53      * object whenever possible.
     54      */
     55     public static String unfold(String s) {
     56         if (s == null) {
     57             return null;
     58         }
     59         Matcher patternMatcher = PATTERN_CR_OR_LF.matcher(s);
     60         if (patternMatcher.find()) {
     61             patternMatcher.reset();
     62             s = patternMatcher.replaceAll("");
     63         }
     64         return s;
     65     }
     66 
     67     public static String decode(String s) {
     68         if (s == null) {
     69             return null;
     70         }
     71         return DecoderUtil.decodeEncodedWords(s);
     72     }
     73 
     74     public static String unfoldAndDecode(String s) {
     75         return decode(unfold(s));
     76     }
     77 
     78     // TODO implement proper foldAndEncode
     79     // NOTE: When this really works, we *must* remove all calls to foldAndEncode2() to prevent
     80     // duplication of encoding.
     81     public static String foldAndEncode(String s) {
     82         return s;
     83     }
     84 
     85     /**
     86      * INTERIM version of foldAndEncode that will be used only by Subject: headers.
     87      * This is safer than implementing foldAndEncode() (see above) and risking unknown damage
     88      * to other headers.
     89      *
     90      * TODO: Copy this code to foldAndEncode(), get rid of this function, confirm all working OK.
     91      *
     92      * @param s original string to encode and fold
     93      * @param usedCharacters number of characters already used up by header name
     94 
     95      * @return the String ready to be transmitted
     96      */
     97     public static String foldAndEncode2(String s, int usedCharacters) {
     98         // james.mime4j.codec.EncoderUtil.java
     99         // encode:  encodeIfNecessary(text, usage, numUsedInHeaderName)
    100         // Usage.TEXT_TOKENlooks like the right thing for subjects
    101         // use WORD_ENTITY for address/names
    102 
    103         String encoded = EncoderUtil.encodeIfNecessary(s, EncoderUtil.Usage.TEXT_TOKEN,
    104                 usedCharacters);
    105 
    106         return fold(encoded, usedCharacters);
    107     }
    108 
    109     /**
    110      * INTERIM:  From newer version of org.apache.james (but we don't want to import
    111      * the entire MimeUtil class).
    112      *
    113      * Splits the specified string into a multiple-line representation with
    114      * lines no longer than 76 characters (because the line might contain
    115      * encoded words; see <a href='http://www.faqs.org/rfcs/rfc2047.html'>RFC
    116      * 2047</a> section 2). If the string contains non-whitespace sequences
    117      * longer than 76 characters a line break is inserted at the whitespace
    118      * character following the sequence resulting in a line longer than 76
    119      * characters.
    120      *
    121      * @param s
    122      *            string to split.
    123      * @param usedCharacters
    124      *            number of characters already used up. Usually the number of
    125      *            characters for header field name plus colon and one space.
    126      * @return a multiple-line representation of the given string.
    127      */
    128     public static String fold(String s, int usedCharacters) {
    129         final int maxCharacters = 76;
    130 
    131         final int length = s.length();
    132         if (usedCharacters + length <= maxCharacters)
    133             return s;
    134 
    135         StringBuilder sb = new StringBuilder();
    136 
    137         int lastLineBreak = -usedCharacters;
    138         int wspIdx = indexOfWsp(s, 0);
    139         while (true) {
    140             if (wspIdx == length) {
    141                 sb.append(s.substring(Math.max(0, lastLineBreak)));
    142                 return sb.toString();
    143             }
    144 
    145             int nextWspIdx = indexOfWsp(s, wspIdx + 1);
    146 
    147             if (nextWspIdx - lastLineBreak > maxCharacters) {
    148                 sb.append(s.substring(Math.max(0, lastLineBreak), wspIdx));
    149                 sb.append("\r\n");
    150                 lastLineBreak = wspIdx;
    151             }
    152 
    153             wspIdx = nextWspIdx;
    154         }
    155     }
    156 
    157     /**
    158      * INTERIM:  From newer version of org.apache.james (but we don't want to import
    159      * the entire MimeUtil class).
    160      *
    161      * Search for whitespace.
    162      */
    163     private static int indexOfWsp(String s, int fromIndex) {
    164         final int len = s.length();
    165         for (int index = fromIndex; index < len; index++) {
    166             char c = s.charAt(index);
    167             if (c == ' ' || c == '\t')
    168                 return index;
    169         }
    170         return len;
    171     }
    172 
    173     /**
    174      * Returns the named parameter of a header field. If name is null the first
    175      * parameter is returned, or if there are no additional parameters in the
    176      * field the entire field is returned. Otherwise the named parameter is
    177      * searched for in a case insensitive fashion and returned. If the parameter
    178      * cannot be found the method returns null.
    179      *
    180      * TODO: quite inefficient with the inner trimming & splitting.
    181      * TODO: Also has a latent bug: uses "startsWith" to match the name, which can false-positive.
    182      * TODO: The doc says that for a null name you get the first param, but you get the header.
    183      *    Should probably just fix the doc, but if other code assumes that behavior, fix the code.
    184      * TODO: Need to decode %-escaped strings, as in: filename="ab%22d".
    185      *       ('+' -> ' ' conversion too? check RFC)
    186      *
    187      * @param header
    188      * @param name
    189      * @return the entire header (if name=null), the found parameter, or null
    190      */
    191     public static String getHeaderParameter(String header, String name) {
    192         if (header == null) {
    193             return null;
    194         }
    195         String[] parts = unfold(header).split(";");
    196         if (name == null) {
    197             return parts[0].trim();
    198         }
    199         String lowerCaseName = name.toLowerCase();
    200         for (String part : parts) {
    201             if (part.trim().toLowerCase().startsWith(lowerCaseName)) {
    202                 String[] parameterParts = part.split("=", 2);
    203                 if (parameterParts.length < 2) {
    204                     return null;
    205                 }
    206                 String parameter = parameterParts[1].trim();
    207                 if (parameter.startsWith("\"") && parameter.endsWith("\"")) {
    208                     return parameter.substring(1, parameter.length() - 1);
    209                 } else {
    210                     return parameter;
    211                 }
    212             }
    213         }
    214         return null;
    215     }
    216 
    217     public static Part findFirstPartByMimeType(Part part, String mimeType)
    218             throws MessagingException {
    219         if (part.getBody() instanceof Multipart) {
    220             Multipart multipart = (Multipart)part.getBody();
    221             for (int i = 0, count = multipart.getCount(); i < count; i++) {
    222                 BodyPart bodyPart = multipart.getBodyPart(i);
    223                 Part ret = findFirstPartByMimeType(bodyPart, mimeType);
    224                 if (ret != null) {
    225                     return ret;
    226                 }
    227             }
    228         }
    229         else if (part.getMimeType().equalsIgnoreCase(mimeType)) {
    230             return part;
    231         }
    232         return null;
    233     }
    234 
    235     public static Part findPartByContentId(Part part, String contentId) throws Exception {
    236         if (part.getBody() instanceof Multipart) {
    237             Multipart multipart = (Multipart)part.getBody();
    238             for (int i = 0, count = multipart.getCount(); i < count; i++) {
    239                 BodyPart bodyPart = multipart.getBodyPart(i);
    240                 Part ret = findPartByContentId(bodyPart, contentId);
    241                 if (ret != null) {
    242                     return ret;
    243                 }
    244             }
    245         }
    246         String cid = part.getContentId();
    247         if (contentId.equals(cid)) {
    248             return part;
    249         }
    250         return null;
    251     }
    252 
    253     /**
    254      * Reads the Part's body and returns a String based on any charset conversion that needed
    255      * to be done.
    256      * @param part The part containing a body
    257      * @return a String containing the converted text in the body, or null if there was no text
    258      * or an error during conversion.
    259      */
    260     public static String getTextFromPart(Part part) {
    261         try {
    262             if (part != null && part.getBody() != null) {
    263                 InputStream in = part.getBody().getInputStream();
    264                 String mimeType = part.getMimeType();
    265                 if (mimeType != null && MimeUtility.mimeTypeMatches(mimeType, "text/*")) {
    266                     /*
    267                      * Now we read the part into a buffer for further processing. Because
    268                      * the stream is now wrapped we'll remove any transfer encoding at this point.
    269                      */
    270                     ByteArrayOutputStream out = new ByteArrayOutputStream();
    271                     IOUtils.copy(in, out);
    272                     in.close();
    273                     in = null;      // we want all of our memory back, and close might not release
    274 
    275                     /*
    276                      * We've got a text part, so let's see if it needs to be processed further.
    277                      */
    278                     String charset = getHeaderParameter(part.getContentType(), "charset");
    279                     if (charset != null) {
    280                         /*
    281                          * See if there is conversion from the MIME charset to the Java one.
    282                          */
    283                         charset = CharsetUtil.toJavaCharset(charset);
    284                     }
    285                     /*
    286                      * No encoding, so use us-ascii, which is the standard.
    287                      */
    288                     if (charset == null) {
    289                         charset = "ASCII";
    290                     }
    291                     /*
    292                      * Convert and return as new String
    293                      */
    294                     String result = out.toString(charset);
    295                     out.close();
    296                     return result;
    297                 }
    298             }
    299 
    300         }
    301         catch (OutOfMemoryError oom) {
    302             /*
    303              * If we are not able to process the body there's nothing we can do about it. Return
    304              * null and let the upper layers handle the missing content.
    305              */
    306             Log.e(Logging.LOG_TAG, "Unable to getTextFromPart " + oom.toString());
    307         }
    308         catch (Exception e) {
    309             /*
    310              * If we are not able to process the body there's nothing we can do about it. Return
    311              * null and let the upper layers handle the missing content.
    312              */
    313             Log.e(Logging.LOG_TAG, "Unable to getTextFromPart " + e.toString());
    314         }
    315         return null;
    316     }
    317 
    318     /**
    319      * Returns true if the given mimeType matches the matchAgainst specification.  The comparison
    320      * ignores case and the matchAgainst string may include "*" for a wildcard (e.g. "image/*").
    321      *
    322      * @param mimeType A MIME type to check.
    323      * @param matchAgainst A MIME type to check against. May include wildcards.
    324      * @return true if the mimeType matches
    325      */
    326     public static boolean mimeTypeMatches(String mimeType, String matchAgainst) {
    327         Pattern p = Pattern.compile(matchAgainst.replaceAll("\\*", "\\.\\*"),
    328                 Pattern.CASE_INSENSITIVE);
    329         return p.matcher(mimeType).matches();
    330     }
    331 
    332     /**
    333      * Returns true if the given mimeType matches any of the matchAgainst specifications.  The
    334      * comparison ignores case and the matchAgainst strings may include "*" for a wildcard
    335      * (e.g. "image/*").
    336      *
    337      * @param mimeType A MIME type to check.
    338      * @param matchAgainst An array of MIME types to check against. May include wildcards.
    339      * @return true if the mimeType matches any of the matchAgainst strings
    340      */
    341     public static boolean mimeTypeMatches(String mimeType, String[] matchAgainst) {
    342         for (String matchType : matchAgainst) {
    343             if (mimeTypeMatches(mimeType, matchType)) {
    344                 return true;
    345             }
    346         }
    347         return false;
    348     }
    349 
    350     /**
    351      * Given an input stream and a transfer encoding, return a wrapped input stream for that
    352      * encoding (or the original if none is required)
    353      * @param in the input stream
    354      * @param contentTransferEncoding the content transfer encoding
    355      * @return a properly wrapped stream
    356      */
    357     public static InputStream getInputStreamForContentTransferEncoding(InputStream in,
    358             String contentTransferEncoding) {
    359         if (contentTransferEncoding != null) {
    360             contentTransferEncoding =
    361                 MimeUtility.getHeaderParameter(contentTransferEncoding, null);
    362             if ("quoted-printable".equalsIgnoreCase(contentTransferEncoding)) {
    363                 in = new QuotedPrintableInputStream(in);
    364             }
    365             else if ("base64".equalsIgnoreCase(contentTransferEncoding)) {
    366                 in = new Base64InputStream(in, Base64.DEFAULT);
    367             }
    368         }
    369         return in;
    370     }
    371 
    372     /**
    373      * Removes any content transfer encoding from the stream and returns a Body.
    374      */
    375     public static Body decodeBody(InputStream in, String contentTransferEncoding)
    376             throws IOException {
    377         /*
    378          * We'll remove any transfer encoding by wrapping the stream.
    379          */
    380         in = getInputStreamForContentTransferEncoding(in, contentTransferEncoding);
    381         BinaryTempFileBody tempBody = new BinaryTempFileBody();
    382         OutputStream out = tempBody.getOutputStream();
    383         try {
    384             IOUtils.copy(in, out);
    385         } catch (Base64DataException bde) {
    386             // TODO Need to fix this somehow
    387             //String warning = "\n\n" + Email.getMessageDecodeErrorString();
    388             //out.write(warning.getBytes());
    389         } finally {
    390             out.close();
    391         }
    392         return tempBody;
    393     }
    394 
    395     /**
    396      * Recursively scan a Part (usually a Message) and sort out which of its children will be
    397      * "viewable" and which will be attachments.
    398      *
    399      * @param part The part to be broken down
    400      * @param viewables This arraylist will be populated with all parts that appear to be
    401      * the "message" (e.g. text/plain & text/html)
    402      * @param attachments This arraylist will be populated with all parts that appear to be
    403      * attachments (including inlines)
    404      * @throws MessagingException
    405      */
    406     public static void collectParts(Part part, ArrayList<Part> viewables,
    407             ArrayList<Part> attachments) throws MessagingException {
    408         String disposition = part.getDisposition();
    409         String dispositionType = null;
    410         String dispositionFilename = null;
    411         if (disposition != null) {
    412             dispositionType = MimeUtility.getHeaderParameter(disposition, null);
    413             dispositionFilename = MimeUtility.getHeaderParameter(disposition, "filename");
    414         }
    415         // An attachment filename can be defined in either the Content-Disposition header
    416         // or the Content-Type header. Content-Disposition is preferred, so we only try
    417         // the Content-Type header as a last resort.
    418         if (dispositionFilename == null) {
    419             String contentType = part.getContentType();
    420             dispositionFilename = MimeUtility.getHeaderParameter(contentType, "name");
    421         }
    422         boolean attachmentDisposition = "attachment".equalsIgnoreCase(dispositionType);
    423         // If a disposition is not specified, default to "inline"
    424         boolean inlineDisposition = dispositionType == null
    425                 || "inline".equalsIgnoreCase(dispositionType);
    426 
    427         // A guess that this part is intended to be an attachment
    428         boolean attachment = attachmentDisposition
    429                 || (dispositionFilename != null && !inlineDisposition);
    430 
    431         // A guess that this part is intended to be an inline.
    432         boolean inline = inlineDisposition && (dispositionFilename != null);
    433 
    434         // One or the other
    435         boolean attachmentOrInline = attachment || inline;
    436 
    437         if (part.getBody() instanceof Multipart) {
    438             // If the part is Multipart but not alternative it's either mixed or
    439             // something we don't know about, which means we treat it as mixed
    440             // per the spec. We just process its pieces recursively.
    441             MimeMultipart mp = (MimeMultipart)part.getBody();
    442             boolean foundHtml = false;
    443             if (mp.getSubTypeForTest().equals("alternative")) {
    444                 for (int i = 0; i < mp.getCount(); i++) {
    445                     if (mp.getBodyPart(i).isMimeType("text/html")) {
    446                         foundHtml = true;
    447                         break;
    448                     }
    449                 }
    450             }
    451             for (int i = 0; i < mp.getCount(); i++) {
    452                 // See if we have text and html
    453                 BodyPart bp = mp.getBodyPart(i);
    454                 // If there's html, don't bother loading text
    455                 if (foundHtml && bp.isMimeType("text/plain")) {
    456                     continue;
    457                 }
    458                 collectParts(bp, viewables, attachments);
    459             }
    460         } else if (part.getBody() instanceof Message) {
    461             // If the part is an embedded message we just continue to process
    462             // it, pulling any viewables or attachments into the running list.
    463             Message message = (Message)part.getBody();
    464             collectParts(message, viewables, attachments);
    465         } else if ((!attachmentOrInline) && ("text/html".equalsIgnoreCase(part.getMimeType()))) {
    466             // If the part is HTML and we got this far, it's a viewable part of a mixed
    467             viewables.add(part);
    468         } else if ((!attachmentOrInline) && ("text/plain".equalsIgnoreCase(part.getMimeType()))) {
    469             // If the part is text and we got this far, it's a viewable part of a mixed
    470             viewables.add(part);
    471         } else if (attachmentOrInline) {
    472             // Finally, if it's an attachment or an inline we will include it as an attachment.
    473             attachments.add(part);
    474         }
    475     }
    476 }
    477