Home | History | Annotate | Download | only in internet
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.email.mail.internet;
     18 
     19 import com.android.email.Email;
     20 import com.android.email.mail.Body;
     21 import com.android.email.mail.BodyPart;
     22 import com.android.email.mail.Message;
     23 import com.android.email.mail.MessagingException;
     24 import com.android.email.mail.Multipart;
     25 import com.android.email.mail.Part;
     26 
     27 import org.apache.commons.io.IOUtils;
     28 import org.apache.james.mime4j.codec.EncoderUtil;
     29 import org.apache.james.mime4j.decoder.DecoderUtil;
     30 import org.apache.james.mime4j.decoder.QuotedPrintableInputStream;
     31 import org.apache.james.mime4j.util.CharsetUtil;
     32 
     33 import android.util.Log;
     34 import android.util.Base64;
     35 import android.util.Base64InputStream;
     36 
     37 import java.io.ByteArrayOutputStream;
     38 import java.io.IOException;
     39 import java.io.InputStream;
     40 import java.io.OutputStream;
     41 import java.util.ArrayList;
     42 import java.util.regex.Matcher;
     43 import java.util.regex.Pattern;
     44 
     45 public class MimeUtility {
     46 
     47     private final static Pattern PATTERN_CR_OR_LF = Pattern.compile("\r|\n");
     48 
     49     /**
     50      * Replace sequences of CRLF+WSP with WSP.  Tries to preserve original string
     51      * object whenever possible.
     52      */
     53     public static String unfold(String s) {
     54         if (s == null) {
     55             return null;
     56         }
     57         Matcher patternMatcher = PATTERN_CR_OR_LF.matcher(s);
     58         if (patternMatcher.find()) {
     59             patternMatcher.reset();
     60             s = patternMatcher.replaceAll("");
     61         }
     62         return s;
     63     }
     64 
     65     public static String decode(String s) {
     66         if (s == null) {
     67             return null;
     68         }
     69         return DecoderUtil.decodeEncodedWords(s);
     70     }
     71 
     72     public static String unfoldAndDecode(String s) {
     73         return decode(unfold(s));
     74     }
     75 
     76     // TODO implement proper foldAndEncode
     77     // NOTE: When this really works, we *must* remove all calls to foldAndEncode2() to prevent
     78     // duplication of encoding.
     79     public static String foldAndEncode(String s) {
     80         return s;
     81     }
     82 
     83     /**
     84      * INTERIM version of foldAndEncode that will be used only by Subject: headers.
     85      * This is safer than implementing foldAndEncode() (see above) and risking unknown damage
     86      * to other headers.
     87      *
     88      * TODO: Copy this code to foldAndEncode(), get rid of this function, confirm all working OK.
     89      *
     90      * @param s original string to encode and fold
     91      * @param usedCharacters number of characters already used up by header name
     92 
     93      * @return the String ready to be transmitted
     94      */
     95     public static String foldAndEncode2(String s, int usedCharacters) {
     96         // james.mime4j.codec.EncoderUtil.java
     97         // encode:  encodeIfNecessary(text, usage, numUsedInHeaderName)
     98         // Usage.TEXT_TOKENlooks like the right thing for subjects
     99         // use WORD_ENTITY for address/names
    100 
    101         String encoded = EncoderUtil.encodeIfNecessary(s, EncoderUtil.Usage.TEXT_TOKEN,
    102                 usedCharacters);
    103 
    104         return fold(encoded, usedCharacters);
    105     }
    106 
    107     /**
    108      * INTERIM:  From newer version of org.apache.james (but we don't want to import
    109      * the entire MimeUtil class).
    110      *
    111      * Splits the specified string into a multiple-line representation with
    112      * lines no longer than 76 characters (because the line might contain
    113      * encoded words; see <a href='http://www.faqs.org/rfcs/rfc2047.html'>RFC
    114      * 2047</a> section 2). If the string contains non-whitespace sequences
    115      * longer than 76 characters a line break is inserted at the whitespace
    116      * character following the sequence resulting in a line longer than 76
    117      * characters.
    118      *
    119      * @param s
    120      *            string to split.
    121      * @param usedCharacters
    122      *            number of characters already used up. Usually the number of
    123      *            characters for header field name plus colon and one space.
    124      * @return a multiple-line representation of the given string.
    125      */
    126     public static String fold(String s, int usedCharacters) {
    127         final int maxCharacters = 76;
    128 
    129         final int length = s.length();
    130         if (usedCharacters + length <= maxCharacters)
    131             return s;
    132 
    133         StringBuilder sb = new StringBuilder();
    134 
    135         int lastLineBreak = -usedCharacters;
    136         int wspIdx = indexOfWsp(s, 0);
    137         while (true) {
    138             if (wspIdx == length) {
    139                 sb.append(s.substring(Math.max(0, lastLineBreak)));
    140                 return sb.toString();
    141             }
    142 
    143             int nextWspIdx = indexOfWsp(s, wspIdx + 1);
    144 
    145             if (nextWspIdx - lastLineBreak > maxCharacters) {
    146                 sb.append(s.substring(Math.max(0, lastLineBreak), wspIdx));
    147                 sb.append("\r\n");
    148                 lastLineBreak = wspIdx;
    149             }
    150 
    151             wspIdx = nextWspIdx;
    152         }
    153     }
    154 
    155     /**
    156      * INTERIM:  From newer version of org.apache.james (but we don't want to import
    157      * the entire MimeUtil class).
    158      *
    159      * Search for whitespace.
    160      */
    161     private static int indexOfWsp(String s, int fromIndex) {
    162         final int len = s.length();
    163         for (int index = fromIndex; index < len; index++) {
    164             char c = s.charAt(index);
    165             if (c == ' ' || c == '\t')
    166                 return index;
    167         }
    168         return len;
    169     }
    170 
    171     /**
    172      * Returns the named parameter of a header field. If name is null the first
    173      * parameter is returned, or if there are no additional parameters in the
    174      * field the entire field is returned. Otherwise the named parameter is
    175      * searched for in a case insensitive fashion and returned. If the parameter
    176      * cannot be found the method returns null.
    177      *
    178      * TODO: quite inefficient with the inner trimming & splitting.
    179      * TODO: Also has a latent bug: uses "startsWith" to match the name, which can false-positive.
    180      * TODO: The doc says that for a null name you get the first param, but you get the header.
    181      *    Should probably just fix the doc, but if other code assumes that behavior, fix the code.
    182      *
    183      * @param header
    184      * @param name
    185      * @return
    186      */
    187     public static String getHeaderParameter(String header, String name) {
    188         if (header == null) {
    189             return null;
    190         }
    191         String[] parts = unfold(header).split(";");
    192         if (name == null) {
    193             return parts[0].trim();
    194         }
    195         String lowerCaseName = name.toLowerCase();
    196         for (String part : parts) {
    197             if (part.trim().toLowerCase().startsWith(lowerCaseName)) {
    198                 String parameter = part.split("=", 2)[1].trim();
    199                 if (parameter.startsWith("\"") && parameter.endsWith("\"")) {
    200                     return parameter.substring(1, parameter.length() - 1);
    201                 }
    202                 else {
    203                     return parameter;
    204                 }
    205             }
    206         }
    207         return null;
    208     }
    209 
    210     public static Part findFirstPartByMimeType(Part part, String mimeType)
    211             throws MessagingException {
    212         if (part.getBody() instanceof Multipart) {
    213             Multipart multipart = (Multipart)part.getBody();
    214             for (int i = 0, count = multipart.getCount(); i < count; i++) {
    215                 BodyPart bodyPart = multipart.getBodyPart(i);
    216                 Part ret = findFirstPartByMimeType(bodyPart, mimeType);
    217                 if (ret != null) {
    218                     return ret;
    219                 }
    220             }
    221         }
    222         else if (part.getMimeType().equalsIgnoreCase(mimeType)) {
    223             return part;
    224         }
    225         return null;
    226     }
    227 
    228     public static Part findPartByContentId(Part part, String contentId) throws Exception {
    229         if (part.getBody() instanceof Multipart) {
    230             Multipart multipart = (Multipart)part.getBody();
    231             for (int i = 0, count = multipart.getCount(); i < count; i++) {
    232                 BodyPart bodyPart = multipart.getBodyPart(i);
    233                 Part ret = findPartByContentId(bodyPart, contentId);
    234                 if (ret != null) {
    235                     return ret;
    236                 }
    237             }
    238         }
    239         String cid = part.getContentId();
    240         if (contentId.equals(cid)) {
    241             return part;
    242         }
    243         return null;
    244     }
    245 
    246     /**
    247      * Reads the Part's body and returns a String based on any charset conversion that needed
    248      * to be done.
    249      * @param part The part containing a body
    250      * @return a String containing the converted text in the body, or null if there was no text
    251      * or an error during conversion.
    252      */
    253     public static String getTextFromPart(Part part) {
    254         try {
    255             if (part != null && part.getBody() != null) {
    256                 InputStream in = part.getBody().getInputStream();
    257                 String mimeType = part.getMimeType();
    258                 if (mimeType != null && MimeUtility.mimeTypeMatches(mimeType, "text/*")) {
    259                     /*
    260                      * Now we read the part into a buffer for further processing. Because
    261                      * the stream is now wrapped we'll remove any transfer encoding at this point.
    262                      */
    263                     ByteArrayOutputStream out = new ByteArrayOutputStream();
    264                     IOUtils.copy(in, out);
    265                     in.close();
    266                     in = null;      // we want all of our memory back, and close might not release
    267 
    268                     /*
    269                      * We've got a text part, so let's see if it needs to be processed further.
    270                      */
    271                     String charset = getHeaderParameter(part.getContentType(), "charset");
    272                     if (charset != null) {
    273                         /*
    274                          * See if there is conversion from the MIME charset to the Java one.
    275                          */
    276                         charset = CharsetUtil.toJavaCharset(charset);
    277                     }
    278                     /*
    279                      * No encoding, so use us-ascii, which is the standard.
    280                      */
    281                     if (charset == null) {
    282                         charset = "ASCII";
    283                     }
    284                     /*
    285                      * Convert and return as new String
    286                      */
    287                     String result = out.toString(charset);
    288                     out.close();
    289                     return result;
    290                 }
    291             }
    292 
    293         }
    294         catch (OutOfMemoryError oom) {
    295             /*
    296              * If we are not able to process the body there's nothing we can do about it. Return
    297              * null and let the upper layers handle the missing content.
    298              */
    299             Log.e(Email.LOG_TAG, "Unable to getTextFromPart " + oom.toString());
    300         }
    301         catch (Exception e) {
    302             /*
    303              * If we are not able to process the body there's nothing we can do about it. Return
    304              * null and let the upper layers handle the missing content.
    305              */
    306             Log.e(Email.LOG_TAG, "Unable to getTextFromPart " + e.toString());
    307         }
    308         return null;
    309     }
    310 
    311     /**
    312      * Returns true if the given mimeType matches the matchAgainst specification.  The comparison
    313      * ignores case and the matchAgainst string may include "*" for a wildcard (e.g. "image/*").
    314      *
    315      * @param mimeType A MIME type to check.
    316      * @param matchAgainst A MIME type to check against. May include wildcards.
    317      * @return true if the mimeType matches
    318      */
    319     public static boolean mimeTypeMatches(String mimeType, String matchAgainst) {
    320         Pattern p = Pattern.compile(matchAgainst.replaceAll("\\*", "\\.\\*"),
    321                 Pattern.CASE_INSENSITIVE);
    322         return p.matcher(mimeType).matches();
    323     }
    324 
    325     /**
    326      * Returns true if the given mimeType matches any of the matchAgainst specifications.  The
    327      * comparison ignores case and the matchAgainst strings may include "*" for a wildcard
    328      * (e.g. "image/*").
    329      *
    330      * @param mimeType A MIME type to check.
    331      * @param matchAgainst An array of MIME types to check against. May include wildcards.
    332      * @return true if the mimeType matches any of the matchAgainst strings
    333      */
    334     public static boolean mimeTypeMatches(String mimeType, String[] matchAgainst) {
    335         for (String matchType : matchAgainst) {
    336             if (mimeTypeMatches(mimeType, matchType)) {
    337                 return true;
    338             }
    339         }
    340         return false;
    341     }
    342 
    343     /**
    344      * Removes any content transfer encoding from the stream and returns a Body.
    345      */
    346     public static Body decodeBody(InputStream in, String contentTransferEncoding)
    347             throws IOException {
    348         /*
    349          * We'll remove any transfer encoding by wrapping the stream.
    350          */
    351         if (contentTransferEncoding != null) {
    352             contentTransferEncoding =
    353                 MimeUtility.getHeaderParameter(contentTransferEncoding, null);
    354             if ("quoted-printable".equalsIgnoreCase(contentTransferEncoding)) {
    355                 in = new QuotedPrintableInputStream(in);
    356             }
    357             else if ("base64".equalsIgnoreCase(contentTransferEncoding)) {
    358                 in = new Base64InputStream(in, Base64.DEFAULT);
    359             }
    360         }
    361 
    362         BinaryTempFileBody tempBody = new BinaryTempFileBody();
    363         OutputStream out = tempBody.getOutputStream();
    364         IOUtils.copy(in, out);
    365         out.close();
    366         return tempBody;
    367     }
    368 
    369     /**
    370      * An unfortunately named method that makes decisions about a Part (usually a Message)
    371      * as to which of it's children will be "viewable" and which will be attachments.
    372      * The method recursively sorts the viewables and attachments into seperate
    373      * lists for further processing.
    374      * @param part
    375      * @param viewables
    376      * @param attachments
    377      * @throws MessagingException
    378      */
    379     public static void collectParts(Part part, ArrayList<Part> viewables,
    380             ArrayList<Part> attachments) throws MessagingException {
    381         String disposition = part.getDisposition();
    382         String dispositionType = null;
    383         String dispositionFilename = null;
    384         if (disposition != null) {
    385             dispositionType = MimeUtility.getHeaderParameter(disposition, null);
    386             dispositionFilename = MimeUtility.getHeaderParameter(disposition, "filename");
    387         }
    388 
    389         /*
    390          * A best guess that this part is intended to be an attachment and not inline.
    391          */
    392         boolean attachment = ("attachment".equalsIgnoreCase(dispositionType))
    393                 || (dispositionFilename != null)
    394                 && (!"inline".equalsIgnoreCase(dispositionType));
    395 
    396         /*
    397          * If the part is Multipart but not alternative it's either mixed or
    398          * something we don't know about, which means we treat it as mixed
    399          * per the spec. We just process it's pieces recursively.
    400          */
    401         if (part.getBody() instanceof Multipart) {
    402             Multipart mp = (Multipart)part.getBody();
    403             for (int i = 0; i < mp.getCount(); i++) {
    404                 collectParts(mp.getBodyPart(i), viewables, attachments);
    405             }
    406         }
    407         /*
    408          * If the part is an embedded message we just continue to process
    409          * it, pulling any viewables or attachments into the running list.
    410          */
    411         else if (part.getBody() instanceof Message) {
    412             Message message = (Message)part.getBody();
    413             collectParts(message, viewables, attachments);
    414         }
    415         /*
    416          * If the part is HTML and it got this far it's part of a mixed (et
    417          * al) and should be rendered inline.
    418          */
    419         else if ((!attachment) && (part.getMimeType().equalsIgnoreCase("text/html"))) {
    420             viewables.add(part);
    421         }
    422         /*
    423          * If the part is plain text and it got this far it's part of a
    424          * mixed (et al) and should be rendered inline.
    425          */
    426         else if ((!attachment) && (part.getMimeType().equalsIgnoreCase("text/plain"))) {
    427             viewables.add(part);
    428         }
    429         /*
    430          * Finally, if it's nothing else we will include it as an attachment.
    431          */
    432         else {
    433             attachments.add(part);
    434         }
    435     }
    436 }
    437