Home | History | Annotate | Download | only in internet
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.email.mail.internet;
     18 
     19 import com.android.email.Email;
     20 import com.android.email.mail.Body;
     21 import com.android.email.mail.BodyPart;
     22 import com.android.email.mail.Message;
     23 import com.android.email.mail.MessagingException;
     24 import com.android.email.mail.Multipart;
     25 import com.android.email.mail.Part;
     26 
     27 import org.apache.commons.io.IOUtils;
     28 import org.apache.james.mime4j.codec.EncoderUtil;
     29 import org.apache.james.mime4j.decoder.DecoderUtil;
     30 import org.apache.james.mime4j.decoder.QuotedPrintableInputStream;
     31 import org.apache.james.mime4j.util.CharsetUtil;
     32 
     33 import android.util.Log;
     34 import android.util.Base64;
     35 import android.util.Base64InputStream;
     36 
     37 import java.io.ByteArrayOutputStream;
     38 import java.io.IOException;
     39 import java.io.InputStream;
     40 import java.io.OutputStream;
     41 import java.util.ArrayList;
     42 import java.util.regex.Matcher;
     43 import java.util.regex.Pattern;
     44 
     45 public class MimeUtility {
     46 
     47     public static final String MIME_TYPE_RFC822 = "message/rfc822";
     48     private final static Pattern PATTERN_CR_OR_LF = Pattern.compile("\r|\n");
     49 
     50     /**
     51      * Replace sequences of CRLF+WSP with WSP.  Tries to preserve original string
     52      * object whenever possible.
     53      */
     54     public static String unfold(String s) {
     55         if (s == null) {
     56             return null;
     57         }
     58         Matcher patternMatcher = PATTERN_CR_OR_LF.matcher(s);
     59         if (patternMatcher.find()) {
     60             patternMatcher.reset();
     61             s = patternMatcher.replaceAll("");
     62         }
     63         return s;
     64     }
     65 
     66     public static String decode(String s) {
     67         if (s == null) {
     68             return null;
     69         }
     70         return DecoderUtil.decodeEncodedWords(s);
     71     }
     72 
     73     public static String unfoldAndDecode(String s) {
     74         return decode(unfold(s));
     75     }
     76 
     77     // TODO implement proper foldAndEncode
     78     // NOTE: When this really works, we *must* remove all calls to foldAndEncode2() to prevent
     79     // duplication of encoding.
     80     public static String foldAndEncode(String s) {
     81         return s;
     82     }
     83 
     84     /**
     85      * INTERIM version of foldAndEncode that will be used only by Subject: headers.
     86      * This is safer than implementing foldAndEncode() (see above) and risking unknown damage
     87      * to other headers.
     88      *
     89      * TODO: Copy this code to foldAndEncode(), get rid of this function, confirm all working OK.
     90      *
     91      * @param s original string to encode and fold
     92      * @param usedCharacters number of characters already used up by header name
     93 
     94      * @return the String ready to be transmitted
     95      */
     96     public static String foldAndEncode2(String s, int usedCharacters) {
     97         // james.mime4j.codec.EncoderUtil.java
     98         // encode:  encodeIfNecessary(text, usage, numUsedInHeaderName)
     99         // Usage.TEXT_TOKENlooks like the right thing for subjects
    100         // use WORD_ENTITY for address/names
    101 
    102         String encoded = EncoderUtil.encodeIfNecessary(s, EncoderUtil.Usage.TEXT_TOKEN,
    103                 usedCharacters);
    104 
    105         return fold(encoded, usedCharacters);
    106     }
    107 
    108     /**
    109      * INTERIM:  From newer version of org.apache.james (but we don't want to import
    110      * the entire MimeUtil class).
    111      *
    112      * Splits the specified string into a multiple-line representation with
    113      * lines no longer than 76 characters (because the line might contain
    114      * encoded words; see <a href='http://www.faqs.org/rfcs/rfc2047.html'>RFC
    115      * 2047</a> section 2). If the string contains non-whitespace sequences
    116      * longer than 76 characters a line break is inserted at the whitespace
    117      * character following the sequence resulting in a line longer than 76
    118      * characters.
    119      *
    120      * @param s
    121      *            string to split.
    122      * @param usedCharacters
    123      *            number of characters already used up. Usually the number of
    124      *            characters for header field name plus colon and one space.
    125      * @return a multiple-line representation of the given string.
    126      */
    127     public static String fold(String s, int usedCharacters) {
    128         final int maxCharacters = 76;
    129 
    130         final int length = s.length();
    131         if (usedCharacters + length <= maxCharacters)
    132             return s;
    133 
    134         StringBuilder sb = new StringBuilder();
    135 
    136         int lastLineBreak = -usedCharacters;
    137         int wspIdx = indexOfWsp(s, 0);
    138         while (true) {
    139             if (wspIdx == length) {
    140                 sb.append(s.substring(Math.max(0, lastLineBreak)));
    141                 return sb.toString();
    142             }
    143 
    144             int nextWspIdx = indexOfWsp(s, wspIdx + 1);
    145 
    146             if (nextWspIdx - lastLineBreak > maxCharacters) {
    147                 sb.append(s.substring(Math.max(0, lastLineBreak), wspIdx));
    148                 sb.append("\r\n");
    149                 lastLineBreak = wspIdx;
    150             }
    151 
    152             wspIdx = nextWspIdx;
    153         }
    154     }
    155 
    156     /**
    157      * INTERIM:  From newer version of org.apache.james (but we don't want to import
    158      * the entire MimeUtil class).
    159      *
    160      * Search for whitespace.
    161      */
    162     private static int indexOfWsp(String s, int fromIndex) {
    163         final int len = s.length();
    164         for (int index = fromIndex; index < len; index++) {
    165             char c = s.charAt(index);
    166             if (c == ' ' || c == '\t')
    167                 return index;
    168         }
    169         return len;
    170     }
    171 
    172     /**
    173      * Returns the named parameter of a header field. If name is null the first
    174      * parameter is returned, or if there are no additional parameters in the
    175      * field the entire field is returned. Otherwise the named parameter is
    176      * searched for in a case insensitive fashion and returned. If the parameter
    177      * cannot be found the method returns null.
    178      *
    179      * TODO: quite inefficient with the inner trimming & splitting.
    180      * TODO: Also has a latent bug: uses "startsWith" to match the name, which can false-positive.
    181      * TODO: The doc says that for a null name you get the first param, but you get the header.
    182      *    Should probably just fix the doc, but if other code assumes that behavior, fix the code.
    183      * TODO: Need to decode %-escaped strings, as in: filename="ab%22d".
    184      *       ('+' -> ' ' conversion too? check RFC)
    185      *
    186      * @param header
    187      * @param name
    188      * @return the entire header (if name=null), the found parameter, or null
    189      */
    190     public static String getHeaderParameter(String header, String name) {
    191         if (header == null) {
    192             return null;
    193         }
    194         String[] parts = unfold(header).split(";");
    195         if (name == null) {
    196             return parts[0].trim();
    197         }
    198         String lowerCaseName = name.toLowerCase();
    199         for (String part : parts) {
    200             if (part.trim().toLowerCase().startsWith(lowerCaseName)) {
    201                 String[] parameterParts = part.split("=", 2);
    202                 if (parameterParts.length < 2) {
    203                     return null;
    204                 }
    205                 String parameter = parameterParts[1].trim();
    206                 if (parameter.startsWith("\"") && parameter.endsWith("\"")) {
    207                     return parameter.substring(1, parameter.length() - 1);
    208                 } else {
    209                     return parameter;
    210                 }
    211             }
    212         }
    213         return null;
    214     }
    215 
    216     public static Part findFirstPartByMimeType(Part part, String mimeType)
    217             throws MessagingException {
    218         if (part.getBody() instanceof Multipart) {
    219             Multipart multipart = (Multipart)part.getBody();
    220             for (int i = 0, count = multipart.getCount(); i < count; i++) {
    221                 BodyPart bodyPart = multipart.getBodyPart(i);
    222                 Part ret = findFirstPartByMimeType(bodyPart, mimeType);
    223                 if (ret != null) {
    224                     return ret;
    225                 }
    226             }
    227         }
    228         else if (part.getMimeType().equalsIgnoreCase(mimeType)) {
    229             return part;
    230         }
    231         return null;
    232     }
    233 
    234     public static Part findPartByContentId(Part part, String contentId) throws Exception {
    235         if (part.getBody() instanceof Multipart) {
    236             Multipart multipart = (Multipart)part.getBody();
    237             for (int i = 0, count = multipart.getCount(); i < count; i++) {
    238                 BodyPart bodyPart = multipart.getBodyPart(i);
    239                 Part ret = findPartByContentId(bodyPart, contentId);
    240                 if (ret != null) {
    241                     return ret;
    242                 }
    243             }
    244         }
    245         String cid = part.getContentId();
    246         if (contentId.equals(cid)) {
    247             return part;
    248         }
    249         return null;
    250     }
    251 
    252     /**
    253      * Reads the Part's body and returns a String based on any charset conversion that needed
    254      * to be done.
    255      * @param part The part containing a body
    256      * @return a String containing the converted text in the body, or null if there was no text
    257      * or an error during conversion.
    258      */
    259     public static String getTextFromPart(Part part) {
    260         try {
    261             if (part != null && part.getBody() != null) {
    262                 InputStream in = part.getBody().getInputStream();
    263                 String mimeType = part.getMimeType();
    264                 if (mimeType != null && MimeUtility.mimeTypeMatches(mimeType, "text/*")) {
    265                     /*
    266                      * Now we read the part into a buffer for further processing. Because
    267                      * the stream is now wrapped we'll remove any transfer encoding at this point.
    268                      */
    269                     ByteArrayOutputStream out = new ByteArrayOutputStream();
    270                     IOUtils.copy(in, out);
    271                     in.close();
    272                     in = null;      // we want all of our memory back, and close might not release
    273 
    274                     /*
    275                      * We've got a text part, so let's see if it needs to be processed further.
    276                      */
    277                     String charset = getHeaderParameter(part.getContentType(), "charset");
    278                     if (charset != null) {
    279                         /*
    280                          * See if there is conversion from the MIME charset to the Java one.
    281                          */
    282                         charset = CharsetUtil.toJavaCharset(charset);
    283                     }
    284                     /*
    285                      * No encoding, so use us-ascii, which is the standard.
    286                      */
    287                     if (charset == null) {
    288                         charset = "ASCII";
    289                     }
    290                     /*
    291                      * Convert and return as new String
    292                      */
    293                     String result = out.toString(charset);
    294                     out.close();
    295                     return result;
    296                 }
    297             }
    298 
    299         }
    300         catch (OutOfMemoryError oom) {
    301             /*
    302              * If we are not able to process the body there's nothing we can do about it. Return
    303              * null and let the upper layers handle the missing content.
    304              */
    305             Log.e(Email.LOG_TAG, "Unable to getTextFromPart " + oom.toString());
    306         }
    307         catch (Exception e) {
    308             /*
    309              * If we are not able to process the body there's nothing we can do about it. Return
    310              * null and let the upper layers handle the missing content.
    311              */
    312             Log.e(Email.LOG_TAG, "Unable to getTextFromPart " + e.toString());
    313         }
    314         return null;
    315     }
    316 
    317     /**
    318      * Returns true if the given mimeType matches the matchAgainst specification.  The comparison
    319      * ignores case and the matchAgainst string may include "*" for a wildcard (e.g. "image/*").
    320      *
    321      * @param mimeType A MIME type to check.
    322      * @param matchAgainst A MIME type to check against. May include wildcards.
    323      * @return true if the mimeType matches
    324      */
    325     public static boolean mimeTypeMatches(String mimeType, String matchAgainst) {
    326         Pattern p = Pattern.compile(matchAgainst.replaceAll("\\*", "\\.\\*"),
    327                 Pattern.CASE_INSENSITIVE);
    328         return p.matcher(mimeType).matches();
    329     }
    330 
    331     /**
    332      * Returns true if the given mimeType matches any of the matchAgainst specifications.  The
    333      * comparison ignores case and the matchAgainst strings may include "*" for a wildcard
    334      * (e.g. "image/*").
    335      *
    336      * @param mimeType A MIME type to check.
    337      * @param matchAgainst An array of MIME types to check against. May include wildcards.
    338      * @return true if the mimeType matches any of the matchAgainst strings
    339      */
    340     public static boolean mimeTypeMatches(String mimeType, String[] matchAgainst) {
    341         for (String matchType : matchAgainst) {
    342             if (mimeTypeMatches(mimeType, matchType)) {
    343                 return true;
    344             }
    345         }
    346         return false;
    347     }
    348 
    349     /**
    350      * Removes any content transfer encoding from the stream and returns a Body.
    351      */
    352     public static Body decodeBody(InputStream in, String contentTransferEncoding)
    353             throws IOException {
    354         /*
    355          * We'll remove any transfer encoding by wrapping the stream.
    356          */
    357         if (contentTransferEncoding != null) {
    358             contentTransferEncoding =
    359                 MimeUtility.getHeaderParameter(contentTransferEncoding, null);
    360             if ("quoted-printable".equalsIgnoreCase(contentTransferEncoding)) {
    361                 in = new QuotedPrintableInputStream(in);
    362             }
    363             else if ("base64".equalsIgnoreCase(contentTransferEncoding)) {
    364                 in = new Base64InputStream(in, Base64.DEFAULT);
    365             }
    366         }
    367 
    368         BinaryTempFileBody tempBody = new BinaryTempFileBody();
    369         OutputStream out = tempBody.getOutputStream();
    370         IOUtils.copy(in, out);
    371         out.close();
    372         return tempBody;
    373     }
    374 
    375     /**
    376      * An unfortunately named method that makes decisions about a Part (usually a Message)
    377      * as to which of it's children will be "viewable" and which will be attachments.
    378      * The method recursively sorts the viewables and attachments into seperate
    379      * lists for further processing.
    380      * @param part
    381      * @param viewables
    382      * @param attachments
    383      * @throws MessagingException
    384      */
    385     public static void collectParts(Part part, ArrayList<Part> viewables,
    386             ArrayList<Part> attachments) throws MessagingException {
    387         String disposition = part.getDisposition();
    388         String dispositionType = null;
    389         String dispositionFilename = null;
    390         if (disposition != null) {
    391             dispositionType = MimeUtility.getHeaderParameter(disposition, null);
    392             dispositionFilename = MimeUtility.getHeaderParameter(disposition, "filename");
    393         }
    394 
    395         /*
    396          * A best guess that this part is intended to be an attachment and not inline.
    397          */
    398         boolean attachment = ("attachment".equalsIgnoreCase(dispositionType))
    399                 || (dispositionFilename != null)
    400                 && (!"inline".equalsIgnoreCase(dispositionType));
    401 
    402         /*
    403          * If the part is Multipart but not alternative it's either mixed or
    404          * something we don't know about, which means we treat it as mixed
    405          * per the spec. We just process it's pieces recursively.
    406          */
    407         if (part.getBody() instanceof Multipart) {
    408             Multipart mp = (Multipart)part.getBody();
    409             for (int i = 0; i < mp.getCount(); i++) {
    410                 collectParts(mp.getBodyPart(i), viewables, attachments);
    411             }
    412         }
    413         /*
    414          * If the part is an embedded message we just continue to process
    415          * it, pulling any viewables or attachments into the running list.
    416          */
    417         else if (part.getBody() instanceof Message) {
    418             Message message = (Message)part.getBody();
    419             collectParts(message, viewables, attachments);
    420         }
    421         /*
    422          * If the part is HTML and it got this far it's part of a mixed (et
    423          * al) and should be rendered inline.
    424          */
    425         else if ((!attachment) && (part.getMimeType().equalsIgnoreCase("text/html"))) {
    426             viewables.add(part);
    427         }
    428         /*
    429          * If the part is plain text and it got this far it's part of a
    430          * mixed (et al) and should be rendered inline.
    431          */
    432         else if ((!attachment) && (part.getMimeType().equalsIgnoreCase("text/plain"))) {
    433             viewables.add(part);
    434         }
    435         /*
    436          * Finally, if it's nothing else we will include it as an attachment.
    437          */
    438         else {
    439             attachments.add(part);
    440         }
    441     }
    442 }
    443