Home | History | Annotate | Download | only in internet
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.email.mail.internet;
     18 
     19 import com.android.email.Email;
     20 import com.android.email.mail.Body;
     21 import com.android.email.mail.BodyPart;
     22 import com.android.email.mail.Message;
     23 import com.android.email.mail.MessagingException;
     24 import com.android.email.mail.Multipart;
     25 import com.android.email.mail.Part;
     26 
     27 import org.apache.commons.io.IOUtils;
     28 import org.apache.james.mime4j.codec.EncoderUtil;
     29 import org.apache.james.mime4j.decoder.Base64InputStream;
     30 import org.apache.james.mime4j.decoder.DecoderUtil;
     31 import org.apache.james.mime4j.decoder.QuotedPrintableInputStream;
     32 import org.apache.james.mime4j.util.CharsetUtil;
     33 
     34 import android.util.Log;
     35 
     36 import java.io.ByteArrayOutputStream;
     37 import java.io.IOException;
     38 import java.io.InputStream;
     39 import java.io.OutputStream;
     40 import java.util.ArrayList;
     41 import java.util.regex.Matcher;
     42 import java.util.regex.Pattern;
     43 
     44 public class MimeUtility {
     45 
     46     public static final String MIME_TYPE_RFC822 = "message/rfc822";
     47     private final static Pattern PATTERN_CR_OR_LF = Pattern.compile("\r|\n");
     48 
     49     /**
     50      * Replace sequences of CRLF+WSP with WSP.  Tries to preserve original string
     51      * object whenever possible.
     52      */
     53     public static String unfold(String s) {
     54         if (s == null) {
     55             return null;
     56         }
     57         Matcher patternMatcher = PATTERN_CR_OR_LF.matcher(s);
     58         if (patternMatcher.find()) {
     59             patternMatcher.reset();
     60             s = patternMatcher.replaceAll("");
     61         }
     62         return s;
     63     }
     64 
     65     public static String decode(String s) {
     66         if (s == null) {
     67             return null;
     68         }
     69         return DecoderUtil.decodeEncodedWords(s);
     70     }
     71 
     72     public static String unfoldAndDecode(String s) {
     73         return decode(unfold(s));
     74     }
     75 
     76     // TODO implement proper foldAndEncode
     77     // NOTE: When this really works, we *must* remove all calls to foldAndEncode2() to prevent
     78     // duplication of encoding.
     79     public static String foldAndEncode(String s) {
     80         return s;
     81     }
     82 
     83     /**
     84      * INTERIM version of foldAndEncode that will be used only by Subject: headers.
     85      * This is safer than implementing foldAndEncode() (see above) and risking unknown damage
     86      * to other headers.
     87      *
     88      * TODO: Copy this code to foldAndEncode(), get rid of this function, confirm all working OK.
     89      *
     90      * @param s original string to encode and fold
     91      * @param usedCharacters number of characters already used up by header name
     92 
     93      * @return the String ready to be transmitted
     94      */
     95     public static String foldAndEncode2(String s, int usedCharacters) {
     96         // james.mime4j.codec.EncoderUtil.java
     97         // encode:  encodeIfNecessary(text, usage, numUsedInHeaderName)
     98         // Usage.TEXT_TOKENlooks like the right thing for subjects
     99         // use WORD_ENTITY for address/names
    100 
    101         String encoded = EncoderUtil.encodeIfNecessary(s, EncoderUtil.Usage.TEXT_TOKEN,
    102                 usedCharacters);
    103 
    104         return fold(encoded, usedCharacters);
    105     }
    106 
    107     /**
    108      * INTERIM:  From newer version of org.apache.james (but we don't want to import
    109      * the entire MimeUtil class).
    110      *
    111      * Splits the specified string into a multiple-line representation with
    112      * lines no longer than 76 characters (because the line might contain
    113      * encoded words; see <a href='http://www.faqs.org/rfcs/rfc2047.html'>RFC
    114      * 2047</a> section 2). If the string contains non-whitespace sequences
    115      * longer than 76 characters a line break is inserted at the whitespace
    116      * character following the sequence resulting in a line longer than 76
    117      * characters.
    118      *
    119      * @param s
    120      *            string to split.
    121      * @param usedCharacters
    122      *            number of characters already used up. Usually the number of
    123      *            characters for header field name plus colon and one space.
    124      * @return a multiple-line representation of the given string.
    125      */
    126     public static String fold(String s, int usedCharacters) {
    127         final int maxCharacters = 76;
    128 
    129         final int length = s.length();
    130         if (usedCharacters + length <= maxCharacters)
    131             return s;
    132 
    133         StringBuilder sb = new StringBuilder();
    134 
    135         int lastLineBreak = -usedCharacters;
    136         int wspIdx = indexOfWsp(s, 0);
    137         while (true) {
    138             if (wspIdx == length) {
    139                 sb.append(s.substring(Math.max(0, lastLineBreak)));
    140                 return sb.toString();
    141             }
    142 
    143             int nextWspIdx = indexOfWsp(s, wspIdx + 1);
    144 
    145             if (nextWspIdx - lastLineBreak > maxCharacters) {
    146                 sb.append(s.substring(Math.max(0, lastLineBreak), wspIdx));
    147                 sb.append("\r\n");
    148                 lastLineBreak = wspIdx;
    149             }
    150 
    151             wspIdx = nextWspIdx;
    152         }
    153     }
    154 
    155     /**
    156      * INTERIM:  From newer version of org.apache.james (but we don't want to import
    157      * the entire MimeUtil class).
    158      *
    159      * Search for whitespace.
    160      */
    161     private static int indexOfWsp(String s, int fromIndex) {
    162         final int len = s.length();
    163         for (int index = fromIndex; index < len; index++) {
    164             char c = s.charAt(index);
    165             if (c == ' ' || c == '\t')
    166                 return index;
    167         }
    168         return len;
    169     }
    170 
    171     /**
    172      * Returns the named parameter of a header field. If name is null the first
    173      * parameter is returned, or if there are no additional parameters in the
    174      * field the entire field is returned. Otherwise the named parameter is
    175      * searched for in a case insensitive fashion and returned. If the parameter
    176      * cannot be found the method returns null.
    177      *
    178      * TODO: quite inefficient with the inner trimming & splitting.
    179      * TODO: Also has a latent bug: uses "startsWith" to match the name, which can false-positive.
    180      * TODO: The doc says that for a null name you get the first param, but you get the header.
    181      *    Should probably just fix the doc, but if other code assumes that behavior, fix the code.
    182      * TODO: Need to decode %-escaped strings, as in: filename="ab%22d".
    183      *       ('+' -> ' ' conversion too? check RFC)
    184      *
    185      * @param header
    186      * @param name
    187      * @return the entire header (if name=null), the found parameter, or null
    188      */
    189     public static String getHeaderParameter(String header, String name) {
    190         if (header == null) {
    191             return null;
    192         }
    193         String[] parts = unfold(header).split(";");
    194         if (name == null) {
    195             return parts[0].trim();
    196         }
    197         String lowerCaseName = name.toLowerCase();
    198         for (String part : parts) {
    199             if (part.trim().toLowerCase().startsWith(lowerCaseName)) {
    200                 String[] parameterParts = part.split("=", 2);
    201                 if (parameterParts.length < 2) {
    202                     return null;
    203                 }
    204                 String parameter = parameterParts[1].trim();
    205                 if (parameter.startsWith("\"") && parameter.endsWith("\"")) {
    206                     return parameter.substring(1, parameter.length() - 1);
    207                 } else {
    208                     return parameter;
    209                 }
    210             }
    211         }
    212         return null;
    213     }
    214 
    215     public static Part findFirstPartByMimeType(Part part, String mimeType)
    216             throws MessagingException {
    217         if (part.getBody() instanceof Multipart) {
    218             Multipart multipart = (Multipart)part.getBody();
    219             for (int i = 0, count = multipart.getCount(); i < count; i++) {
    220                 BodyPart bodyPart = multipart.getBodyPart(i);
    221                 Part ret = findFirstPartByMimeType(bodyPart, mimeType);
    222                 if (ret != null) {
    223                     return ret;
    224                 }
    225             }
    226         }
    227         else if (part.getMimeType().equalsIgnoreCase(mimeType)) {
    228             return part;
    229         }
    230         return null;
    231     }
    232 
    233     public static Part findPartByContentId(Part part, String contentId) throws Exception {
    234         if (part.getBody() instanceof Multipart) {
    235             Multipart multipart = (Multipart)part.getBody();
    236             for (int i = 0, count = multipart.getCount(); i < count; i++) {
    237                 BodyPart bodyPart = multipart.getBodyPart(i);
    238                 Part ret = findPartByContentId(bodyPart, contentId);
    239                 if (ret != null) {
    240                     return ret;
    241                 }
    242             }
    243         }
    244         String cid = part.getContentId();
    245         if (contentId.equals(cid)) {
    246             return part;
    247         }
    248         return null;
    249     }
    250 
    251     /**
    252      * Reads the Part's body and returns a String based on any charset conversion that needed
    253      * to be done.
    254      * @param part The part containing a body
    255      * @return a String containing the converted text in the body, or null if there was no text
    256      * or an error during conversion.
    257      */
    258     public static String getTextFromPart(Part part) {
    259         try {
    260             if (part != null && part.getBody() != null) {
    261                 InputStream in = part.getBody().getInputStream();
    262                 String mimeType = part.getMimeType();
    263                 if (mimeType != null && MimeUtility.mimeTypeMatches(mimeType, "text/*")) {
    264                     /*
    265                      * Now we read the part into a buffer for further processing. Because
    266                      * the stream is now wrapped we'll remove any transfer encoding at this point.
    267                      */
    268                     ByteArrayOutputStream out = new ByteArrayOutputStream();
    269                     IOUtils.copy(in, out);
    270                     in.close();
    271                     in = null;      // we want all of our memory back, and close might not release
    272 
    273                     /*
    274                      * We've got a text part, so let's see if it needs to be processed further.
    275                      */
    276                     String charset = getHeaderParameter(part.getContentType(), "charset");
    277                     if (charset != null) {
    278                         /*
    279                          * See if there is conversion from the MIME charset to the Java one.
    280                          */
    281                         charset = CharsetUtil.toJavaCharset(charset);
    282                     }
    283                     /*
    284                      * No encoding, so use us-ascii, which is the standard.
    285                      */
    286                     if (charset == null) {
    287                         charset = "ASCII";
    288                     }
    289                     /*
    290                      * Convert and return as new String
    291                      */
    292                     String result = out.toString(charset);
    293                     out.close();
    294                     return result;
    295                 }
    296             }
    297 
    298         }
    299         catch (OutOfMemoryError oom) {
    300             /*
    301              * If we are not able to process the body there's nothing we can do about it. Return
    302              * null and let the upper layers handle the missing content.
    303              */
    304             Log.e(Email.LOG_TAG, "Unable to getTextFromPart " + oom.toString());
    305         }
    306         catch (Exception e) {
    307             /*
    308              * If we are not able to process the body there's nothing we can do about it. Return
    309              * null and let the upper layers handle the missing content.
    310              */
    311             Log.e(Email.LOG_TAG, "Unable to getTextFromPart " + e.toString());
    312         }
    313         return null;
    314     }
    315 
    316     /**
    317      * Returns true if the given mimeType matches the matchAgainst specification.  The comparison
    318      * ignores case and the matchAgainst string may include "*" for a wildcard (e.g. "image/*").
    319      *
    320      * @param mimeType A MIME type to check.
    321      * @param matchAgainst A MIME type to check against. May include wildcards.
    322      * @return true if the mimeType matches
    323      */
    324     public static boolean mimeTypeMatches(String mimeType, String matchAgainst) {
    325         Pattern p = Pattern.compile(matchAgainst.replaceAll("\\*", "\\.\\*"),
    326                 Pattern.CASE_INSENSITIVE);
    327         return p.matcher(mimeType).matches();
    328     }
    329 
    330     /**
    331      * Returns true if the given mimeType matches any of the matchAgainst specifications.  The
    332      * comparison ignores case and the matchAgainst strings may include "*" for a wildcard
    333      * (e.g. "image/*").
    334      *
    335      * @param mimeType A MIME type to check.
    336      * @param matchAgainst An array of MIME types to check against. May include wildcards.
    337      * @return true if the mimeType matches any of the matchAgainst strings
    338      */
    339     public static boolean mimeTypeMatches(String mimeType, String[] matchAgainst) {
    340         for (String matchType : matchAgainst) {
    341             if (mimeTypeMatches(mimeType, matchType)) {
    342                 return true;
    343             }
    344         }
    345         return false;
    346     }
    347 
    348     /**
    349      * Removes any content transfer encoding from the stream and returns a Body.
    350      */
    351     public static Body decodeBody(InputStream in, String contentTransferEncoding)
    352             throws IOException {
    353         /*
    354          * We'll remove any transfer encoding by wrapping the stream.
    355          */
    356         if (contentTransferEncoding != null) {
    357             contentTransferEncoding =
    358                 MimeUtility.getHeaderParameter(contentTransferEncoding, null);
    359             if ("quoted-printable".equalsIgnoreCase(contentTransferEncoding)) {
    360                 in = new QuotedPrintableInputStream(in);
    361             }
    362             else if ("base64".equalsIgnoreCase(contentTransferEncoding)) {
    363                 in = new Base64InputStream(in);
    364             }
    365         }
    366 
    367         BinaryTempFileBody tempBody = new BinaryTempFileBody();
    368         OutputStream out = tempBody.getOutputStream();
    369         IOUtils.copy(in, out);
    370         out.close();
    371         return tempBody;
    372     }
    373 
    374     /**
    375      * An unfortunately named method that makes decisions about a Part (usually a Message)
    376      * as to which of it's children will be "viewable" and which will be attachments.
    377      * The method recursively sorts the viewables and attachments into seperate
    378      * lists for further processing.
    379      * @param part
    380      * @param viewables
    381      * @param attachments
    382      * @throws MessagingException
    383      */
    384     public static void collectParts(Part part, ArrayList<Part> viewables,
    385             ArrayList<Part> attachments) throws MessagingException {
    386         String disposition = part.getDisposition();
    387         String dispositionType = null;
    388         String dispositionFilename = null;
    389         if (disposition != null) {
    390             dispositionType = MimeUtility.getHeaderParameter(disposition, null);
    391             dispositionFilename = MimeUtility.getHeaderParameter(disposition, "filename");
    392         }
    393 
    394         /*
    395          * A best guess that this part is intended to be an attachment and not inline.
    396          */
    397         boolean attachment = ("attachment".equalsIgnoreCase(dispositionType))
    398                 || (dispositionFilename != null)
    399                 && (!"inline".equalsIgnoreCase(dispositionType));
    400 
    401         /*
    402          * If the part is Multipart but not alternative it's either mixed or
    403          * something we don't know about, which means we treat it as mixed
    404          * per the spec. We just process it's pieces recursively.
    405          */
    406         if (part.getBody() instanceof Multipart) {
    407             Multipart mp = (Multipart)part.getBody();
    408             for (int i = 0; i < mp.getCount(); i++) {
    409                 collectParts(mp.getBodyPart(i), viewables, attachments);
    410             }
    411         }
    412         /*
    413          * If the part is an embedded message we just continue to process
    414          * it, pulling any viewables or attachments into the running list.
    415          */
    416         else if (part.getBody() instanceof Message) {
    417             Message message = (Message)part.getBody();
    418             collectParts(message, viewables, attachments);
    419         }
    420         /*
    421          * If the part is HTML and it got this far it's part of a mixed (et
    422          * al) and should be rendered inline.
    423          */
    424         else if ((!attachment) && (part.getMimeType().equalsIgnoreCase("text/html"))) {
    425             viewables.add(part);
    426         }
    427         /*
    428          * If the part is plain text and it got this far it's part of a
    429          * mixed (et al) and should be rendered inline.
    430          */
    431         else if ((!attachment) && (part.getMimeType().equalsIgnoreCase("text/plain"))) {
    432             viewables.add(part);
    433         }
    434         /*
    435          * Finally, if it's nothing else we will include it as an attachment.
    436          */
    437         else {
    438             attachments.add(part);
    439         }
    440     }
    441 }
    442