Home | History | Annotate | Download | only in internet
      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 package com.android.voicemail.impl.mail.internet;
     17 
     18 import android.text.TextUtils;
     19 import android.util.Base64;
     20 import android.util.Base64DataException;
     21 import android.util.Base64InputStream;
     22 import com.android.voicemail.impl.VvmLog;
     23 import com.android.voicemail.impl.mail.Body;
     24 import com.android.voicemail.impl.mail.BodyPart;
     25 import com.android.voicemail.impl.mail.Message;
     26 import com.android.voicemail.impl.mail.MessagingException;
     27 import com.android.voicemail.impl.mail.Multipart;
     28 import com.android.voicemail.impl.mail.Part;
     29 import java.io.ByteArrayOutputStream;
     30 import java.io.IOException;
     31 import java.io.InputStream;
     32 import java.io.OutputStream;
     33 import java.util.ArrayList;
     34 import java.util.regex.Matcher;
     35 import java.util.regex.Pattern;
     36 import org.apache.commons.io.IOUtils;
     37 import org.apache.james.mime4j.codec.DecodeMonitor;
     38 import org.apache.james.mime4j.codec.DecoderUtil;
     39 import org.apache.james.mime4j.codec.EncoderUtil;
     40 import org.apache.james.mime4j.codec.QuotedPrintableInputStream;
     41 import org.apache.james.mime4j.util.CharsetUtil;
     42 
     43 public class MimeUtility {
  /** Tag passed to {@link VvmLog} for the messages logged by this class. */
  private static final String LOG_TAG = "Email";

  /** The {@code message/rfc822} MIME type string. */
  public static final String MIME_TYPE_RFC822 = "message/rfc822";
  /** Matches a single carriage return or line feed; used by {@link #unfold(String)}. */
  private static final Pattern PATTERN_CR_OR_LF = Pattern.compile("\r|\n");
     48 
     49   /**
     50    * Replace sequences of CRLF+WSP with WSP. Tries to preserve original string object whenever
     51    * possible.
     52    */
     53   public static String unfold(String s) {
     54     if (s == null) {
     55       return null;
     56     }
     57     Matcher patternMatcher = PATTERN_CR_OR_LF.matcher(s);
     58     if (patternMatcher.find()) {
     59       patternMatcher.reset();
     60       s = patternMatcher.replaceAll("");
     61     }
     62     return s;
     63   }
     64 
     65   public static String decode(String s) {
     66     if (s == null) {
     67       return null;
     68     }
     69     return DecoderUtil.decodeEncodedWords(s, DecodeMonitor.STRICT);
     70   }
     71 
     72   public static String unfoldAndDecode(String s) {
     73     return decode(unfold(s));
     74   }
     75 
  /**
   * Placeholder for header folding and encoding: currently returns its argument unchanged.
   *
   * <p>TODO implement proper foldAndEncode
   *
   * <p>NOTE: When this really works, we *must* remove all calls to foldAndEncode2() to prevent
   * duplication of encoding.
   *
   * @param s the header value
   * @return {@code s}, unmodified
   */
  public static String foldAndEncode(String s) {
    return s;
  }
     82 
     83   /**
     84    * INTERIM version of foldAndEncode that will be used only by Subject: headers. This is safer than
     85    * implementing foldAndEncode() (see above) and risking unknown damage to other headers.
     86    *
     87    * <p>TODO: Copy this code to foldAndEncode(), get rid of this function, confirm all working OK.
     88    *
     89    * @param s original string to encode and fold
     90    * @param usedCharacters number of characters already used up by header name
     91    * @return the String ready to be transmitted
     92    */
     93   public static String foldAndEncode2(String s, int usedCharacters) {
     94     // james.mime4j.codec.EncoderUtil.java
     95     // encode:  encodeIfNecessary(text, usage, numUsedInHeaderName)
     96     // Usage.TEXT_TOKENlooks like the right thing for subjects
     97     // use WORD_ENTITY for address/names
     98 
     99     String encoded = EncoderUtil.encodeIfNecessary(s, EncoderUtil.Usage.TEXT_TOKEN, usedCharacters);
    100 
    101     return fold(encoded, usedCharacters);
    102   }
    103 
    104   /**
    105    * INTERIM: From newer version of org.apache.james (but we don't want to import the entire
    106    * MimeUtil class).
    107    *
    108    * <p>Splits the specified string into a multiple-line representation with lines no longer than 76
    109    * characters (because the line might contain encoded words; see <a
    110    * href='http://www.faqs.org/rfcs/rfc2047.html'>RFC 2047</a> section 2). If the string contains
    111    * non-whitespace sequences longer than 76 characters a line break is inserted at the whitespace
    112    * character following the sequence resulting in a line longer than 76 characters.
    113    *
    114    * @param s string to split.
    115    * @param usedCharacters number of characters already used up. Usually the number of characters
    116    *     for header field name plus colon and one space.
    117    * @return a multiple-line representation of the given string.
    118    */
    119   public static String fold(String s, int usedCharacters) {
    120     final int maxCharacters = 76;
    121 
    122     final int length = s.length();
    123     if (usedCharacters + length <= maxCharacters) {
    124       return s;
    125     }
    126 
    127     StringBuilder sb = new StringBuilder();
    128 
    129     int lastLineBreak = -usedCharacters;
    130     int wspIdx = indexOfWsp(s, 0);
    131     while (true) {
    132       if (wspIdx == length) {
    133         sb.append(s.substring(Math.max(0, lastLineBreak)));
    134         return sb.toString();
    135       }
    136 
    137       int nextWspIdx = indexOfWsp(s, wspIdx + 1);
    138 
    139       if (nextWspIdx - lastLineBreak > maxCharacters) {
    140         sb.append(s.substring(Math.max(0, lastLineBreak), wspIdx));
    141         sb.append("\r\n");
    142         lastLineBreak = wspIdx;
    143       }
    144 
    145       wspIdx = nextWspIdx;
    146     }
    147   }
    148 
    149   /**
    150    * INTERIM: From newer version of org.apache.james (but we don't want to import the entire
    151    * MimeUtil class).
    152    *
    153    * <p>Search for whitespace.
    154    */
    155   private static int indexOfWsp(String s, int fromIndex) {
    156     final int len = s.length();
    157     for (int index = fromIndex; index < len; index++) {
    158       char c = s.charAt(index);
    159       if (c == ' ' || c == '\t') {
    160         return index;
    161       }
    162     }
    163     return len;
    164   }
    165 
    166   /**
    167    * Returns the named parameter of a header field. If name is null the first parameter is returned,
    168    * or if there are no additional parameters in the field the entire field is returned. Otherwise
    169    * the named parameter is searched for in a case insensitive fashion and returned. If the
    170    * parameter cannot be found the method returns null.
    171    *
    172    * <p>TODO: quite inefficient with the inner trimming & splitting. TODO: Also has a latent bug:
    173    * uses "startsWith" to match the name, which can false-positive. TODO: The doc says that for a
    174    * null name you get the first param, but you get the header. Should probably just fix the doc,
    175    * but if other code assumes that behavior, fix the code. TODO: Need to decode %-escaped strings,
    176    * as in: filename="ab%22d". ('+' -> ' ' conversion too? check RFC)
    177    *
    178    * @param header
    179    * @param name
    180    * @return the entire header (if name=null), the found parameter, or null
    181    */
    182   public static String getHeaderParameter(String header, String name) {
    183     if (header == null) {
    184       return null;
    185     }
    186     String[] parts = unfold(header).split(";");
    187     if (name == null) {
    188       return parts[0].trim();
    189     }
    190     String lowerCaseName = name.toLowerCase();
    191     for (String part : parts) {
    192       if (part.trim().toLowerCase().startsWith(lowerCaseName)) {
    193         String[] parameterParts = part.split("=", 2);
    194         if (parameterParts.length < 2) {
    195           return null;
    196         }
    197         String parameter = parameterParts[1].trim();
    198         if (parameter.startsWith("\"") && parameter.endsWith("\"")) {
    199           return parameter.substring(1, parameter.length() - 1);
    200         } else {
    201           return parameter;
    202         }
    203       }
    204     }
    205     return null;
    206   }
    207 
    208   /**
    209    * Reads the Part's body and returns a String based on any charset conversion that needed to be
    210    * done.
    211    *
    212    * @param part The part containing a body
    213    * @return a String containing the converted text in the body, or null if there was no text or an
    214    *     error during conversion.
    215    */
    216   public static String getTextFromPart(Part part) {
    217     try {
    218       if (part != null && part.getBody() != null) {
    219         InputStream in = part.getBody().getInputStream();
    220         String mimeType = part.getMimeType();
    221         if (mimeType != null && MimeUtility.mimeTypeMatches(mimeType, "text/*")) {
    222           /*
    223            * Now we read the part into a buffer for further processing. Because
    224            * the stream is now wrapped we'll remove any transfer encoding at this point.
    225            */
    226           ByteArrayOutputStream out = new ByteArrayOutputStream();
    227           IOUtils.copy(in, out);
    228           in.close();
    229           in = null; // we want all of our memory back, and close might not release
    230 
    231           /*
    232            * We've got a text part, so let's see if it needs to be processed further.
    233            */
    234           String charset = getHeaderParameter(part.getContentType(), "charset");
    235           if (charset != null) {
    236             /*
    237              * See if there is conversion from the MIME charset to the Java one.
    238              */
    239             charset = CharsetUtil.lookup(charset).name();
    240           }
    241           /*
    242            * No encoding, so use us-ascii, which is the standard.
    243            */
    244           if (charset == null) {
    245             charset = "ASCII";
    246           }
    247           /*
    248            * Convert and return as new String
    249            */
    250           String result = out.toString(charset);
    251           out.close();
    252           return result;
    253         }
    254       }
    255 
    256     } catch (OutOfMemoryError oom) {
    257       /*
    258        * If we are not able to process the body there's nothing we can do about it. Return
    259        * null and let the upper layers handle the missing content.
    260        */
    261       VvmLog.e(LOG_TAG, "Unable to getTextFromPart " + oom.toString());
    262     } catch (Exception e) {
    263       /*
    264        * If we are not able to process the body there's nothing we can do about it. Return
    265        * null and let the upper layers handle the missing content.
    266        */
    267       VvmLog.e(LOG_TAG, "Unable to getTextFromPart " + e.toString());
    268     }
    269     return null;
    270   }
    271 
    272   /**
    273    * Returns true if the given mimeType matches the matchAgainst specification. The comparison
    274    * ignores case and the matchAgainst string may include "*" for a wildcard (e.g. "image/*").
    275    *
    276    * @param mimeType A MIME type to check.
    277    * @param matchAgainst A MIME type to check against. May include wildcards.
    278    * @return true if the mimeType matches
    279    */
    280   public static boolean mimeTypeMatches(String mimeType, String matchAgainst) {
    281     Pattern p = Pattern.compile(matchAgainst.replaceAll("\\*", "\\.\\*"), Pattern.CASE_INSENSITIVE);
    282     return p.matcher(mimeType).matches();
    283   }
    284 
    285   /**
    286    * Returns true if the given mimeType matches any of the matchAgainst specifications. The
    287    * comparison ignores case and the matchAgainst strings may include "*" for a wildcard (e.g.
    288    * "image/*").
    289    *
    290    * @param mimeType A MIME type to check.
    291    * @param matchAgainst An array of MIME types to check against. May include wildcards.
    292    * @return true if the mimeType matches any of the matchAgainst strings
    293    */
    294   public static boolean mimeTypeMatches(String mimeType, String[] matchAgainst) {
    295     for (String matchType : matchAgainst) {
    296       if (mimeTypeMatches(mimeType, matchType)) {
    297         return true;
    298       }
    299     }
    300     return false;
    301   }
    302 
    303   /**
    304    * Given an input stream and a transfer encoding, return a wrapped input stream for that encoding
    305    * (or the original if none is required)
    306    *
    307    * @param in the input stream
    308    * @param contentTransferEncoding the content transfer encoding
    309    * @return a properly wrapped stream
    310    */
    311   public static InputStream getInputStreamForContentTransferEncoding(
    312       InputStream in, String contentTransferEncoding) {
    313     if (contentTransferEncoding != null) {
    314       contentTransferEncoding = MimeUtility.getHeaderParameter(contentTransferEncoding, null);
    315       if ("quoted-printable".equalsIgnoreCase(contentTransferEncoding)) {
    316         in = new QuotedPrintableInputStream(in);
    317       } else if ("base64".equalsIgnoreCase(contentTransferEncoding)) {
    318         in = new Base64InputStream(in, Base64.DEFAULT);
    319       }
    320     }
    321     return in;
    322   }
    323 
    324   /** Removes any content transfer encoding from the stream and returns a Body. */
    325   public static Body decodeBody(InputStream in, String contentTransferEncoding) throws IOException {
    326     /*
    327      * We'll remove any transfer encoding by wrapping the stream.
    328      */
    329     in = getInputStreamForContentTransferEncoding(in, contentTransferEncoding);
    330     BinaryTempFileBody tempBody = new BinaryTempFileBody();
    331     OutputStream out = tempBody.getOutputStream();
    332     try {
    333       IOUtils.copy(in, out);
    334     } catch (Base64DataException bde) {
    335       // TODO Need to fix this somehow
    336       //String warning = "\n\n" + Email.getMessageDecodeErrorString();
    337       //out.write(warning.getBytes());
    338     } finally {
    339       out.close();
    340     }
    341     return tempBody;
    342   }
    343 
    344   /**
    345    * Recursively scan a Part (usually a Message) and sort out which of its children will be
    346    * "viewable" and which will be attachments.
    347    *
    348    * @param part The part to be broken down
    349    * @param viewables This arraylist will be populated with all parts that appear to be the
    350    *     "message" (e.g. text/plain & text/html)
    351    * @param attachments This arraylist will be populated with all parts that appear to be
    352    *     attachments (including inlines)
    353    * @throws MessagingException
    354    */
    355   public static void collectParts(Part part, ArrayList<Part> viewables, ArrayList<Part> attachments)
    356       throws MessagingException {
    357     String disposition = part.getDisposition();
    358     String dispositionType = MimeUtility.getHeaderParameter(disposition, null);
    359     // If a disposition is not specified, default to "inline"
    360     boolean inline =
    361         TextUtils.isEmpty(dispositionType) || "inline".equalsIgnoreCase(dispositionType);
    362     // The lower-case mime type
    363     String mimeType = part.getMimeType().toLowerCase();
    364 
    365     if (part.getBody() instanceof Multipart) {
    366       // If the part is Multipart but not alternative it's either mixed or
    367       // something we don't know about, which means we treat it as mixed
    368       // per the spec. We just process its pieces recursively.
    369       MimeMultipart mp = (MimeMultipart) part.getBody();
    370       boolean foundHtml = false;
    371       if (mp.getSubTypeForTest().equals("alternative")) {
    372         for (int i = 0; i < mp.getCount(); i++) {
    373           if (mp.getBodyPart(i).isMimeType("text/html")) {
    374             foundHtml = true;
    375             break;
    376           }
    377         }
    378       }
    379       for (int i = 0; i < mp.getCount(); i++) {
    380         // See if we have text and html
    381         BodyPart bp = mp.getBodyPart(i);
    382         // If there's html, don't bother loading text
    383         if (foundHtml && bp.isMimeType("text/plain")) {
    384           continue;
    385         }
    386         collectParts(bp, viewables, attachments);
    387       }
    388     } else if (part.getBody() instanceof Message) {
    389       // If the part is an embedded message we just continue to process
    390       // it, pulling any viewables or attachments into the running list.
    391       Message message = (Message) part.getBody();
    392       collectParts(message, viewables, attachments);
    393     } else if (inline && (mimeType.startsWith("text") || (mimeType.startsWith("image")))) {
    394       // We'll treat text and images as viewables
    395       viewables.add(part);
    396     } else {
    397       // Everything else is an attachment.
    398       attachments.add(part);
    399     }
    400   }
    401 }
    402