Home | History | Annotate | Download | only in decoder
      1 /****************************************************************
      2  * Licensed to the Apache Software Foundation (ASF) under one   *
      3  * or more contributor license agreements.  See the NOTICE file *
      4  * distributed with this work for additional information        *
      5  * regarding copyright ownership.  The ASF licenses this file   *
      6  * to you under the Apache License, Version 2.0 (the            *
      7  * "License"); you may not use this file except in compliance   *
      8  * with the License.  You may obtain a copy of the License at   *
      9  *                                                              *
     10  *   http://www.apache.org/licenses/LICENSE-2.0                 *
     11  *                                                              *
     12  * Unless required by applicable law or agreed to in writing,   *
     13  * software distributed under the License is distributed on an  *
     14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
     15  * KIND, either express or implied.  See the License for the    *
     16  * specific language governing permissions and limitations      *
     17  * under the License.                                           *
     18  ****************************************************************/
     19 
     20 package org.apache.james.mime4j.decoder;
     21 
     22 //BEGIN android-changed: Stubbing out logging
     23 import org.apache.james.mime4j.Log;
     24 import org.apache.james.mime4j.LogFactory;
     25 //END android-changed
     26 import org.apache.james.mime4j.util.CharsetUtil;
     27 
     28 import java.io.ByteArrayInputStream;
     29 import java.io.ByteArrayOutputStream;
     30 import java.io.IOException;
     31 import java.io.UnsupportedEncodingException;
     32 
     33 /**
     34  * Static methods for decoding strings, byte arrays and encoded words.
     35  *
     36  *
     37  * @version $Id: DecoderUtil.java,v 1.3 2005/02/07 15:33:59 ntherning Exp $
     38  */
     39 public class DecoderUtil {
     40     private static Log log = LogFactory.getLog(DecoderUtil.class);
     41 
     42     /**
     43      * Decodes a string containing quoted-printable encoded data.
     44      *
     45      * @param s the string to decode.
     46      * @return the decoded bytes.
     47      */
     48     public static byte[] decodeBaseQuotedPrintable(String s) {
     49         ByteArrayOutputStream baos = new ByteArrayOutputStream();
     50 
     51         try {
     52             byte[] bytes = s.getBytes("US-ASCII");
     53 
     54             QuotedPrintableInputStream is = new QuotedPrintableInputStream(
     55                                                new ByteArrayInputStream(bytes));
     56 
     57             int b = 0;
     58             while ((b = is.read()) != -1) {
     59                 baos.write(b);
     60             }
     61         } catch (IOException e) {
     62             /*
     63              * This should never happen!
     64              */
     65             log.error(e);
     66         }
     67 
     68         return baos.toByteArray();
     69     }
     70 
     71     /**
     72      * Decodes a string containing base64 encoded data.
     73      *
     74      * @param s the string to decode.
     75      * @return the decoded bytes.
     76      */
     77     public static byte[] decodeBase64(String s) {
     78         ByteArrayOutputStream baos = new ByteArrayOutputStream();
     79 
     80         try {
     81             byte[] bytes = s.getBytes("US-ASCII");
     82 
     83             Base64InputStream is = new Base64InputStream(
     84                                         new ByteArrayInputStream(bytes));
     85 
     86             int b = 0;
     87             while ((b = is.read()) != -1) {
     88                 baos.write(b);
     89             }
     90         } catch (IOException e) {
     91             /*
     92              * This should never happen!
     93              */
     94             log.error(e);
     95         }
     96 
     97         return baos.toByteArray();
     98     }
     99 
    100     /**
    101      * Decodes an encoded word encoded with the 'B' encoding (described in
    102      * RFC 2047) found in a header field body.
    103      *
    104      * @param encodedWord the encoded word to decode.
    105      * @param charset the Java charset to use.
    106      * @return the decoded string.
    107      * @throws UnsupportedEncodingException if the given Java charset isn't
    108      *         supported.
    109      */
    110     public static String decodeB(String encodedWord, String charset)
    111             throws UnsupportedEncodingException {
    112 
    113         return new String(decodeBase64(encodedWord), charset);
    114     }
    115 
    116     /**
    117      * Decodes an encoded word encoded with the 'Q' encoding (described in
    118      * RFC 2047) found in a header field body.
    119      *
    120      * @param encodedWord the encoded word to decode.
    121      * @param charset the Java charset to use.
    122      * @return the decoded string.
    123      * @throws UnsupportedEncodingException if the given Java charset isn't
    124      *         supported.
    125      */
    126     public static String decodeQ(String encodedWord, String charset)
    127             throws UnsupportedEncodingException {
    128 
    129         /*
    130          * Replace _ with =20
    131          */
    132         StringBuffer sb = new StringBuffer();
    133         for (int i = 0; i < encodedWord.length(); i++) {
    134             char c = encodedWord.charAt(i);
    135             if (c == '_') {
    136                 sb.append("=20");
    137             } else {
    138                 sb.append(c);
    139             }
    140         }
    141 
    142         return new String(decodeBaseQuotedPrintable(sb.toString()), charset);
    143     }
    144 
    145     /**
    146      * Decodes a string containing encoded words as defined by RFC 2047.
    147      * Encoded words in have the form
    148      * =?charset?enc?Encoded word?= where enc is either 'Q' or 'q' for
    149      * quoted-printable and 'B' or 'b' for Base64.
    150      *
    151      * ANDROID:  COPIED FROM A NEWER VERSION OF MIME4J
    152      *
    153      * @param body the string to decode.
    154      * @return the decoded string.
    155      */
    156     public static String decodeEncodedWords(String body) {
    157 
    158         // ANDROID:  Most strings will not include "=?" so a quick test can prevent unneeded
    159         // object creation.  This could also be handled via lazy creation of the StringBuilder.
    160         if (body.indexOf("=?") == -1) {
    161             return body;
    162         }
    163 
    164         int previousEnd = 0;
    165         boolean previousWasEncoded = false;
    166 
    167         StringBuilder sb = new StringBuilder();
    168 
    169         while (true) {
    170             int begin = body.indexOf("=?", previousEnd);
    171 
    172             // ANDROID:  The mime4j original version has an error here.  It gets confused if
    173             // the encoded string begins with an '=' (just after "?Q?").  This patch seeks forward
    174             // to find the two '?' in the "header", before looking for the final "?=".
    175             int endScan = begin + 2;
    176             if (begin != -1) {
    177                 int qm1 = body.indexOf('?', endScan + 2);
    178                 int qm2 = body.indexOf('?', qm1 + 1);
    179                 if (qm2 != -1) {
    180                     endScan = qm2 + 1;
    181                 }
    182             }
    183 
    184             int end = begin == -1 ? -1 : body.indexOf("?=", endScan);
    185             if (end == -1) {
    186                 if (previousEnd == 0)
    187                     return body;
    188 
    189                 sb.append(body.substring(previousEnd));
    190                 return sb.toString();
    191             }
    192             end += 2;
    193 
    194             String sep = body.substring(previousEnd, begin);
    195 
    196             String decoded = decodeEncodedWord(body, begin, end);
    197             if (decoded == null) {
    198                 sb.append(sep);
    199                 sb.append(body.substring(begin, end));
    200             } else {
    201                 if (!previousWasEncoded || !CharsetUtil.isWhitespace(sep)) {
    202                     sb.append(sep);
    203                 }
    204                 sb.append(decoded);
    205             }
    206 
    207             previousEnd = end;
    208             previousWasEncoded = decoded != null;
    209         }
    210     }
    211 
    212     // return null on error
    213     private static String decodeEncodedWord(String body, int begin, int end) {
    214         int qm1 = body.indexOf('?', begin + 2);
    215         if (qm1 == end - 2)
    216             return null;
    217 
    218         int qm2 = body.indexOf('?', qm1 + 1);
    219         if (qm2 == end - 2)
    220             return null;
    221 
    222         String mimeCharset = body.substring(begin + 2, qm1);
    223         String encoding = body.substring(qm1 + 1, qm2);
    224         String encodedText = body.substring(qm2 + 1, end - 2);
    225 
    226         String charset = CharsetUtil.toJavaCharset(mimeCharset);
    227         if (charset == null) {
    228             if (log.isWarnEnabled()) {
    229                 log.warn("MIME charset '" + mimeCharset + "' in encoded word '"
    230                         + body.substring(begin, end) + "' doesn't have a "
    231                         + "corresponding Java charset");
    232             }
    233             return null;
    234         } else if (!CharsetUtil.isDecodingSupported(charset)) {
    235             if (log.isWarnEnabled()) {
    236                 log.warn("Current JDK doesn't support decoding of charset '"
    237                         + charset + "' (MIME charset '" + mimeCharset
    238                         + "' in encoded word '" + body.substring(begin, end)
    239                         + "')");
    240             }
    241             return null;
    242         }
    243 
    244         if (encodedText.length() == 0) {
    245             if (log.isWarnEnabled()) {
    246                 log.warn("Missing encoded text in encoded word: '"
    247                         + body.substring(begin, end) + "'");
    248             }
    249             return null;
    250         }
    251 
    252         try {
    253             if (encoding.equalsIgnoreCase("Q")) {
    254                 return DecoderUtil.decodeQ(encodedText, charset);
    255             } else if (encoding.equalsIgnoreCase("B")) {
    256                 return DecoderUtil.decodeB(encodedText, charset);
    257             } else {
    258                 if (log.isWarnEnabled()) {
    259                     log.warn("Warning: Unknown encoding in encoded word '"
    260                             + body.substring(begin, end) + "'");
    261                 }
    262                 return null;
    263             }
    264         } catch (UnsupportedEncodingException e) {
    265             // should not happen because of isDecodingSupported check above
    266             if (log.isWarnEnabled()) {
    267                 log.warn("Unsupported encoding in encoded word '"
    268                         + body.substring(begin, end) + "'", e);
    269             }
    270             return null;
    271         } catch (RuntimeException e) {
    272             if (log.isWarnEnabled()) {
    273                 log.warn("Could not decode encoded word '"
    274                         + body.substring(begin, end) + "'", e);
    275             }
    276             return null;
    277         }
    278     }
    279 }
    280