Home | History | Annotate | Download | only in decoder
      1 /****************************************************************
      2  * Licensed to the Apache Software Foundation (ASF) under one   *
      3  * or more contributor license agreements.  See the NOTICE file *
      4  * distributed with this work for additional information        *
      5  * regarding copyright ownership.  The ASF licenses this file   *
      6  * to you under the Apache License, Version 2.0 (the            *
      7  * "License"); you may not use this file except in compliance   *
      8  * with the License.  You may obtain a copy of the License at   *
      9  *                                                              *
     10  *   http://www.apache.org/licenses/LICENSE-2.0                 *
     11  *                                                              *
     12  * Unless required by applicable law or agreed to in writing,   *
     13  * software distributed under the License is distributed on an  *
     14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
     15  * KIND, either express or implied.  See the License for the    *
     16  * specific language governing permissions and limitations      *
     17  * under the License.                                           *
     18  ****************************************************************/
     19 
     20 package org.apache.james.mime4j.decoder;
     21 
     22 //BEGIN android-changed: Stubbing out logging
     23 import org.apache.james.mime4j.Log;
     24 import org.apache.james.mime4j.LogFactory;
     25 //END android-changed
     26 import org.apache.james.mime4j.util.CharsetUtil;
     27 
     28 import java.io.ByteArrayInputStream;
     29 import java.io.ByteArrayOutputStream;
     30 import java.io.IOException;
     31 import java.io.UnsupportedEncodingException;
     32 
     33 /**
     34  * Static methods for decoding strings, byte arrays and encoded words.
     35  *
     36  *
     37  * @version $Id: DecoderUtil.java,v 1.3 2005/02/07 15:33:59 ntherning Exp $
     38  */
     39 public class DecoderUtil {
     40     private static Log log = LogFactory.getLog(DecoderUtil.class);
     41 
     42     /**
     43      * Decodes a string containing quoted-printable encoded data.
     44      *
     45      * @param s the string to decode.
     46      * @return the decoded bytes.
     47      */
     48     public static byte[] decodeBaseQuotedPrintable(String s) {
     49         ByteArrayOutputStream baos = new ByteArrayOutputStream();
     50 
     51         try {
     52             byte[] bytes = s.getBytes("US-ASCII");
     53 
     54             QuotedPrintableInputStream is = new QuotedPrintableInputStream(
     55                                                new ByteArrayInputStream(bytes));
     56 
     57             int b = 0;
     58             while ((b = is.read()) != -1) {
     59                 baos.write(b);
     60             }
     61         } catch (IOException e) {
     62             /*
     63              * This should never happen!
     64              */
     65             log.error(e);
     66         }
     67 
     68         return baos.toByteArray();
     69     }
     70 
     71     /**
     72      * Decodes a string containing base64 encoded data.
     73      *
     74      * @param s the string to decode.
     75      * @return the decoded bytes.
     76      */
     77     public static byte[] decodeBase64(String s) {
     78         ByteArrayOutputStream baos = new ByteArrayOutputStream();
     79 
     80         try {
     81             byte[] bytes = s.getBytes("US-ASCII");
     82 
     83             Base64InputStream is = new Base64InputStream(
     84                                         new ByteArrayInputStream(bytes));
     85 
     86             int b = 0;
     87             while ((b = is.read()) != -1) {
     88                 baos.write(b);
     89             }
     90         } catch (IOException e) {
     91             /*
     92              * This should never happen!
     93              */
     94             log.error(e);
     95         }
     96 
     97         return baos.toByteArray();
     98     }
     99 
    100     /**
    101      * Decodes an encoded word encoded with the 'B' encoding (described in
    102      * RFC 2047) found in a header field body.
    103      *
    104      * @param encodedWord the encoded word to decode.
    105      * @param charset the Java charset to use.
    106      * @return the decoded string.
    107      * @throws UnsupportedEncodingException if the given Java charset isn't
    108      *         supported.
    109      */
    110     public static String decodeB(String encodedWord, String charset)
    111             throws UnsupportedEncodingException {
    112 
    113         return new String(decodeBase64(encodedWord), charset);
    114     }
    115 
    116     /**
    117      * Decodes an encoded word encoded with the 'Q' encoding (described in
    118      * RFC 2047) found in a header field body.
    119      *
    120      * @param encodedWord the encoded word to decode.
    121      * @param charset the Java charset to use.
    122      * @return the decoded string.
    123      * @throws UnsupportedEncodingException if the given Java charset isn't
    124      *         supported.
    125      */
    126     public static String decodeQ(String encodedWord, String charset)
    127             throws UnsupportedEncodingException {
    128 
    129         /*
    130          * Replace _ with =20
    131          */
    132         StringBuffer sb = new StringBuffer();
    133         for (int i = 0; i < encodedWord.length(); i++) {
    134             char c = encodedWord.charAt(i);
    135             if (c == '_') {
    136                 sb.append("=20");
    137             } else {
    138                 sb.append(c);
    139             }
    140         }
    141 
    142         return new String(decodeBaseQuotedPrintable(sb.toString()), charset);
    143     }
    144 
    145     /**
    146      * Decodes a string containing encoded words as defined by RFC 2047.
    147      * Encoded words in have the form
    148      * =?charset?enc?Encoded word?= where enc is either 'Q' or 'q' for
    149      * quoted-printable and 'B' or 'b' for Base64.
    150      *
    151      * ANDROID:  COPIED FROM A NEWER VERSION OF MIME4J
    152      *
    153      * @param body the string to decode.
    154      * @return the decoded string.
    155      */
    156     public static String decodeEncodedWords(String body) {
    157 
    158         // ANDROID:  Most strings will not include "=?" so a quick test can prevent unneeded
    159         // object creation.  This could also be handled via lazy creation of the StringBuilder.
    160         if (body.indexOf("=?") == -1) {
    161             return body;
    162         }
    163 
    164         int previousEnd = 0;
    165         boolean previousWasEncoded = false;
    166 
    167         StringBuilder sb = new StringBuilder();
    168 
    169         while (true) {
    170             int begin = body.indexOf("=?", previousEnd);
    171 
    172             // ANDROID:  The mime4j original version has an error here.  It gets confused if
    173             // the encoded string begins with an '=' (just after "?Q?").  This patch seeks forward
    174             // to find the two '?' in the "header", before looking for the final "?=".
    175             if (begin == -1) {
    176                 break;
    177             }
    178             int qm1 = body.indexOf('?', begin + 2);
    179             if (qm1 == -1) {
    180                 break;
    181             }
    182             int qm2 = body.indexOf('?', qm1 + 1);
    183             if (qm2 == -1) {
    184                 break;
    185             }
    186             int end = body.indexOf("?=", qm2 + 1);
    187             if (end == -1) {
    188                 break;
    189             }
    190             end += 2;
    191 
    192             String sep = body.substring(previousEnd, begin);
    193 
    194             String decoded = decodeEncodedWord(body, begin, end);
    195             if (decoded == null) {
    196                 sb.append(sep);
    197                 sb.append(body.substring(begin, end));
    198             } else {
    199                 if (!previousWasEncoded || !CharsetUtil.isWhitespace(sep)) {
    200                     sb.append(sep);
    201                 }
    202                 sb.append(decoded);
    203             }
    204 
    205             previousEnd = end;
    206             previousWasEncoded = decoded != null;
    207         }
    208 
    209         if (previousEnd == 0)
    210             return body;
    211 
    212         sb.append(body.substring(previousEnd));
    213         return sb.toString();
    214     }
    215 
    216     // return null on error. Begin is index of '=?' in body.
    217     public static String decodeEncodedWord(String body, int begin, int end) {
    218         // Skip the '?=' chars in body and scan forward from there for next '?'
    219         int qm1 = body.indexOf('?', begin + 2);
    220         if (qm1 == -1 || qm1 == end - 2)
    221             return null;
    222 
    223         int qm2 = body.indexOf('?', qm1 + 1);
    224         if (qm2 == -1 || qm2 == end - 2)
    225             return null;
    226 
    227         String mimeCharset = body.substring(begin + 2, qm1);
    228         String encoding = body.substring(qm1 + 1, qm2);
    229         String encodedText = body.substring(qm2 + 1, end - 2);
    230 
    231         String charset = CharsetUtil.toJavaCharset(mimeCharset);
    232         if (charset == null) {
    233             if (log.isWarnEnabled()) {
    234                 log.warn("MIME charset '" + mimeCharset + "' in encoded word '"
    235                         + body.substring(begin, end) + "' doesn't have a "
    236                         + "corresponding Java charset");
    237             }
    238             return null;
    239         } else if (!CharsetUtil.isDecodingSupported(charset)) {
    240             if (log.isWarnEnabled()) {
    241                 log.warn("Current JDK doesn't support decoding of charset '"
    242                         + charset + "' (MIME charset '" + mimeCharset
    243                         + "' in encoded word '" + body.substring(begin, end)
    244                         + "')");
    245             }
    246             return null;
    247         }
    248 
    249         if (encodedText.length() == 0) {
    250             if (log.isWarnEnabled()) {
    251                 log.warn("Missing encoded text in encoded word: '"
    252                         + body.substring(begin, end) + "'");
    253             }
    254             return null;
    255         }
    256 
    257         try {
    258             if (encoding.equalsIgnoreCase("Q")) {
    259                 return DecoderUtil.decodeQ(encodedText, charset);
    260             } else if (encoding.equalsIgnoreCase("B")) {
    261                 return DecoderUtil.decodeB(encodedText, charset);
    262             } else {
    263                 if (log.isWarnEnabled()) {
    264                     log.warn("Warning: Unknown encoding in encoded word '"
    265                             + body.substring(begin, end) + "'");
    266                 }
    267                 return null;
    268             }
    269         } catch (UnsupportedEncodingException e) {
    270             // should not happen because of isDecodingSupported check above
    271             if (log.isWarnEnabled()) {
    272                 log.warn("Unsupported encoding in encoded word '"
    273                         + body.substring(begin, end) + "'", e);
    274             }
    275             return null;
    276         } catch (RuntimeException e) {
    277             if (log.isWarnEnabled()) {
    278                 log.warn("Could not decode encoded word '"
    279                         + body.substring(begin, end) + "'", e);
    280             }
    281             return null;
    282         }
    283     }
    284 }
    285