Home | History | Annotate | Download | only in net
      1 /*
      2  * Copyright 2001-2004 The Apache Software Foundation.
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package org.apache.commons.codec.net;
     18 
     19 import java.io.ByteArrayOutputStream;
     20 import java.io.UnsupportedEncodingException;
     21 import java.util.BitSet;
     22 import org.apache.commons.codec.BinaryDecoder;
     23 import org.apache.commons.codec.BinaryEncoder;
     24 import org.apache.commons.codec.DecoderException;
     25 import org.apache.commons.codec.EncoderException;
     26 import org.apache.commons.codec.StringDecoder;
     27 import org.apache.commons.codec.StringEncoder;
     28 
     29 /**
     30  * <p>
     31  * Codec for the Quoted-Printable section of <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521 </a>.
     32  * </p>
     33  * <p>
     34  * The Quoted-Printable encoding is intended to represent data that largely consists of octets that correspond to
     35  * printable characters in the ASCII character set. It encodes the data in such a way that the resulting octets are
     36  * unlikely to be modified by mail transport. If the data being encoded are mostly ASCII text, the encoded form of the
     37  * data remains largely recognizable by humans. A body which is entirely ASCII may also be encoded in Quoted-Printable
     38  * to ensure the integrity of the data should the message pass through a character-translating and/or line-wrapping
     39  * gateway.
     40  * </p>
     41  *
     42  * <p>
     43  * Note:
     44  * </p>
     45  * <p>
     46  * Rules #3, #4, and #5 of the quoted-printable spec are not implemented yet because the complete quoted-printable spec
     47  * does not lend itself well into the byte[] oriented codec framework. Complete the codec once the streamable codec
     48  * framework is ready. The motivation behind providing the codec in a partial form is that it can already come in handy
     49  * for those applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance Q codec.
     50  * </p>
     51  *
     52  * @see <a href="http://www.ietf.org/rfc/rfc1521.txt"> RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One:
     53  *          Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a>
     54  *
     55  * @author Apache Software Foundation
     56  * @since 1.3
     57  * @version $Id: QuotedPrintableCodec.java,v 1.7 2004/04/09 22:21:07 ggregory Exp $
     58  */
     59 public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
     60     /**
     61      * The default charset used for string decoding and encoding.
     62      */
     63     private String charset = StringEncodings.UTF8;
     64 
     65     /**
     66      * BitSet of printable characters as defined in RFC 1521.
     67      */
     68     private static final BitSet PRINTABLE_CHARS = new BitSet(256);
     69 
     70     private static byte ESCAPE_CHAR = '=';
     71 
     72     private static byte TAB = 9;
     73 
     74     private static byte SPACE = 32;
     75     // Static initializer for printable chars collection
     76     static {
     77         // alpha characters
     78         for (int i = 33; i <= 60; i++) {
     79             PRINTABLE_CHARS.set(i);
     80         }
     81         for (int i = 62; i <= 126; i++) {
     82             PRINTABLE_CHARS.set(i);
     83         }
     84         PRINTABLE_CHARS.set(TAB);
     85         PRINTABLE_CHARS.set(SPACE);
     86     }
     87 
     88     /**
     89      * Default constructor.
     90      */
     91     public QuotedPrintableCodec() {
     92         super();
     93     }
     94 
     95     /**
     96      * Constructor which allows for the selection of a default charset
     97      *
     98      * @param charset
     99      *                  the default string charset to use.
    100      */
    101     public QuotedPrintableCodec(String charset) {
    102         super();
    103         this.charset = charset;
    104     }
    105 
    106     /**
    107      * Encodes byte into its quoted-printable representation.
    108      *
    109      * @param b
    110      *                  byte to encode
    111      * @param buffer
    112      *                  the buffer to write to
    113      */
    114     private static final void encodeQuotedPrintable(int b, ByteArrayOutputStream buffer) {
    115         buffer.write(ESCAPE_CHAR);
    116         char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
    117         char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16));
    118         buffer.write(hex1);
    119         buffer.write(hex2);
    120     }
    121 
    122     /**
    123      * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
    124      *
    125      * <p>
    126      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
    127      * RFC 1521 and is suitable for encoding binary data and unformatted text.
    128      * </p>
    129      *
    130      * @param printable
    131      *                  bitset of characters deemed quoted-printable
    132      * @param bytes
    133      *                  array of bytes to be encoded
    134      * @return array of bytes containing quoted-printable data
    135      */
    136     public static final byte[] encodeQuotedPrintable(BitSet printable, byte[] bytes) {
    137         if (bytes == null) {
    138             return null;
    139         }
    140         if (printable == null) {
    141             printable = PRINTABLE_CHARS;
    142         }
    143         ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    144         for (int i = 0; i < bytes.length; i++) {
    145             int b = bytes[i];
    146             if (b < 0) {
    147                 b = 256 + b;
    148             }
    149             if (printable.get(b)) {
    150                 buffer.write(b);
    151             } else {
    152                 encodeQuotedPrintable(b, buffer);
    153             }
    154         }
    155         return buffer.toByteArray();
    156     }
    157 
    158     /**
    159      * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted
    160      * back to their original representation.
    161      *
    162      * <p>
    163      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
    164      * RFC 1521.
    165      * </p>
    166      *
    167      * @param bytes
    168      *                  array of quoted-printable characters
    169      * @return array of original bytes
    170      * @throws DecoderException
    171      *                  Thrown if quoted-printable decoding is unsuccessful
    172      */
    173     public static final byte[] decodeQuotedPrintable(byte[] bytes) throws DecoderException {
    174         if (bytes == null) {
    175             return null;
    176         }
    177         ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    178         for (int i = 0; i < bytes.length; i++) {
    179             int b = bytes[i];
    180             if (b == ESCAPE_CHAR) {
    181                 try {
    182                     int u = Character.digit((char) bytes[++i], 16);
    183                     int l = Character.digit((char) bytes[++i], 16);
    184                     if (u == -1 || l == -1) {
    185                         throw new DecoderException("Invalid quoted-printable encoding");
    186                     }
    187                     buffer.write((char) ((u << 4) + l));
    188                 } catch (ArrayIndexOutOfBoundsException e) {
    189                     throw new DecoderException("Invalid quoted-printable encoding");
    190                 }
    191             } else {
    192                 buffer.write(b);
    193             }
    194         }
    195         return buffer.toByteArray();
    196     }
    197 
    198     /**
    199      * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
    200      *
    201      * <p>
    202      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
    203      * RFC 1521 and is suitable for encoding binary data and unformatted text.
    204      * </p>
    205      *
    206      * @param bytes
    207      *                  array of bytes to be encoded
    208      * @return array of bytes containing quoted-printable data
    209      */
    210     public byte[] encode(byte[] bytes) {
    211         return encodeQuotedPrintable(PRINTABLE_CHARS, bytes);
    212     }
    213 
    214     /**
    215      * Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are converted
    216      * back to their original representation.
    217      *
    218      * <p>
    219      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
    220      * RFC 1521.
    221      * </p>
    222      *
    223      * @param bytes
    224      *                  array of quoted-printable characters
    225      * @return array of original bytes
    226      * @throws DecoderException
    227      *                  Thrown if quoted-printable decoding is unsuccessful
    228      */
    229     public byte[] decode(byte[] bytes) throws DecoderException {
    230         return decodeQuotedPrintable(bytes);
    231     }
    232 
    233     /**
    234      * Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped.
    235      *
    236      * <p>
    237      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
    238      * RFC 1521 and is suitable for encoding binary data.
    239      * </p>
    240      *
    241      * @param pString
    242      *                  string to convert to quoted-printable form
    243      * @return quoted-printable string
    244      *
    245      * @throws EncoderException
    246      *                  Thrown if quoted-printable encoding is unsuccessful
    247      *
    248      * @see #getDefaultCharset()
    249      */
    250     public String encode(String pString) throws EncoderException {
    251         if (pString == null) {
    252             return null;
    253         }
    254         try {
    255             return encode(pString, getDefaultCharset());
    256         } catch (UnsupportedEncodingException e) {
    257             throw new EncoderException(e.getMessage());
    258         }
    259     }
    260 
    261     /**
    262      * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters
    263      * are converted back to their original representation.
    264      *
    265      * @param pString
    266      *                  quoted-printable string to convert into its original form
    267      * @param charset
    268      *                  the original string charset
    269      * @return original string
    270      * @throws DecoderException
    271      *                  Thrown if quoted-printable decoding is unsuccessful
    272      * @throws UnsupportedEncodingException
    273      *                  Thrown if charset is not supported
    274      */
    275     public String decode(String pString, String charset) throws DecoderException, UnsupportedEncodingException {
    276         if (pString == null) {
    277             return null;
    278         }
    279         return new String(decode(pString.getBytes(StringEncodings.US_ASCII)), charset);
    280     }
    281 
    282     /**
    283      * Decodes a quoted-printable string into its original form using the default string charset. Escaped characters are
    284      * converted back to their original representation.
    285      *
    286      * @param pString
    287      *                  quoted-printable string to convert into its original form
    288      * @return original string
    289      * @throws DecoderException
    290      *                  Thrown if quoted-printable decoding is unsuccessful
    291      * @throws UnsupportedEncodingException
    292      *                  Thrown if charset is not supported
    293      * @see #getDefaultCharset()
    294      */
    295     public String decode(String pString) throws DecoderException {
    296         if (pString == null) {
    297             return null;
    298         }
    299         try {
    300             return decode(pString, getDefaultCharset());
    301         } catch (UnsupportedEncodingException e) {
    302             throw new DecoderException(e.getMessage());
    303         }
    304     }
    305 
    306     /**
    307      * Encodes an object into its quoted-printable safe form. Unsafe characters are escaped.
    308      *
    309      * @param pObject
    310      *                  string to convert to a quoted-printable form
    311      * @return quoted-printable object
    312      * @throws EncoderException
    313      *                  Thrown if quoted-printable encoding is not applicable to objects of this type or if encoding is
    314      *                  unsuccessful
    315      */
    316     public Object encode(Object pObject) throws EncoderException {
    317         if (pObject == null) {
    318             return null;
    319         } else if (pObject instanceof byte[]) {
    320             return encode((byte[]) pObject);
    321         } else if (pObject instanceof String) {
    322             return encode((String) pObject);
    323         } else {
    324             throw new EncoderException("Objects of type "
    325                 + pObject.getClass().getName()
    326                 + " cannot be quoted-printable encoded");
    327         }
    328     }
    329 
    330     /**
    331      * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original
    332      * representation.
    333      *
    334      * @param pObject
    335      *                  quoted-printable object to convert into its original form
    336      * @return original object
    337      * @throws DecoderException
    338      *                  Thrown if quoted-printable decoding is not applicable to objects of this type if decoding is
    339      *                  unsuccessful
    340      */
    341     public Object decode(Object pObject) throws DecoderException {
    342         if (pObject == null) {
    343             return null;
    344         } else if (pObject instanceof byte[]) {
    345             return decode((byte[]) pObject);
    346         } else if (pObject instanceof String) {
    347             return decode((String) pObject);
    348         } else {
    349             throw new DecoderException("Objects of type "
    350                 + pObject.getClass().getName()
    351                 + " cannot be quoted-printable decoded");
    352         }
    353     }
    354 
    355     /**
    356      * Returns the default charset used for string decoding and encoding.
    357      *
    358      * @return the default string charset.
    359      */
    360     public String getDefaultCharset() {
    361         return this.charset;
    362     }
    363 
    364     /**
    365      * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
    366      *
    367      * <p>
    368      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
    369      * RFC 1521 and is suitable for encoding binary data and unformatted text.
    370      * </p>
    371      *
    372      * @param pString
    373      *                  string to convert to quoted-printable form
    374      * @param charset
    375      *                  the charset for pString
    376      * @return quoted-printable string
    377      *
    378      * @throws UnsupportedEncodingException
    379      *                  Thrown if the charset is not supported
    380      */
    381     public String encode(String pString, String charset) throws UnsupportedEncodingException {
    382         if (pString == null) {
    383             return null;
    384         }
    385         return new String(encode(pString.getBytes(charset)), StringEncodings.US_ASCII);
    386     }
    387 }
    388