Home | History | Annotate | Download | only in net
      1 /*
      2  * Copyright 2001-2004 The Apache Software Foundation.
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package org.apache.commons.codec.net;
     18 
     19 import java.io.ByteArrayOutputStream;
     20 import java.io.UnsupportedEncodingException;
     21 import java.util.BitSet;
     22 
     23 import org.apache.commons.codec.BinaryDecoder;
     24 import org.apache.commons.codec.BinaryEncoder;
     25 import org.apache.commons.codec.DecoderException;
     26 import org.apache.commons.codec.EncoderException;
     27 import org.apache.commons.codec.StringDecoder;
     28 import org.apache.commons.codec.StringEncoder;
     29 
     30 /**
     31  * <p>Implements the 'www-form-urlencoded' encoding scheme,
     32  * also misleadingly known as URL encoding.</p>
     33  *
     34  * <p>For more detailed information please refer to
     35  * <a href="http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1">
     36  * Chapter 17.13.4 'Form content types'</a> of the
     37  * <a href="http://www.w3.org/TR/html4/">HTML 4.01 Specification<a></p>
     38  *
     39  * <p>
     40  * This codec is meant to be a replacement for standard Java classes
     41  * {@link java.net.URLEncoder} and {@link java.net.URLDecoder}
     42  * on older Java platforms, as these classes in Java versions below
     43  * 1.4 rely on the platform's default charset encoding.
     44  * </p>
     45  *
     46  * @author Apache Software Foundation
     47  * @since 1.2
     48  * @version $Id: URLCodec.java,v 1.19 2004/03/29 07:59:00 ggregory Exp $
     49  *
     50  * @deprecated Please use {@link java.net.URL#openConnection} instead.
     51  *     Please visit <a href="http://android-developers.blogspot.com/2011/09/androids-http-clients.html">this webpage</a>
     52  *     for further details.
     53  */
     54 @Deprecated
     55 public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
     56 
     57     /**
     58      * The default charset used for string decoding and encoding.
     59      */
     60     protected String charset = StringEncodings.UTF8;
     61 
     62     protected static byte ESCAPE_CHAR = '%';
     63     /**
     64      * BitSet of www-form-url safe characters.
     65      */
     66     protected static final BitSet WWW_FORM_URL = new BitSet(256);
     67 
     68     // Static initializer for www_form_url
     69     static {
     70         // alpha characters
     71         for (int i = 'a'; i <= 'z'; i++) {
     72             WWW_FORM_URL.set(i);
     73         }
     74         for (int i = 'A'; i <= 'Z'; i++) {
     75             WWW_FORM_URL.set(i);
     76         }
     77         // numeric characters
     78         for (int i = '0'; i <= '9'; i++) {
     79             WWW_FORM_URL.set(i);
     80         }
     81         // special chars
     82         WWW_FORM_URL.set('-');
     83         WWW_FORM_URL.set('_');
     84         WWW_FORM_URL.set('.');
     85         WWW_FORM_URL.set('*');
     86         // blank to be replaced with +
     87         WWW_FORM_URL.set(' ');
     88     }
     89 
     90 
     91     /**
     92      * Default constructor.
     93      */
     94     public URLCodec() {
     95         super();
     96     }
     97 
     98     /**
     99      * Constructor which allows for the selection of a default charset
    100      *
    101      * @param charset the default string charset to use.
    102      */
    103     public URLCodec(String charset) {
    104         super();
    105         this.charset = charset;
    106     }
    107 
    108     /**
    109      * Encodes an array of bytes into an array of URL safe 7-bit
    110      * characters. Unsafe characters are escaped.
    111      *
    112      * @param urlsafe bitset of characters deemed URL safe
    113      * @param bytes array of bytes to convert to URL safe characters
    114      * @return array of bytes containing URL safe characters
    115      */
    116     public static final byte[] encodeUrl(BitSet urlsafe, byte[] bytes)
    117     {
    118         if (bytes == null) {
    119             return null;
    120         }
    121         if (urlsafe == null) {
    122             urlsafe = WWW_FORM_URL;
    123         }
    124 
    125         ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    126         for (int i = 0; i < bytes.length; i++) {
    127             int b = bytes[i];
    128             if (b < 0) {
    129                 b = 256 + b;
    130             }
    131             if (urlsafe.get(b)) {
    132                 if (b == ' ') {
    133                     b = '+';
    134                 }
    135                 buffer.write(b);
    136             } else {
    137                 buffer.write('%');
    138                 char hex1 = Character.toUpperCase(
    139                   Character.forDigit((b >> 4) & 0xF, 16));
    140                 char hex2 = Character.toUpperCase(
    141                   Character.forDigit(b & 0xF, 16));
    142                 buffer.write(hex1);
    143                 buffer.write(hex2);
    144             }
    145         }
    146         return buffer.toByteArray();
    147     }
    148 
    149 
    150     /**
    151      * Decodes an array of URL safe 7-bit characters into an array of
    152      * original bytes. Escaped characters are converted back to their
    153      * original representation.
    154      *
    155      * @param bytes array of URL safe characters
    156      * @return array of original bytes
    157      * @throws DecoderException Thrown if URL decoding is unsuccessful
    158      */
    159     public static final byte[] decodeUrl(byte[] bytes)
    160          throws DecoderException
    161     {
    162         if (bytes == null) {
    163             return null;
    164         }
    165         ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    166         for (int i = 0; i < bytes.length; i++) {
    167             int b = bytes[i];
    168             if (b == '+') {
    169                 buffer.write(' ');
    170             } else if (b == '%') {
    171                 try {
    172                     int u = Character.digit((char)bytes[++i], 16);
    173                     int l = Character.digit((char)bytes[++i], 16);
    174                     if (u == -1 || l == -1) {
    175                         throw new DecoderException("Invalid URL encoding");
    176                     }
    177                     buffer.write((char)((u << 4) + l));
    178                 } catch(ArrayIndexOutOfBoundsException e) {
    179                     throw new DecoderException("Invalid URL encoding");
    180                 }
    181             } else {
    182                 buffer.write(b);
    183             }
    184         }
    185         return buffer.toByteArray();
    186     }
    187 
    188 
    189     /**
    190      * Encodes an array of bytes into an array of URL safe 7-bit
    191      * characters. Unsafe characters are escaped.
    192      *
    193      * @param bytes array of bytes to convert to URL safe characters
    194      * @return array of bytes containing URL safe characters
    195      */
    196     public byte[] encode(byte[] bytes) {
    197         return encodeUrl(WWW_FORM_URL, bytes);
    198     }
    199 
    200 
    201     /**
    202      * Decodes an array of URL safe 7-bit characters into an array of
    203      * original bytes. Escaped characters are converted back to their
    204      * original representation.
    205      *
    206      * @param bytes array of URL safe characters
    207      * @return array of original bytes
    208      * @throws DecoderException Thrown if URL decoding is unsuccessful
    209      */
    210     public byte[] decode(byte[] bytes) throws DecoderException {
    211         return decodeUrl(bytes);
    212     }
    213 
    214 
    215     /**
    216      * Encodes a string into its URL safe form using the specified
    217      * string charset. Unsafe characters are escaped.
    218      *
    219      * @param pString string to convert to a URL safe form
    220      * @param charset the charset for pString
    221      * @return URL safe string
    222      * @throws UnsupportedEncodingException Thrown if charset is not
    223      *                                      supported
    224      */
    225     public String encode(String pString, String charset)
    226         throws UnsupportedEncodingException
    227     {
    228         if (pString == null) {
    229             return null;
    230         }
    231         return new String(encode(pString.getBytes(charset)), StringEncodings.US_ASCII);
    232     }
    233 
    234 
    235     /**
    236      * Encodes a string into its URL safe form using the default string
    237      * charset. Unsafe characters are escaped.
    238      *
    239      * @param pString string to convert to a URL safe form
    240      * @return URL safe string
    241      * @throws EncoderException Thrown if URL encoding is unsuccessful
    242      *
    243      * @see #getDefaultCharset()
    244      */
    245     public String encode(String pString) throws EncoderException {
    246         if (pString == null) {
    247             return null;
    248         }
    249         try {
    250             return encode(pString, getDefaultCharset());
    251         } catch(UnsupportedEncodingException e) {
    252             throw new EncoderException(e.getMessage());
    253         }
    254     }
    255 
    256 
    257     /**
    258      * Decodes a URL safe string into its original form using the
    259      * specified encoding. Escaped characters are converted back
    260      * to their original representation.
    261      *
    262      * @param pString URL safe string to convert into its original form
    263      * @param charset the original string charset
    264      * @return original string
    265      * @throws DecoderException Thrown if URL decoding is unsuccessful
    266      * @throws UnsupportedEncodingException Thrown if charset is not
    267      *                                      supported
    268      */
    269     public String decode(String pString, String charset)
    270         throws DecoderException, UnsupportedEncodingException
    271     {
    272         if (pString == null) {
    273             return null;
    274         }
    275         return new String(decode(pString.getBytes(StringEncodings.US_ASCII)), charset);
    276     }
    277 
    278 
    279     /**
    280      * Decodes a URL safe string into its original form using the default
    281      * string charset. Escaped characters are converted back to their
    282      * original representation.
    283      *
    284      * @param pString URL safe string to convert into its original form
    285      * @return original string
    286      * @throws DecoderException Thrown if URL decoding is unsuccessful
    287      *
    288      * @see #getDefaultCharset()
    289      */
    290     public String decode(String pString) throws DecoderException {
    291         if (pString == null) {
    292             return null;
    293         }
    294         try {
    295             return decode(pString, getDefaultCharset());
    296         } catch(UnsupportedEncodingException e) {
    297             throw new DecoderException(e.getMessage());
    298         }
    299     }
    300 
    301     /**
    302      * Encodes an object into its URL safe form. Unsafe characters are
    303      * escaped.
    304      *
    305      * @param pObject string to convert to a URL safe form
    306      * @return URL safe object
    307      * @throws EncoderException Thrown if URL encoding is not
    308      *                          applicable to objects of this type or
    309      *                          if encoding is unsuccessful
    310      */
    311     public Object encode(Object pObject) throws EncoderException {
    312         if (pObject == null) {
    313             return null;
    314         } else if (pObject instanceof byte[]) {
    315             return encode((byte[])pObject);
    316         } else if (pObject instanceof String) {
    317             return encode((String)pObject);
    318         } else {
    319             throw new EncoderException("Objects of type " +
    320                 pObject.getClass().getName() + " cannot be URL encoded");
    321 
    322         }
    323     }
    324 
    325     /**
    326      * Decodes a URL safe object into its original form. Escaped
    327      * characters are converted back to their original representation.
    328      *
    329      * @param pObject URL safe object to convert into its original form
    330      * @return original object
    331      * @throws DecoderException Thrown if URL decoding is not
    332      *                          applicable to objects of this type
    333      *                          if decoding is unsuccessful
    334      */
    335     public Object decode(Object pObject) throws DecoderException {
    336         if (pObject == null) {
    337             return null;
    338         } else if (pObject instanceof byte[]) {
    339             return decode((byte[])pObject);
    340         } else if (pObject instanceof String) {
    341             return decode((String)pObject);
    342         } else {
    343             throw new DecoderException("Objects of type " +
    344                 pObject.getClass().getName() + " cannot be URL decoded");
    345 
    346         }
    347     }
    348 
    349     /**
    350      * The <code>String</code> encoding used for decoding and encoding.
    351      *
    352      * @return Returns the encoding.
    353      *
    354      * @deprecated use #getDefaultCharset()
    355      */
    356     public String getEncoding() {
    357         return this.charset;
    358     }
    359 
    360     /**
    361      * The default charset used for string decoding and encoding.
    362      *
    363      * @return the default string charset.
    364      */
    365     public String getDefaultCharset() {
    366         return this.charset;
    367     }
    368 
    369 }
    370