Home | History | Annotate | Download | only in net
      1 /*
      2  * Copyright 2001-2004 The Apache Software Foundation.
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package org.apache.commons.codec.net;
     18 
     19 import java.io.ByteArrayOutputStream;
     20 import java.io.UnsupportedEncodingException;
     21 import java.util.BitSet;
     22 
     23 import org.apache.commons.codec.BinaryDecoder;
     24 import org.apache.commons.codec.BinaryEncoder;
     25 import org.apache.commons.codec.DecoderException;
     26 import org.apache.commons.codec.EncoderException;
     27 import org.apache.commons.codec.StringDecoder;
     28 import org.apache.commons.codec.StringEncoder;
     29 
     30 /**
     31  * <p>Implements the 'www-form-urlencoded' encoding scheme,
     32  * also misleadingly known as URL encoding.</p>
     33  *
     34  * <p>For more detailed information please refer to
     35  * <a href="http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1">
     36  * Chapter 17.13.4 'Form content types'</a> of the
     37  * <a href="http://www.w3.org/TR/html4/">HTML 4.01 Specification<a></p>
     38  *
     39  * <p>
     40  * This codec is meant to be a replacement for standard Java classes
     41  * {@link java.net.URLEncoder} and {@link java.net.URLDecoder}
     42  * on older Java platforms, as these classes in Java versions below
     43  * 1.4 rely on the platform's default charset encoding.
     44  * </p>
     45  *
     46  * @author Apache Software Foundation
     47  * @since 1.2
     48  * @version $Id: URLCodec.java,v 1.19 2004/03/29 07:59:00 ggregory Exp $
     49  */
     50 public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
     51 
     52     /**
     53      * The default charset used for string decoding and encoding.
     54      */
     55     protected String charset = StringEncodings.UTF8;
     56 
     57     protected static byte ESCAPE_CHAR = '%';
     58     /**
     59      * BitSet of www-form-url safe characters.
     60      */
     61     protected static final BitSet WWW_FORM_URL = new BitSet(256);
     62 
     63     // Static initializer for www_form_url
     64     static {
     65         // alpha characters
     66         for (int i = 'a'; i <= 'z'; i++) {
     67             WWW_FORM_URL.set(i);
     68         }
     69         for (int i = 'A'; i <= 'Z'; i++) {
     70             WWW_FORM_URL.set(i);
     71         }
     72         // numeric characters
     73         for (int i = '0'; i <= '9'; i++) {
     74             WWW_FORM_URL.set(i);
     75         }
     76         // special chars
     77         WWW_FORM_URL.set('-');
     78         WWW_FORM_URL.set('_');
     79         WWW_FORM_URL.set('.');
     80         WWW_FORM_URL.set('*');
     81         // blank to be replaced with +
     82         WWW_FORM_URL.set(' ');
     83     }
     84 
     85 
     86     /**
     87      * Default constructor.
     88      */
     89     public URLCodec() {
     90         super();
     91     }
     92 
     93     /**
     94      * Constructor which allows for the selection of a default charset
     95      *
     96      * @param charset the default string charset to use.
     97      */
     98     public URLCodec(String charset) {
     99         super();
    100         this.charset = charset;
    101     }
    102 
    103     /**
    104      * Encodes an array of bytes into an array of URL safe 7-bit
    105      * characters. Unsafe characters are escaped.
    106      *
    107      * @param urlsafe bitset of characters deemed URL safe
    108      * @param bytes array of bytes to convert to URL safe characters
    109      * @return array of bytes containing URL safe characters
    110      */
    111     public static final byte[] encodeUrl(BitSet urlsafe, byte[] bytes)
    112     {
    113         if (bytes == null) {
    114             return null;
    115         }
    116         if (urlsafe == null) {
    117             urlsafe = WWW_FORM_URL;
    118         }
    119 
    120         ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    121         for (int i = 0; i < bytes.length; i++) {
    122             int b = bytes[i];
    123             if (b < 0) {
    124                 b = 256 + b;
    125             }
    126             if (urlsafe.get(b)) {
    127                 if (b == ' ') {
    128                     b = '+';
    129                 }
    130                 buffer.write(b);
    131             } else {
    132                 buffer.write('%');
    133                 char hex1 = Character.toUpperCase(
    134                   Character.forDigit((b >> 4) & 0xF, 16));
    135                 char hex2 = Character.toUpperCase(
    136                   Character.forDigit(b & 0xF, 16));
    137                 buffer.write(hex1);
    138                 buffer.write(hex2);
    139             }
    140         }
    141         return buffer.toByteArray();
    142     }
    143 
    144 
    145     /**
    146      * Decodes an array of URL safe 7-bit characters into an array of
    147      * original bytes. Escaped characters are converted back to their
    148      * original representation.
    149      *
    150      * @param bytes array of URL safe characters
    151      * @return array of original bytes
    152      * @throws DecoderException Thrown if URL decoding is unsuccessful
    153      */
    154     public static final byte[] decodeUrl(byte[] bytes)
    155          throws DecoderException
    156     {
    157         if (bytes == null) {
    158             return null;
    159         }
    160         ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    161         for (int i = 0; i < bytes.length; i++) {
    162             int b = bytes[i];
    163             if (b == '+') {
    164                 buffer.write(' ');
    165             } else if (b == '%') {
    166                 try {
    167                     int u = Character.digit((char)bytes[++i], 16);
    168                     int l = Character.digit((char)bytes[++i], 16);
    169                     if (u == -1 || l == -1) {
    170                         throw new DecoderException("Invalid URL encoding");
    171                     }
    172                     buffer.write((char)((u << 4) + l));
    173                 } catch(ArrayIndexOutOfBoundsException e) {
    174                     throw new DecoderException("Invalid URL encoding");
    175                 }
    176             } else {
    177                 buffer.write(b);
    178             }
    179         }
    180         return buffer.toByteArray();
    181     }
    182 
    183 
    184     /**
    185      * Encodes an array of bytes into an array of URL safe 7-bit
    186      * characters. Unsafe characters are escaped.
    187      *
    188      * @param bytes array of bytes to convert to URL safe characters
    189      * @return array of bytes containing URL safe characters
    190      */
    191     public byte[] encode(byte[] bytes) {
    192         return encodeUrl(WWW_FORM_URL, bytes);
    193     }
    194 
    195 
    196     /**
    197      * Decodes an array of URL safe 7-bit characters into an array of
    198      * original bytes. Escaped characters are converted back to their
    199      * original representation.
    200      *
    201      * @param bytes array of URL safe characters
    202      * @return array of original bytes
    203      * @throws DecoderException Thrown if URL decoding is unsuccessful
    204      */
    205     public byte[] decode(byte[] bytes) throws DecoderException {
    206         return decodeUrl(bytes);
    207     }
    208 
    209 
    210     /**
    211      * Encodes a string into its URL safe form using the specified
    212      * string charset. Unsafe characters are escaped.
    213      *
    214      * @param pString string to convert to a URL safe form
    215      * @param charset the charset for pString
    216      * @return URL safe string
    217      * @throws UnsupportedEncodingException Thrown if charset is not
    218      *                                      supported
    219      */
    220     public String encode(String pString, String charset)
    221         throws UnsupportedEncodingException
    222     {
    223         if (pString == null) {
    224             return null;
    225         }
    226         return new String(encode(pString.getBytes(charset)), StringEncodings.US_ASCII);
    227     }
    228 
    229 
    230     /**
    231      * Encodes a string into its URL safe form using the default string
    232      * charset. Unsafe characters are escaped.
    233      *
    234      * @param pString string to convert to a URL safe form
    235      * @return URL safe string
    236      * @throws EncoderException Thrown if URL encoding is unsuccessful
    237      *
    238      * @see #getDefaultCharset()
    239      */
    240     public String encode(String pString) throws EncoderException {
    241         if (pString == null) {
    242             return null;
    243         }
    244         try {
    245             return encode(pString, getDefaultCharset());
    246         } catch(UnsupportedEncodingException e) {
    247             throw new EncoderException(e.getMessage());
    248         }
    249     }
    250 
    251 
    252     /**
    253      * Decodes a URL safe string into its original form using the
    254      * specified encoding. Escaped characters are converted back
    255      * to their original representation.
    256      *
    257      * @param pString URL safe string to convert into its original form
    258      * @param charset the original string charset
    259      * @return original string
    260      * @throws DecoderException Thrown if URL decoding is unsuccessful
    261      * @throws UnsupportedEncodingException Thrown if charset is not
    262      *                                      supported
    263      */
    264     public String decode(String pString, String charset)
    265         throws DecoderException, UnsupportedEncodingException
    266     {
    267         if (pString == null) {
    268             return null;
    269         }
    270         return new String(decode(pString.getBytes(StringEncodings.US_ASCII)), charset);
    271     }
    272 
    273 
    274     /**
    275      * Decodes a URL safe string into its original form using the default
    276      * string charset. Escaped characters are converted back to their
    277      * original representation.
    278      *
    279      * @param pString URL safe string to convert into its original form
    280      * @return original string
    281      * @throws DecoderException Thrown if URL decoding is unsuccessful
    282      *
    283      * @see #getDefaultCharset()
    284      */
    285     public String decode(String pString) throws DecoderException {
    286         if (pString == null) {
    287             return null;
    288         }
    289         try {
    290             return decode(pString, getDefaultCharset());
    291         } catch(UnsupportedEncodingException e) {
    292             throw new DecoderException(e.getMessage());
    293         }
    294     }
    295 
    296     /**
    297      * Encodes an object into its URL safe form. Unsafe characters are
    298      * escaped.
    299      *
    300      * @param pObject string to convert to a URL safe form
    301      * @return URL safe object
    302      * @throws EncoderException Thrown if URL encoding is not
    303      *                          applicable to objects of this type or
    304      *                          if encoding is unsuccessful
    305      */
    306     public Object encode(Object pObject) throws EncoderException {
    307         if (pObject == null) {
    308             return null;
    309         } else if (pObject instanceof byte[]) {
    310             return encode((byte[])pObject);
    311         } else if (pObject instanceof String) {
    312             return encode((String)pObject);
    313         } else {
    314             throw new EncoderException("Objects of type " +
    315                 pObject.getClass().getName() + " cannot be URL encoded");
    316 
    317         }
    318     }
    319 
    320     /**
    321      * Decodes a URL safe object into its original form. Escaped
    322      * characters are converted back to their original representation.
    323      *
    324      * @param pObject URL safe object to convert into its original form
    325      * @return original object
    326      * @throws DecoderException Thrown if URL decoding is not
    327      *                          applicable to objects of this type
    328      *                          if decoding is unsuccessful
    329      */
    330     public Object decode(Object pObject) throws DecoderException {
    331         if (pObject == null) {
    332             return null;
    333         } else if (pObject instanceof byte[]) {
    334             return decode((byte[])pObject);
    335         } else if (pObject instanceof String) {
    336             return decode((String)pObject);
    337         } else {
    338             throw new DecoderException("Objects of type " +
    339                 pObject.getClass().getName() + " cannot be URL decoded");
    340 
    341         }
    342     }
    343 
    344     /**
    345      * The <code>String</code> encoding used for decoding and encoding.
    346      *
    347      * @return Returns the encoding.
    348      *
    349      * @deprecated use #getDefaultCharset()
    350      */
    351     public String getEncoding() {
    352         return this.charset;
    353     }
    354 
    355     /**
    356      * The default charset used for string decoding and encoding.
    357      *
    358      * @return the default string charset.
    359      */
    360     public String getDefaultCharset() {
    361         return this.charset;
    362     }
    363 
    364 }
    365