/*
 * Copyright 2001-2004 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.net;

import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import java.util.BitSet;

import org.apache.commons.codec.BinaryDecoder;
import org.apache.commons.codec.BinaryEncoder;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringDecoder;
import org.apache.commons.codec.StringEncoder;

/**
 * <p>Implements the 'www-form-urlencoded' encoding scheme,
 * also misleadingly known as URL encoding.</p>
 *
 * <p>For more detailed information please refer to
 * <a href="http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1">
 * Chapter 17.13.4 'Form content types'</a> of the
 * <a href="http://www.w3.org/TR/html4/">HTML 4.01 Specification</a></p>
 *
 * <p>
 * This codec is meant to be a replacement for standard Java classes
 * {@link java.net.URLEncoder} and {@link java.net.URLDecoder}
 * on older Java platforms, as these classes in Java versions below
 * 1.4 rely on the platform's default charset encoding.
 * </p>
 *
 * @author Apache Software Foundation
 * @since 1.2
 * @version $Id: URLCodec.java,v 1.19 2004/03/29 07:59:00 ggregory Exp $
 */
public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {

    /**
     * The default charset used for string decoding and encoding.
     */
    protected String charset = StringEncodings.UTF8;

    /**
     * The byte that introduces a two-digit hexadecimal escape sequence.
     */
    protected static final byte ESCAPE_CHAR = '%';

    /**
     * BitSet of www-form-url safe characters.
     */
    protected static final BitSet WWW_FORM_URL = new BitSet(256);

    // Static initializer for WWW_FORM_URL
    static {
        // alpha characters
        for (int i = 'a'; i <= 'z'; i++) {
            WWW_FORM_URL.set(i);
        }
        for (int i = 'A'; i <= 'Z'; i++) {
            WWW_FORM_URL.set(i);
        }
        // numeric characters
        for (int i = '0'; i <= '9'; i++) {
            WWW_FORM_URL.set(i);
        }
        // special chars
        WWW_FORM_URL.set('-');
        WWW_FORM_URL.set('_');
        WWW_FORM_URL.set('.');
        WWW_FORM_URL.set('*');
        // blank is marked safe here because encodeUrl replaces it with '+'
        WWW_FORM_URL.set(' ');
    }

    /**
     * Default constructor. The default charset is left at
     * <code>StringEncodings.UTF8</code>.
     */
    public URLCodec() {
        super();
    }

    /**
     * Constructor which allows for the selection of a default charset.
     *
     * @param charset the default string charset to use.
     */
    public URLCodec(String charset) {
        super();
        this.charset = charset;
    }

    /**
     * Encodes an array of bytes into an array of URL safe 7-bit
     * characters. Bytes whose bit is set in <code>urlsafe</code> are
     * copied through unchanged, except that a blank is written as
     * <code>'+'</code>; every other byte is written as
     * {@link #ESCAPE_CHAR} followed by two upper-case hexadecimal digits.
     *
     * @param urlsafe bitset of characters deemed URL safe; if
     *                <code>null</code>, {@link #WWW_FORM_URL} is used
     * @param bytes array of bytes to convert to URL safe characters
     * @return array of bytes containing URL safe characters, or
     *         <code>null</code> if <code>bytes</code> is <code>null</code>
     */
    public static final byte[] encodeUrl(BitSet urlsafe, byte[] bytes) {
        if (bytes == null) {
            return null;
        }
        if (urlsafe == null) {
            urlsafe = WWW_FORM_URL;
        }

        // Every input byte produces at least one output byte, so the input
        // length is a reasonable initial capacity.
        ByteArrayOutputStream buffer = new ByteArrayOutputStream(bytes.length);
        for (int i = 0; i < bytes.length; i++) {
            // Mask to 0..255 so the value can be used as a bit set index.
            int b = bytes[i] & 0xFF;
            if (urlsafe.get(b)) {
                buffer.write(b == ' ' ? '+' : b);
            } else {
                buffer.write(ESCAPE_CHAR);
                buffer.write(Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16)));
                buffer.write(Character.toUpperCase(Character.forDigit(b & 0xF, 16)));
            }
        }
        return buffer.toByteArray();
    }

    /**
     * Decodes an array of URL safe 7-bit characters into an array of
     * original bytes. A <code>'+'</code> is converted to a blank and each
     * {@link #ESCAPE_CHAR} followed by two hexadecimal digits is converted
     * back to the byte it represents; all other bytes are copied unchanged.
     *
     * @param bytes array of URL safe characters
     * @return array of original bytes, or <code>null</code> if
     *         <code>bytes</code> is <code>null</code>
     * @throws DecoderException Thrown if an escape sequence is truncated
     *                          or contains a non-hexadecimal digit
     */
    public static final byte[] decodeUrl(byte[] bytes) throws DecoderException {
        if (bytes == null) {
            return null;
        }
        ByteArrayOutputStream buffer = new ByteArrayOutputStream(bytes.length);
        for (int i = 0; i < bytes.length; i++) {
            int b = bytes[i];
            if (b == '+') {
                buffer.write(' ');
            } else if (b == ESCAPE_CHAR) {
                // An escape needs the marker plus two digits; check up front
                // instead of relying on an ArrayIndexOutOfBoundsException.
                if (bytes.length - i < 3) {
                    throw new DecoderException("Invalid URL encoding");
                }
                int u = Character.digit((char) bytes[i + 1], 16);
                int l = Character.digit((char) bytes[i + 2], 16);
                if (u == -1 || l == -1) {
                    throw new DecoderException("Invalid URL encoding");
                }
                buffer.write((u << 4) | l);
                // Skip the two digits that were just consumed.
                i += 2;
            } else {
                buffer.write(b);
            }
        }
        return buffer.toByteArray();
    }

    /**
     * Encodes an array of bytes into an array of URL safe 7-bit
     * characters, using {@link #WWW_FORM_URL} as the set of safe
     * characters. Unsafe characters are escaped.
     *
     * @param bytes array of bytes to convert to URL safe characters
     * @return array of bytes containing URL safe characters
     */
    public byte[] encode(byte[] bytes) {
        return encodeUrl(WWW_FORM_URL, bytes);
    }

    /**
     * Decodes an array of URL safe 7-bit characters into an array of
     * original bytes. Escaped characters are converted back to their
     * original representation.
     *
     * @param bytes array of URL safe characters
     * @return array of original bytes
     * @throws DecoderException Thrown if URL decoding is unsuccessful
     */
    public byte[] decode(byte[] bytes) throws DecoderException {
        return decodeUrl(bytes);
    }

    /**
     * Encodes a string into its URL safe form using the specified
     * string charset. Unsafe characters are escaped.
     *
     * @param pString string to convert to a URL safe form
     * @param charset the charset for pString
     * @return URL safe string, or <code>null</code> if
     *         <code>pString</code> is <code>null</code>
     * @throws UnsupportedEncodingException Thrown if charset is not
     *                                      supported
     */
    public String encode(String pString, String charset)
            throws UnsupportedEncodingException {
        if (pString == null) {
            return null;
        }
        return new String(encode(pString.getBytes(charset)), StringEncodings.US_ASCII);
    }

    /**
     * Encodes a string into its URL safe form using the default string
     * charset. Unsafe characters are escaped.
     *
     * @param pString string to convert to a URL safe form
     * @return URL safe string, or <code>null</code> if
     *         <code>pString</code> is <code>null</code>
     * @throws EncoderException Thrown if URL encoding is unsuccessful
     *
     * @see #getDefaultCharset()
     */
    public String encode(String pString) throws EncoderException {
        if (pString == null) {
            return null;
        }
        try {
            return encode(pString, getDefaultCharset());
        } catch (UnsupportedEncodingException e) {
            // Attach the original exception so its stack trace is not lost.
            EncoderException wrapped = new EncoderException(e.getMessage());
            wrapped.initCause(e);
            throw wrapped;
        }
    }

    /**
     * Decodes a URL safe string into its original form using the
     * specified encoding. Escaped characters are converted back
     * to their original representation.
     *
     * @param pString URL safe string to convert into its original form
     * @param charset the original string charset
     * @return original string, or <code>null</code> if
     *         <code>pString</code> is <code>null</code>
     * @throws DecoderException Thrown if URL decoding is unsuccessful
     * @throws UnsupportedEncodingException Thrown if charset is not
     *                                      supported
     */
    public String decode(String pString, String charset)
            throws DecoderException, UnsupportedEncodingException {
        if (pString == null) {
            return null;
        }
        return new String(decode(pString.getBytes(StringEncodings.US_ASCII)), charset);
    }

    /**
     * Decodes a URL safe string into its original form using the default
     * string charset. Escaped characters are converted back to their
     * original representation.
     *
     * @param pString URL safe string to convert into its original form
     * @return original string, or <code>null</code> if
     *         <code>pString</code> is <code>null</code>
     * @throws DecoderException Thrown if URL decoding is unsuccessful
     *
     * @see #getDefaultCharset()
     */
    public String decode(String pString) throws DecoderException {
        if (pString == null) {
            return null;
        }
        try {
            return decode(pString, getDefaultCharset());
        } catch (UnsupportedEncodingException e) {
            // Attach the original exception so its stack trace is not lost.
            DecoderException wrapped = new DecoderException(e.getMessage());
            wrapped.initCause(e);
            throw wrapped;
        }
    }

    /**
     * Encodes an object into its URL safe form. Unsafe characters are
     * escaped. Only <code>byte[]</code> and <code>String</code> arguments
     * are supported.
     *
     * @param pObject byte array or string to convert to a URL safe form
     * @return URL safe object, or <code>null</code> if
     *         <code>pObject</code> is <code>null</code>
     * @throws EncoderException Thrown if URL encoding is not
     *                          applicable to objects of this type or
     *                          if encoding is unsuccessful
     */
    public Object encode(Object pObject) throws EncoderException {
        if (pObject == null) {
            return null;
        } else if (pObject instanceof byte[]) {
            return encode((byte[]) pObject);
        } else if (pObject instanceof String) {
            return encode((String) pObject);
        } else {
            throw new EncoderException("Objects of type " +
                pObject.getClass().getName() + " cannot be URL encoded");
        }
    }

    /**
     * Decodes a URL safe object into its original form. Escaped
     * characters are converted back to their original representation.
     * Only <code>byte[]</code> and <code>String</code> arguments are
     * supported.
     *
     * @param pObject URL safe object to convert into its original form
     * @return original object, or <code>null</code> if
     *         <code>pObject</code> is <code>null</code>
     * @throws DecoderException Thrown if URL decoding is not
     *                          applicable to objects of this type or
     *                          if decoding is unsuccessful
     */
    public Object decode(Object pObject) throws DecoderException {
        if (pObject == null) {
            return null;
        } else if (pObject instanceof byte[]) {
            return decode((byte[]) pObject);
        } else if (pObject instanceof String) {
            return decode((String) pObject);
        } else {
            throw new DecoderException("Objects of type " +
                pObject.getClass().getName() + " cannot be URL decoded");
        }
    }

    /**
     * The <code>String</code> encoding used for decoding and encoding.
     *
     * @return Returns the encoding.
     *
     * @deprecated use {@link #getDefaultCharset()}
     */
    public String getEncoding() {
        return this.charset;
    }

    /**
     * The default charset used for string decoding and encoding.
     *
     * @return the default string charset.
     */
    public String getDefaultCharset() {
        return this.charset;
    }

}