Home | History | Annotate | Download | only in net
      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
      4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
      5  *
      6  * This code is free software; you can redistribute it and/or modify it
      7  * under the terms of the GNU General Public License version 2 only, as
      8  * published by the Free Software Foundation.  Oracle designates this
      9  * particular file as subject to the "Classpath" exception as provided
     10  * by Oracle in the LICENSE file that accompanied this code.
     11  *
     12  * This code is distributed in the hope that it will be useful, but WITHOUT
     13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
     14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     15  * version 2 for more details (a copy is included in the LICENSE file that
     16  * accompanied this code).
     17  *
     18  * You should have received a copy of the GNU General Public License version
     19  * 2 along with this work; if not, write to the Free Software Foundation,
     20  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
     21  *
     22  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
     23  * or visit www.oracle.com if you need additional information or have any
     24  * questions.
     25  */
     26 
     27 package java.net;
     28 
     29 import java.io.*;
     30 
     31 /**
     32  * Utility class for HTML form decoding. This class contains static methods
     33  * for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE>
     34  * MIME format.
     35  * <p>
     36  * The conversion process is the reverse of that used by the URLEncoder class. It is assumed
     37  * that all characters in the encoded string are one of the following:
     38  * &quot;{@code a}&quot; through &quot;{@code z}&quot;,
     39  * &quot;{@code A}&quot; through &quot;{@code Z}&quot;,
     40  * &quot;{@code 0}&quot; through &quot;{@code 9}&quot;, and
     41  * &quot;{@code -}&quot;, &quot;{@code _}&quot;,
     42  * &quot;{@code .}&quot;, and &quot;{@code *}&quot;. The
     43  * character &quot;{@code %}&quot; is allowed but is interpreted
     44  * as the start of a special escaped sequence.
     45  * <p>
     46  * The following rules are applied in the conversion:
     47  *
     48  * <ul>
     49  * <li>The alphanumeric characters &quot;{@code a}&quot; through
     50  *     &quot;{@code z}&quot;, &quot;{@code A}&quot; through
     51  *     &quot;{@code Z}&quot; and &quot;{@code 0}&quot;
     52  *     through &quot;{@code 9}&quot; remain the same.
     53  * <li>The special characters &quot;{@code .}&quot;,
     54  *     &quot;{@code -}&quot;, &quot;{@code *}&quot;, and
     55  *     &quot;{@code _}&quot; remain the same.
     56  * <li>The plus sign &quot;{@code +}&quot; is converted into a
     57  *     space character &quot; &nbsp; &quot; .
     58  * <li>A sequence of the form "<i>{@code %xy}</i>" will be
     59  *     treated as representing a byte where <i>xy</i> is the two-digit
     60  *     hexadecimal representation of the 8 bits. Then, all substrings
     61  *     that contain one or more of these byte sequences consecutively
     62  *     will be replaced by the character(s) whose encoding would result
     63  *     in those consecutive bytes.
     64  *     The encoding scheme used to decode these characters may be specified,
     65  *     or if unspecified, the default encoding of the platform will be used.
     66  * </ul>
     67  * <p>
     68  * There are two possible ways in which this decoder could deal with
     69  * illegal strings.  It could either leave illegal characters alone or
     70  * it could throw an {@link java.lang.IllegalArgumentException}.
     71  * Which approach the decoder takes is left to the
     72  * implementation.
     73  *
     74  * @author  Mark Chamness
     75  * @author  Michael McCloskey
     76  * @since   1.2
     77  */
     78 
     79 public class URLDecoder {
     80 
     81     // The platform default encoding
     82     static String dfltEncName = URLEncoder.dfltEncName;
     83 
     84     /**
     85      * Decodes a {@code x-www-form-urlencoded} string.
     86      * The platform's default encoding is used to determine what characters
     87      * are represented by any consecutive sequences of the form
     88      * "<i>{@code %xy}</i>".
     89      * @param s the {@code String} to decode
     90      * @deprecated The resulting string may vary depending on the platform's
     91      *          default encoding. Instead, use the decode(String,String) method
     92      *          to specify the encoding.
     93      * @return the newly decoded {@code String}
     94      */
     95     @Deprecated
     96     public static String decode(String s) {
     97 
     98         String str = null;
     99 
    100         try {
    101             str = decode(s, dfltEncName);
    102         } catch (UnsupportedEncodingException e) {
    103             // The system should always have the platform default
    104         }
    105 
    106         return str;
    107     }
    108 
    109     /**
    110      * Decodes a {@code application/x-www-form-urlencoded} string using a specific
    111      * encoding scheme.
    112      * The supplied encoding is used to determine
    113      * what characters are represented by any consecutive sequences of the
    114      * form "<i>{@code %xy}</i>".
    115      * <p>
    116      * <em><strong>Note:</strong> The <a href=
    117      * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
    118      * World Wide Web Consortium Recommendation</a> states that
    119      * UTF-8 should be used. Not doing so may introduce
    120      * incompatibilities.</em>
    121      *
    122      * @param s the {@code String} to decode
    123      * @param enc   The name of a supported
    124      *    <a href="../lang/package-summary.html#charenc">character
    125      *    encoding</a>.
    126      * @return the newly decoded {@code String}
    127      * @exception  UnsupportedEncodingException
    128      *             If character encoding needs to be consulted, but
    129      *             named character encoding is not supported
    130      * @see URLEncoder#encode(java.lang.String, java.lang.String)
    131      * @since 1.4
    132      */
    133     public static String decode(String s, String enc)
    134         throws UnsupportedEncodingException{
    135 
    136         boolean needToChange = false;
    137         int numChars = s.length();
    138         StringBuffer sb = new StringBuffer(numChars > 500 ? numChars / 2 : numChars);
    139         int i = 0;
    140 
    141         if (enc.length() == 0) {
    142             throw new UnsupportedEncodingException ("URLDecoder: empty string enc parameter");
    143         }
    144 
    145         char c;
    146         byte[] bytes = null;
    147         while (i < numChars) {
    148             c = s.charAt(i);
    149             switch (c) {
    150             case '+':
    151                 sb.append(' ');
    152                 i++;
    153                 needToChange = true;
    154                 break;
    155             case '%':
    156                 /*
    157                  * Starting with this instance of %, process all
    158                  * consecutive substrings of the form %xy. Each
    159                  * substring %xy will yield a byte. Convert all
    160                  * consecutive  bytes obtained this way to whatever
    161                  * character(s) they represent in the provided
    162                  * encoding.
    163                  */
    164 
    165                 try {
    166 
    167                     // (numChars-i)/3 is an upper bound for the number
    168                     // of remaining bytes
    169                     if (bytes == null)
    170                         bytes = new byte[(numChars-i)/3];
    171                     int pos = 0;
    172 
    173                     while ( ((i+2) < numChars) &&
    174                             (c=='%')) {
    175                         // BEGIN Android-changed: App compat. Forbid non-hex chars after '%'.
    176                         if (!isValidHexChar(s.charAt(i+1)) || !isValidHexChar(s.charAt(i+2))) {
    177                             throw new IllegalArgumentException("URLDecoder: Illegal hex characters in escape (%) pattern : "
    178                                     + s.substring(i, i + 3));
    179                         }
    180                         // END Android-changed: App compat. Forbid non-hex chars after '%'.
    181                         int v = Integer.parseInt(s.substring(i+1,i+3),16);
    182                         if (v < 0)
    183                             // Android-changed: Improve error message by printing the string value.
    184                             throw new IllegalArgumentException("URLDecoder: Illegal hex characters in escape (%) pattern - negative value : "
    185                                     + s.substring(i, i + 3));
    186                         bytes[pos++] = (byte) v;
    187                         i+= 3;
    188                         if (i < numChars)
    189                             c = s.charAt(i);
    190                     }
    191 
    192                     // A trailing, incomplete byte encoding such as
    193                     // "%x" will cause an exception to be thrown
    194 
    195                     if ((i < numChars) && (c=='%'))
    196                         throw new IllegalArgumentException(
    197                          "URLDecoder: Incomplete trailing escape (%) pattern");
    198 
    199                     sb.append(new String(bytes, 0, pos, enc));
    200                 } catch (NumberFormatException e) {
    201                     throw new IllegalArgumentException(
    202                     "URLDecoder: Illegal hex characters in escape (%) pattern - "
    203                     + e.getMessage());
    204                 }
    205                 needToChange = true;
    206                 break;
    207             default:
    208                 sb.append(c);
    209                 i++;
    210                 break;
    211             }
    212         }
    213 
    214         return (needToChange? sb.toString() : s);
    215     }
    216 
    217     // BEGIN Android-added: App compat. Forbid non-hex chars after '%'.
    218     private static boolean isValidHexChar(char c) {
    219         return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F');
    220     }
    221     // END Android-added: App compat. Forbid non-hex chars after '%'.
    222 }
    223