Home | History | Annotate | Download | only in net
      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License
     15  */
     16 
     17 package libcore.net;
     18 
     19 import java.io.ByteArrayOutputStream;
     20 import java.net.URISyntaxException;
     21 import java.nio.ByteBuffer;
     22 import java.nio.CharBuffer;
     23 import java.nio.charset.CharacterCodingException;
     24 import java.nio.charset.Charset;
     25 import java.nio.charset.CharsetDecoder;
     26 import java.nio.charset.CharsetEncoder;
     27 import java.nio.charset.CoderResult;
     28 import java.nio.charset.CodingErrorAction;
     29 import java.nio.charset.StandardCharsets;
     30 
     31 /**
     32  * Encodes and decodes application/x-www-form-urlencoded content.
     33  *
     34  * Subclasses define isRetained, which decides which chars need to be escaped and which dont.
     35  * Output is encoded as UTF-8 by default. I.e, each character (or surrogate pair) is converted to
     36  * its equivalent UTF-8 encoded byte sequence, which is then converted to its escaped form.
     37  * e.g a 4 byte sequence might look like %c6%ef%e0%e8
     38  */
     39 public abstract class UriCodec {
     40     /**
     41      * Returns true iff. c does not need to be escaped.
     42      * 'a - z , A - Z and 0 - 9 are always considered valid (i.e, dont need to be
     43      * escaped. This set is referred to as the ``whitelist''.
     44      */
     45     protected abstract boolean isRetained(char c);
     46 
     47     private static boolean isWhitelisted(char c) {
     48         return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9');
     49     }
     50 
     51     private boolean isWhitelistedOrRetained(char c) {
     52         return isWhitelisted(c) || isRetained(c);
     53     }
     54 
     55     /**
     56      * Throw URISyntaxException if any of the characters in the range [start, end) are not valid
     57      * according to this codec.
     58      *  - If a char is in the whitelist or retained, it is valid both escaped and unescaped.
     59      *  - All escaped octets appearing in the input are structurally valid hex, i.e convertible to
     60      *  decimals.
     61      *
     62      * On success, the substring [start, end) is returned.
     63      * {@code name} is not used, except to generate debugging info.
     64      */
     65     public final String validate(String uri, int start, int end, String name)
     66             throws URISyntaxException {
     67         int i = start;
     68         while (i < end) {
     69             char c = uri.charAt(i++);
     70             if (isWhitelistedOrRetained(c)) {
     71                 continue;
     72             }
     73             // c is either '%' or character not allowed in a uri.
     74             if (c != '%') {
     75                 throw unexpectedCharacterException(uri, name, c, i - 1);
     76             }
     77             // Expect two characters representing a number in hex.
     78             for (int j = 0; j < 2; j++) {
     79                 c = getNextCharacter(uri, i++, end, name);
     80                 if (hexCharToValue(c) < 0) {
     81                     throw unexpectedCharacterException(uri, name, c, i - 1);
     82                 }
     83             }
     84         }
     85         return uri.substring(start, end);
     86     }
     87 
     88     /**
     89      * Interprets a char as hex digits, returning a number from -1 (invalid char) to 15 ('f').
     90      */
     91     private static int hexCharToValue(char c) {
     92         if('0' <= c && c <= '9') {
     93             return c - '0';
     94         }
     95         if ('a' <= c && c <= 'f') {
     96             return 10 + c - 'a';
     97         }
     98         if ('A' <= c && c <= 'F') {
     99             return 10 + c - 'A';
    100         }
    101         return -1;
    102     }
    103 
    104     private static URISyntaxException unexpectedCharacterException(
    105             String uri, String name, char unexpected, int index) {
    106         String nameString = (name == null) ? "" :  " in [" + name + "]";
    107         return new URISyntaxException(
    108                 uri, "Unexpected character" + nameString + ": " + unexpected, index);
    109     }
    110 
    111     private static char getNextCharacter(String uri, int index, int end, String name)
    112              throws URISyntaxException {
    113         if (index >= end) {
    114             String nameString = (name == null) ? "" :  " in [" + name + "]";
    115             throw new URISyntaxException(
    116                     uri, "Unexpected end of string" + nameString, index);
    117         }
    118         return uri.charAt(index);
    119     }
    120 
    121     /**
    122      * Throws {@link URISyntaxException} if any character in {@code uri} is neither whitelisted nor
    123      * in {@code legal}.
    124      */
    125     public static void validateSimple(String uri, String legal) throws URISyntaxException {
    126         for (int i = 0; i < uri.length(); i++) {
    127             char c = uri.charAt(i);
    128             if (!isWhitelisted(c) && legal.indexOf(c) < 0) {
    129                 throw unexpectedCharacterException(uri, null /* name */, c, i);
    130             }
    131         }
    132     }
    133 
    134     /**
    135      * Encodes the string {@code s} as per the rules of this encoder (see class level comment).
    136      *
    137      * @throws IllegalArgumentException if the encoder is unable to encode a sequence of bytes.
    138      */
    139     public final String encode(String s, Charset charset) {
    140         StringBuilder builder = new StringBuilder(s.length());
    141         appendEncoded(builder, s, charset, false);
    142         return builder.toString();
    143     }
    144 
    145     /**
    146      * Encodes the string {@code s} as per the rules of this encoder (see class level comment).
    147      *
    148      * Encoded output is appended to {@code builder}. This uses the default output encoding (UTF-8).
    149      */
    150     public final void appendEncoded(StringBuilder builder, String s) {
    151         appendEncoded(builder, s, StandardCharsets.UTF_8, false);
    152     }
    153 
    154     /**
    155      * Encodes the string {@code s} as per the rules of this encoder (see class level comment).
    156      *
    157      * Encoded output is appended to {@code builder}. This uses the default output encoding (UTF-8).
    158      * This method must produce partially encoded output. What this means is that if encoded octets
    159      * appear in the input string, they are passed through unmodified, instead of being double
    160      * escaped. Consider a decoder operating on the global whitelist dealing with a string
    161      * foo%25bar. With this method, the output will be foo%25bar, but with appendEncoded, it
    162      * will be double encoded into foo%2525bar.
    163      */
    164     public final void appendPartiallyEncoded(StringBuilder builder, String s) {
    165         appendEncoded(builder, s, StandardCharsets.UTF_8, true);
    166     }
    167 
    168     private void appendEncoded(
    169             StringBuilder builder, String s, Charset charset, boolean partiallyEncoded) {
    170         CharsetEncoder encoder = charset.newEncoder()
    171                 .onMalformedInput(CodingErrorAction.REPORT)
    172                 .onUnmappableCharacter(CodingErrorAction.REPORT);
    173         CharBuffer cBuffer = CharBuffer.allocate(s.length());
    174         for (int i = 0; i < s.length(); i++) {
    175             char c = s.charAt(i);
    176             if (c == '%' && partiallyEncoded) {
    177                 // In case there are characters waiting to be encoded.
    178                 flushEncodingCharBuffer(builder, encoder, cBuffer);
    179                 builder.append('%');
    180                 continue;
    181             }
    182 
    183             if (c == ' ' && isRetained(' ')) {
    184                 flushEncodingCharBuffer(builder, encoder, cBuffer);
    185                 builder.append('+');
    186                 continue;
    187             }
    188 
    189             if (isWhitelistedOrRetained(c)) {
    190                 flushEncodingCharBuffer(builder, encoder, cBuffer);
    191                 builder.append(c);
    192                 continue;
    193             }
    194 
    195             // Put the character in the queue for encoding.
    196             cBuffer.put(c);
    197         }
    198         flushEncodingCharBuffer(builder, encoder, cBuffer);
    199     }
    200 
    201     private static void flushEncodingCharBuffer(
    202             StringBuilder builder,
    203             CharsetEncoder encoder,
    204             CharBuffer cBuffer) {
    205         if (cBuffer.position() == 0) {
    206             return;
    207         }
    208         // We are reading from the buffer now.
    209         cBuffer.flip();
    210         ByteBuffer byteBuffer = ByteBuffer.allocate(
    211                 cBuffer.remaining() * (int) Math.ceil(encoder.maxBytesPerChar()));
    212         byteBuffer.position(0);
    213         CoderResult result = encoder.encode(cBuffer, byteBuffer, true /* endOfInput */);
    214         // According to the {@code CharsetEncoder#encode} spec, the method returns underflow
    215         // and leaves an empty output when all bytes were processed correctly.
    216         if (result != CoderResult.UNDERFLOW) {
    217             throw new IllegalArgumentException(
    218                     "Error encoding, unexpected result ["
    219                             + result.toString()
    220                             + "] using encoder for ["
    221                             + encoder.charset().name()
    222                             + "]");
    223         }
    224         if (cBuffer.hasRemaining()) {
    225             throw new IllegalArgumentException(
    226                     "Encoder for [" + encoder.charset().name() + "] failed with underflow with "
    227                             + "remaining input [" + cBuffer + "]");
    228         }
    229         // Need to flush in case the encoder saves internal state.
    230         encoder.flush(byteBuffer);
    231         if (result != CoderResult.UNDERFLOW) {
    232             throw new IllegalArgumentException(
    233                     "Error encoding, unexpected result ["
    234                             + result.toString()
    235                             + "] flushing encoder for ["
    236                             + encoder.charset().name()
    237                             + "]");
    238         }
    239         encoder.reset();
    240 
    241         byteBuffer.flip();
    242         // Write the encoded bytes.
    243         while(byteBuffer.hasRemaining()) {
    244             byte b = byteBuffer.get();
    245             builder.append('%');
    246             builder.append(intToHexDigit((b & 0xf0) >>> 4));
    247             builder.append(intToHexDigit(b & 0x0f));
    248 
    249         }
    250         // Use the character buffer to write again.
    251         cBuffer.flip();
    252         cBuffer.limit(cBuffer.capacity());
    253     }
    254 
    255     private static char intToHexDigit(int b) {
    256         if (b < 10) {
    257             return (char) ('0' + b);
    258         } else {
    259             return (char) ('A' + b - 10);
    260         }
    261     }
    262 
    263     /**
    264      * Decode a string according to the rules of this decoder.
    265      *
    266      * - if {@code convertPlus == true} all + chars in the decoded output are converted to  
    267      *   (white space)
    268      * - if {@code throwOnFailure == true}, an {@link IllegalArgumentException} is thrown for
    269      *   invalid inputs. Else, U+FFFd is emitted to the output in place of invalid input octets.
    270      */
    271     public static String decode(
    272             String s, boolean convertPlus, Charset charset, boolean throwOnFailure) {
    273         StringBuilder builder = new StringBuilder(s.length());
    274         appendDecoded(builder, s, convertPlus, charset, throwOnFailure);
    275         return builder.toString();
    276     }
    277 
    278     /**
    279      * Character to be output when there's an error decoding an input.
    280      */
    281     private static final char INVALID_INPUT_CHARACTER = '\ufffd';
    282 
    283     private static void appendDecoded(
    284             StringBuilder builder,
    285             String s,
    286             boolean convertPlus,
    287             Charset charset,
    288             boolean throwOnFailure) {
    289         CharsetDecoder decoder = charset.newDecoder()
    290                 .onMalformedInput(CodingErrorAction.REPLACE)
    291                 .replaceWith("\ufffd")
    292                 .onUnmappableCharacter(CodingErrorAction.REPORT);
    293         // Holds the bytes corresponding to the escaped chars being read (empty if the last char
    294         // wasn't a escaped char).
    295         ByteBuffer byteBuffer = ByteBuffer.allocate(s.length());
    296         int i = 0;
    297         while (i < s.length()) {
    298             char c = s.charAt(i);
    299             i++;
    300             switch (c) {
    301                 case '+':
    302                     flushDecodingByteAccumulator(
    303                             builder, decoder, byteBuffer, throwOnFailure);
    304                     builder.append(convertPlus ? ' ' : '+');
    305                     break;
    306                 case '%':
    307                     // Expect two characters representing a number in hex.
    308                     byte hexValue = 0;
    309                     for (int j = 0; j < 2; j++) {
    310                         try {
    311                             c = getNextCharacter(s, i, s.length(), null /* name */);
    312                         } catch (URISyntaxException e) {
    313                             // Unexpected end of input.
    314                             if (throwOnFailure) {
    315                                 throw new IllegalArgumentException(e);
    316                             } else {
    317                                 flushDecodingByteAccumulator(
    318                                         builder, decoder, byteBuffer, throwOnFailure);
    319                                 builder.append(INVALID_INPUT_CHARACTER);
    320                                 return;
    321                             }
    322                         }
    323                         i++;
    324                         int newDigit = hexCharToValue(c);
    325                         if (newDigit < 0) {
    326                             if (throwOnFailure) {
    327                                 throw new IllegalArgumentException(
    328                                         unexpectedCharacterException(s, null /* name */, c, i - 1));
    329                             } else {
    330                                 flushDecodingByteAccumulator(
    331                                         builder, decoder, byteBuffer, throwOnFailure);
    332                                 builder.append(INVALID_INPUT_CHARACTER);
    333                                 break;
    334                             }
    335                         }
    336                         hexValue = (byte) (hexValue * 0x10 + newDigit);
    337                     }
    338                     byteBuffer.put(hexValue);
    339                     break;
    340                 default:
    341                     flushDecodingByteAccumulator(builder, decoder, byteBuffer, throwOnFailure);
    342                     builder.append(c);
    343             }
    344         }
    345         flushDecodingByteAccumulator(builder, decoder, byteBuffer, throwOnFailure);
    346     }
    347 
    348     private static void flushDecodingByteAccumulator(
    349             StringBuilder builder,
    350             CharsetDecoder decoder,
    351             ByteBuffer byteBuffer,
    352             boolean throwOnFailure) {
    353         if (byteBuffer.position() == 0) {
    354             return;
    355         }
    356         byteBuffer.flip();
    357         try {
    358             builder.append(decoder.decode(byteBuffer));
    359         } catch (CharacterCodingException e) {
    360             if (throwOnFailure) {
    361                 throw new IllegalArgumentException(e);
    362             } else {
    363                 builder.append(INVALID_INPUT_CHARACTER);
    364             }
    365         } finally {
    366             // Use the byte buffer to write again.
    367             byteBuffer.flip();
    368             byteBuffer.limit(byteBuffer.capacity());
    369         }
    370     }
    371 
    372     /**
    373      * Equivalent to {@code decode(s, false, UTF_8, true)}
    374      */
    375     public static String decode(String s) {
    376         return decode(
    377                 s, false /* convertPlus */, StandardCharsets.UTF_8, true /* throwOnFailure */);
    378     }
    379 }