1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License 15 */ 16 17 package libcore.net; 18 19 import java.io.ByteArrayOutputStream; 20 import java.net.URISyntaxException; 21 import java.nio.ByteBuffer; 22 import java.nio.CharBuffer; 23 import java.nio.charset.CharacterCodingException; 24 import java.nio.charset.Charset; 25 import java.nio.charset.CharsetDecoder; 26 import java.nio.charset.CharsetEncoder; 27 import java.nio.charset.CoderResult; 28 import java.nio.charset.CodingErrorAction; 29 import java.nio.charset.StandardCharsets; 30 31 /** 32 * Encodes and decodes application/x-www-form-urlencoded content. 33 * 34 * Subclasses define isRetained, which decides which chars need to be escaped and which dont. 35 * Output is encoded as UTF-8 by default. I.e, each character (or surrogate pair) is converted to 36 * its equivalent UTF-8 encoded byte sequence, which is then converted to its escaped form. 37 * e.g a 4 byte sequence might look like %c6%ef%e0%e8 38 */ 39 public abstract class UriCodec { 40 /** 41 * Returns true iff. c does not need to be escaped. 42 * 'a - z , A - Z and 0 - 9 are always considered valid (i.e, dont need to be 43 * escaped. This set is referred to as the ``whitelist''. 44 */ 45 protected abstract boolean isRetained(char c); 46 47 private static boolean isWhitelisted(char c) { 48 return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9'); 49 } 50 51 private boolean isWhitelistedOrRetained(char c) { 52 return isWhitelisted(c) || isRetained(c); 53 } 54 55 /** 56 * Throw URISyntaxException if any of the characters in the range [start, end) are not valid 57 * according to this codec. 58 * - If a char is in the whitelist or retained, it is valid both escaped and unescaped. 59 * - All escaped octets appearing in the input are structurally valid hex, i.e convertible to 60 * decimals. 61 * 62 * On success, the substring [start, end) is returned. 63 * {@code name} is not used, except to generate debugging info. 64 */ 65 public final String validate(String uri, int start, int end, String name) 66 throws URISyntaxException { 67 int i = start; 68 while (i < end) { 69 char c = uri.charAt(i++); 70 if (isWhitelistedOrRetained(c)) { 71 continue; 72 } 73 // c is either '%' or character not allowed in a uri. 74 if (c != '%') { 75 throw unexpectedCharacterException(uri, name, c, i - 1); 76 } 77 // Expect two characters representing a number in hex. 78 for (int j = 0; j < 2; j++) { 79 c = getNextCharacter(uri, i++, end, name); 80 if (hexCharToValue(c) < 0) { 81 throw unexpectedCharacterException(uri, name, c, i - 1); 82 } 83 } 84 } 85 return uri.substring(start, end); 86 } 87 88 /** 89 * Interprets a char as hex digits, returning a number from -1 (invalid char) to 15 ('f'). 90 */ 91 private static int hexCharToValue(char c) { 92 if('0' <= c && c <= '9') { 93 return c - '0'; 94 } 95 if ('a' <= c && c <= 'f') { 96 return 10 + c - 'a'; 97 } 98 if ('A' <= c && c <= 'F') { 99 return 10 + c - 'A'; 100 } 101 return -1; 102 } 103 104 private static URISyntaxException unexpectedCharacterException( 105 String uri, String name, char unexpected, int index) { 106 String nameString = (name == null) ? "" : " in [" + name + "]"; 107 return new URISyntaxException( 108 uri, "Unexpected character" + nameString + ": " + unexpected, index); 109 } 110 111 private static char getNextCharacter(String uri, int index, int end, String name) 112 throws URISyntaxException { 113 if (index >= end) { 114 String nameString = (name == null) ? "" : " in [" + name + "]"; 115 throw new URISyntaxException( 116 uri, "Unexpected end of string" + nameString, index); 117 } 118 return uri.charAt(index); 119 } 120 121 /** 122 * Throws {@link URISyntaxException} if any character in {@code uri} is neither whitelisted nor 123 * in {@code legal}. 124 */ 125 public static void validateSimple(String uri, String legal) throws URISyntaxException { 126 for (int i = 0; i < uri.length(); i++) { 127 char c = uri.charAt(i); 128 if (!isWhitelisted(c) && legal.indexOf(c) < 0) { 129 throw unexpectedCharacterException(uri, null /* name */, c, i); 130 } 131 } 132 } 133 134 /** 135 * Encodes the string {@code s} as per the rules of this encoder (see class level comment). 136 * 137 * @throws IllegalArgumentException if the encoder is unable to encode a sequence of bytes. 138 */ 139 public final String encode(String s, Charset charset) { 140 StringBuilder builder = new StringBuilder(s.length()); 141 appendEncoded(builder, s, charset, false); 142 return builder.toString(); 143 } 144 145 /** 146 * Encodes the string {@code s} as per the rules of this encoder (see class level comment). 147 * 148 * Encoded output is appended to {@code builder}. This uses the default output encoding (UTF-8). 149 */ 150 public final void appendEncoded(StringBuilder builder, String s) { 151 appendEncoded(builder, s, StandardCharsets.UTF_8, false); 152 } 153 154 /** 155 * Encodes the string {@code s} as per the rules of this encoder (see class level comment). 156 * 157 * Encoded output is appended to {@code builder}. This uses the default output encoding (UTF-8). 158 * This method must produce partially encoded output. What this means is that if encoded octets 159 * appear in the input string, they are passed through unmodified, instead of being double 160 * escaped. Consider a decoder operating on the global whitelist dealing with a string 161 * foo%25bar. With this method, the output will be foo%25bar, but with appendEncoded, it 162 * will be double encoded into foo%2525bar. 163 */ 164 public final void appendPartiallyEncoded(StringBuilder builder, String s) { 165 appendEncoded(builder, s, StandardCharsets.UTF_8, true); 166 } 167 168 private void appendEncoded( 169 StringBuilder builder, String s, Charset charset, boolean partiallyEncoded) { 170 CharsetEncoder encoder = charset.newEncoder() 171 .onMalformedInput(CodingErrorAction.REPORT) 172 .onUnmappableCharacter(CodingErrorAction.REPORT); 173 CharBuffer cBuffer = CharBuffer.allocate(s.length()); 174 for (int i = 0; i < s.length(); i++) { 175 char c = s.charAt(i); 176 if (c == '%' && partiallyEncoded) { 177 // In case there are characters waiting to be encoded. 178 flushEncodingCharBuffer(builder, encoder, cBuffer); 179 builder.append('%'); 180 continue; 181 } 182 183 if (c == ' ' && isRetained(' ')) { 184 flushEncodingCharBuffer(builder, encoder, cBuffer); 185 builder.append('+'); 186 continue; 187 } 188 189 if (isWhitelistedOrRetained(c)) { 190 flushEncodingCharBuffer(builder, encoder, cBuffer); 191 builder.append(c); 192 continue; 193 } 194 195 // Put the character in the queue for encoding. 196 cBuffer.put(c); 197 } 198 flushEncodingCharBuffer(builder, encoder, cBuffer); 199 } 200 201 private static void flushEncodingCharBuffer( 202 StringBuilder builder, 203 CharsetEncoder encoder, 204 CharBuffer cBuffer) { 205 if (cBuffer.position() == 0) { 206 return; 207 } 208 // We are reading from the buffer now. 209 cBuffer.flip(); 210 ByteBuffer byteBuffer = ByteBuffer.allocate( 211 cBuffer.remaining() * (int) Math.ceil(encoder.maxBytesPerChar())); 212 byteBuffer.position(0); 213 CoderResult result = encoder.encode(cBuffer, byteBuffer, true /* endOfInput */); 214 // According to the {@code CharsetEncoder#encode} spec, the method returns underflow 215 // and leaves an empty output when all bytes were processed correctly. 216 if (result != CoderResult.UNDERFLOW) { 217 throw new IllegalArgumentException( 218 "Error encoding, unexpected result [" 219 + result.toString() 220 + "] using encoder for [" 221 + encoder.charset().name() 222 + "]"); 223 } 224 if (cBuffer.hasRemaining()) { 225 throw new IllegalArgumentException( 226 "Encoder for [" + encoder.charset().name() + "] failed with underflow with " 227 + "remaining input [" + cBuffer + "]"); 228 } 229 // Need to flush in case the encoder saves internal state. 230 encoder.flush(byteBuffer); 231 if (result != CoderResult.UNDERFLOW) { 232 throw new IllegalArgumentException( 233 "Error encoding, unexpected result [" 234 + result.toString() 235 + "] flushing encoder for [" 236 + encoder.charset().name() 237 + "]"); 238 } 239 encoder.reset(); 240 241 byteBuffer.flip(); 242 // Write the encoded bytes. 243 while(byteBuffer.hasRemaining()) { 244 byte b = byteBuffer.get(); 245 builder.append('%'); 246 builder.append(intToHexDigit((b & 0xf0) >>> 4)); 247 builder.append(intToHexDigit(b & 0x0f)); 248 249 } 250 // Use the character buffer to write again. 251 cBuffer.flip(); 252 cBuffer.limit(cBuffer.capacity()); 253 } 254 255 private static char intToHexDigit(int b) { 256 if (b < 10) { 257 return (char) ('0' + b); 258 } else { 259 return (char) ('A' + b - 10); 260 } 261 } 262 263 /** 264 * Decode a string according to the rules of this decoder. 265 * 266 * - if {@code convertPlus == true} all + chars in the decoded output are converted to 267 * (white space) 268 * - if {@code throwOnFailure == true}, an {@link IllegalArgumentException} is thrown for 269 * invalid inputs. Else, U+FFFd is emitted to the output in place of invalid input octets. 270 */ 271 public static String decode( 272 String s, boolean convertPlus, Charset charset, boolean throwOnFailure) { 273 StringBuilder builder = new StringBuilder(s.length()); 274 appendDecoded(builder, s, convertPlus, charset, throwOnFailure); 275 return builder.toString(); 276 } 277 278 /** 279 * Character to be output when there's an error decoding an input. 280 */ 281 private static final char INVALID_INPUT_CHARACTER = '\ufffd'; 282 283 private static void appendDecoded( 284 StringBuilder builder, 285 String s, 286 boolean convertPlus, 287 Charset charset, 288 boolean throwOnFailure) { 289 CharsetDecoder decoder = charset.newDecoder() 290 .onMalformedInput(CodingErrorAction.REPLACE) 291 .replaceWith("\ufffd") 292 .onUnmappableCharacter(CodingErrorAction.REPORT); 293 // Holds the bytes corresponding to the escaped chars being read (empty if the last char 294 // wasn't a escaped char). 295 ByteBuffer byteBuffer = ByteBuffer.allocate(s.length()); 296 int i = 0; 297 while (i < s.length()) { 298 char c = s.charAt(i); 299 i++; 300 switch (c) { 301 case '+': 302 flushDecodingByteAccumulator( 303 builder, decoder, byteBuffer, throwOnFailure); 304 builder.append(convertPlus ? ' ' : '+'); 305 break; 306 case '%': 307 // Expect two characters representing a number in hex. 308 byte hexValue = 0; 309 for (int j = 0; j < 2; j++) { 310 try { 311 c = getNextCharacter(s, i, s.length(), null /* name */); 312 } catch (URISyntaxException e) { 313 // Unexpected end of input. 314 if (throwOnFailure) { 315 throw new IllegalArgumentException(e); 316 } else { 317 flushDecodingByteAccumulator( 318 builder, decoder, byteBuffer, throwOnFailure); 319 builder.append(INVALID_INPUT_CHARACTER); 320 return; 321 } 322 } 323 i++; 324 int newDigit = hexCharToValue(c); 325 if (newDigit < 0) { 326 if (throwOnFailure) { 327 throw new IllegalArgumentException( 328 unexpectedCharacterException(s, null /* name */, c, i - 1)); 329 } else { 330 flushDecodingByteAccumulator( 331 builder, decoder, byteBuffer, throwOnFailure); 332 builder.append(INVALID_INPUT_CHARACTER); 333 break; 334 } 335 } 336 hexValue = (byte) (hexValue * 0x10 + newDigit); 337 } 338 byteBuffer.put(hexValue); 339 break; 340 default: 341 flushDecodingByteAccumulator(builder, decoder, byteBuffer, throwOnFailure); 342 builder.append(c); 343 } 344 } 345 flushDecodingByteAccumulator(builder, decoder, byteBuffer, throwOnFailure); 346 } 347 348 private static void flushDecodingByteAccumulator( 349 StringBuilder builder, 350 CharsetDecoder decoder, 351 ByteBuffer byteBuffer, 352 boolean throwOnFailure) { 353 if (byteBuffer.position() == 0) { 354 return; 355 } 356 byteBuffer.flip(); 357 try { 358 builder.append(decoder.decode(byteBuffer)); 359 } catch (CharacterCodingException e) { 360 if (throwOnFailure) { 361 throw new IllegalArgumentException(e); 362 } else { 363 builder.append(INVALID_INPUT_CHARACTER); 364 } 365 } finally { 366 // Use the byte buffer to write again. 367 byteBuffer.flip(); 368 byteBuffer.limit(byteBuffer.capacity()); 369 } 370 } 371 372 /** 373 * Equivalent to {@code decode(s, false, UTF_8, true)} 374 */ 375 public static String decode(String s) { 376 return decode( 377 s, false /* convertPlus */, StandardCharsets.UTF_8, true /* throwOnFailure */); 378 } 379 }