1 /* 2 * Copyright (C) 2015 Square, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 package com.squareup.okhttp; 17 18 import java.net.URI; 19 import java.net.URL; 20 import java.util.Collections; 21 import java.util.LinkedHashMap; 22 import java.util.Map; 23 import okio.Buffer; 24 import okio.ByteString; 25 26 import static org.junit.Assert.fail; 27 28 /** Tests how each code point is encoded and decoded in the context of each URL component. */ 29 class UrlComponentEncodingTester { 30 private static final int UNICODE_2 = 0x07ff; // Arbitrary code point that's 2 bytes in UTF-8. 31 private static final int UNICODE_3 = 0xffff; // Arbitrary code point that's 3 bytes in UTF-8. 32 private static final int UNICODE_4 = 0x10ffff; // Arbitrary code point that's 4 bytes in UTF-8. 33 34 /** 35 * The default encode set for the ASCII range. The specific rules vary per-component: for example, 36 * '?' may be identity-encoded in a fragment, but must be percent-encoded in a path. 37 * 38 * See https://url.spec.whatwg.org/#percent-encoded-bytes 39 */ 40 private static final Map<Integer, Encoding> defaultEncodings; 41 static { 42 Map<Integer, Encoding> map = new LinkedHashMap<>(); 43 map.put( 0x0, Encoding.PERCENT); // Null character 44 map.put( 0x1, Encoding.PERCENT); // Start of Header 45 map.put( 0x2, Encoding.PERCENT); // Start of Text 46 map.put( 0x3, Encoding.PERCENT); // End of Text 47 map.put( 0x4, Encoding.PERCENT); // End of Transmission 48 map.put( 0x5, Encoding.PERCENT); // Enquiry 49 map.put( 0x6, Encoding.PERCENT); // Acknowledgment 50 map.put( 0x7, Encoding.PERCENT); // Bell 51 map.put((int) '\b', Encoding.PERCENT); // Backspace 52 map.put((int) '\t', Encoding.SKIP); // Horizontal Tab 53 map.put((int) '\n', Encoding.SKIP); // Line feed 54 map.put( 0xb, Encoding.PERCENT); // Vertical Tab 55 map.put((int) '\f', Encoding.SKIP); // Form feed 56 map.put((int) '\r', Encoding.SKIP); // Carriage return 57 map.put( 0xe, Encoding.PERCENT); // Shift Out 58 map.put( 0xf, Encoding.PERCENT); // Shift In 59 map.put( 0x10, Encoding.PERCENT); // Data Link Escape 60 map.put( 0x11, Encoding.PERCENT); // Device Control 1 (oft. XON) 61 map.put( 0x12, Encoding.PERCENT); // Device Control 2 62 map.put( 0x13, Encoding.PERCENT); // Device Control 3 (oft. XOFF) 63 map.put( 0x14, Encoding.PERCENT); // Device Control 4 64 map.put( 0x15, Encoding.PERCENT); // Negative Acknowledgment 65 map.put( 0x16, Encoding.PERCENT); // Synchronous idle 66 map.put( 0x17, Encoding.PERCENT); // End of Transmission Block 67 map.put( 0x18, Encoding.PERCENT); // Cancel 68 map.put( 0x19, Encoding.PERCENT); // End of Medium 69 map.put( 0x1a, Encoding.PERCENT); // Substitute 70 map.put( 0x1b, Encoding.PERCENT); // Escape 71 map.put( 0x1c, Encoding.PERCENT); // File Separator 72 map.put( 0x1d, Encoding.PERCENT); // Group Separator 73 map.put( 0x1e, Encoding.PERCENT); // Record Separator 74 map.put( 0x1f, Encoding.PERCENT); // Unit Separator 75 map.put((int) ' ', Encoding.PERCENT); 76 map.put((int) '!', Encoding.IDENTITY); 77 map.put((int) '"', Encoding.PERCENT); 78 map.put((int) '#', Encoding.PERCENT); 79 map.put((int) '$', Encoding.IDENTITY); 80 map.put((int) '%', Encoding.IDENTITY); 81 map.put((int) '&', Encoding.IDENTITY); 82 map.put((int) '\'', Encoding.IDENTITY); 83 map.put((int) '(', Encoding.IDENTITY); 84 map.put((int) ')', Encoding.IDENTITY); 85 map.put((int) '*', Encoding.IDENTITY); 86 map.put((int) '+', Encoding.IDENTITY); 87 map.put((int) ',', Encoding.IDENTITY); 88 map.put((int) '-', Encoding.IDENTITY); 89 map.put((int) '.', Encoding.IDENTITY); 90 map.put((int) '/', Encoding.IDENTITY); 91 map.put((int) '0', Encoding.IDENTITY); 92 map.put((int) '1', Encoding.IDENTITY); 93 map.put((int) '2', Encoding.IDENTITY); 94 map.put((int) '3', Encoding.IDENTITY); 95 map.put((int) '4', Encoding.IDENTITY); 96 map.put((int) '5', Encoding.IDENTITY); 97 map.put((int) '6', Encoding.IDENTITY); 98 map.put((int) '7', Encoding.IDENTITY); 99 map.put((int) '8', Encoding.IDENTITY); 100 map.put((int) '9', Encoding.IDENTITY); 101 map.put((int) ':', Encoding.IDENTITY); 102 map.put((int) ';', Encoding.IDENTITY); 103 map.put((int) '<', Encoding.PERCENT); 104 map.put((int) '=', Encoding.IDENTITY); 105 map.put((int) '>', Encoding.PERCENT); 106 map.put((int) '?', Encoding.PERCENT); 107 map.put((int) '@', Encoding.IDENTITY); 108 map.put((int) 'A', Encoding.IDENTITY); 109 map.put((int) 'B', Encoding.IDENTITY); 110 map.put((int) 'C', Encoding.IDENTITY); 111 map.put((int) 'D', Encoding.IDENTITY); 112 map.put((int) 'E', Encoding.IDENTITY); 113 map.put((int) 'F', Encoding.IDENTITY); 114 map.put((int) 'G', Encoding.IDENTITY); 115 map.put((int) 'H', Encoding.IDENTITY); 116 map.put((int) 'I', Encoding.IDENTITY); 117 map.put((int) 'J', Encoding.IDENTITY); 118 map.put((int) 'K', Encoding.IDENTITY); 119 map.put((int) 'L', Encoding.IDENTITY); 120 map.put((int) 'M', Encoding.IDENTITY); 121 map.put((int) 'N', Encoding.IDENTITY); 122 map.put((int) 'O', Encoding.IDENTITY); 123 map.put((int) 'P', Encoding.IDENTITY); 124 map.put((int) 'Q', Encoding.IDENTITY); 125 map.put((int) 'R', Encoding.IDENTITY); 126 map.put((int) 'S', Encoding.IDENTITY); 127 map.put((int) 'T', Encoding.IDENTITY); 128 map.put((int) 'U', Encoding.IDENTITY); 129 map.put((int) 'V', Encoding.IDENTITY); 130 map.put((int) 'W', Encoding.IDENTITY); 131 map.put((int) 'X', Encoding.IDENTITY); 132 map.put((int) 'Y', Encoding.IDENTITY); 133 map.put((int) 'Z', Encoding.IDENTITY); 134 map.put((int) '[', Encoding.IDENTITY); 135 map.put((int) '\\', Encoding.IDENTITY); 136 map.put((int) ']', Encoding.IDENTITY); 137 map.put((int) '^', Encoding.IDENTITY); 138 map.put((int) '_', Encoding.IDENTITY); 139 map.put((int) '`', Encoding.PERCENT); 140 map.put((int) 'a', Encoding.IDENTITY); 141 map.put((int) 'b', Encoding.IDENTITY); 142 map.put((int) 'c', Encoding.IDENTITY); 143 map.put((int) 'd', Encoding.IDENTITY); 144 map.put((int) 'e', Encoding.IDENTITY); 145 map.put((int) 'f', Encoding.IDENTITY); 146 map.put((int) 'g', Encoding.IDENTITY); 147 map.put((int) 'h', Encoding.IDENTITY); 148 map.put((int) 'i', Encoding.IDENTITY); 149 map.put((int) 'j', Encoding.IDENTITY); 150 map.put((int) 'k', Encoding.IDENTITY); 151 map.put((int) 'l', Encoding.IDENTITY); 152 map.put((int) 'm', Encoding.IDENTITY); 153 map.put((int) 'n', Encoding.IDENTITY); 154 map.put((int) 'o', Encoding.IDENTITY); 155 map.put((int) 'p', Encoding.IDENTITY); 156 map.put((int) 'q', Encoding.IDENTITY); 157 map.put((int) 'r', Encoding.IDENTITY); 158 map.put((int) 's', Encoding.IDENTITY); 159 map.put((int) 't', Encoding.IDENTITY); 160 map.put((int) 'u', Encoding.IDENTITY); 161 map.put((int) 'v', Encoding.IDENTITY); 162 map.put((int) 'w', Encoding.IDENTITY); 163 map.put((int) 'x', Encoding.IDENTITY); 164 map.put((int) 'y', Encoding.IDENTITY); 165 map.put((int) 'z', Encoding.IDENTITY); 166 map.put((int) '{', Encoding.IDENTITY); 167 map.put((int) '|', Encoding.IDENTITY); 168 map.put((int) '}', Encoding.IDENTITY); 169 map.put((int) '~', Encoding.IDENTITY); 170 map.put( 0x7f, Encoding.PERCENT); // Delete 171 map.put( UNICODE_2, Encoding.PERCENT); 172 map.put( UNICODE_3, Encoding.PERCENT); 173 map.put( UNICODE_4, Encoding.PERCENT); 174 defaultEncodings = Collections.unmodifiableMap(map); 175 } 176 177 private final Map<Integer, Encoding> encodings; 178 private final StringBuilder uriEscapedCodePoints = new StringBuilder(); 179 180 public UrlComponentEncodingTester() { 181 this.encodings = new LinkedHashMap<>(defaultEncodings); 182 } 183 184 public UrlComponentEncodingTester override(Encoding encoding, int... codePoints) { 185 for (int codePoint : codePoints) { 186 encodings.put(codePoint, encoding); 187 } 188 return this; 189 } 190 191 public UrlComponentEncodingTester identityForNonAscii() { 192 encodings.put(UNICODE_2, Encoding.IDENTITY); 193 encodings.put(UNICODE_3, Encoding.IDENTITY); 194 encodings.put(UNICODE_4, Encoding.IDENTITY); 195 return this; 196 } 197 198 /** 199 * Configure a character to be skipped but only for conversion to and from {@code java.net.URI}. 200 * That class is more strict than the others. 201 */ 202 public UrlComponentEncodingTester skipForUri(int... codePoints) { 203 uriEscapedCodePoints.append(new String(codePoints, 0, codePoints.length)); 204 return this; 205 } 206 207 public UrlComponentEncodingTester test(Component component) { 208 for (Map.Entry<Integer, Encoding> entry : encodings.entrySet()) { 209 Encoding encoding = entry.getValue(); 210 int codePoint = entry.getKey(); 211 testEncodeAndDecode(codePoint, component); 212 if (encoding == Encoding.SKIP) continue; 213 214 testParseOriginal(codePoint, encoding, component); 215 testParseAlreadyEncoded(codePoint, encoding, component); 216 testToUrl(codePoint, encoding, component); 217 testFromUrl(codePoint, encoding, component); 218 219 if (codePoint != '%') { 220 boolean uriEscaped = uriEscapedCodePoints.indexOf( 221 Encoding.IDENTITY.encode(codePoint)) != -1; 222 testUri(codePoint, encoding, component, uriEscaped); 223 } 224 } 225 return this; 226 } 227 228 private void testParseAlreadyEncoded(int codePoint, Encoding encoding, Component component) { 229 String encoded = encoding.encode(codePoint); 230 String urlString = component.urlString(encoded); 231 HttpUrl url = HttpUrl.parse(urlString); 232 if (!component.encodedValue(url).equals(encoded)) { 233 fail(String.format("Encoding %s %#x using %s", component, codePoint, encoding)); 234 } 235 } 236 237 private void testEncodeAndDecode(int codePoint, Component component) { 238 String expected = Encoding.IDENTITY.encode(codePoint); 239 HttpUrl.Builder builder = HttpUrl.parse("http://host/").newBuilder(); 240 component.set(builder, expected); 241 HttpUrl url = builder.build(); 242 String actual = component.get(url); 243 if (!expected.equals(actual)) { 244 fail(String.format("Roundtrip %s %#x %s", component, codePoint, url)); 245 } 246 } 247 248 private void testParseOriginal(int codePoint, Encoding encoding, Component component) { 249 String encoded = encoding.encode(codePoint); 250 if (encoding != Encoding.PERCENT) return; 251 String identity = Encoding.IDENTITY.encode(codePoint); 252 String urlString = component.urlString(identity); 253 HttpUrl url = HttpUrl.parse(urlString); 254 255 String s = component.encodedValue(url); 256 if (!s.equals(encoded)) { 257 fail(String.format("Encoding %s %#02x using %s", component, codePoint, encoding)); 258 } 259 } 260 261 private void testToUrl(int codePoint, Encoding encoding, Component component) { 262 String encoded = encoding.encode(codePoint); 263 HttpUrl httpUrl = HttpUrl.parse(component.urlString(encoded)); 264 URL javaNetUrl = httpUrl.url(); 265 if (!javaNetUrl.toString().equals(javaNetUrl.toString())) { 266 fail(String.format("Encoding %s %#x using %s", component, codePoint, encoding)); 267 } 268 } 269 270 private void testFromUrl(int codePoint, Encoding encoding, Component component) { 271 String encoded = encoding.encode(codePoint); 272 HttpUrl httpUrl = HttpUrl.parse(component.urlString(encoded)); 273 HttpUrl toAndFromJavaNetUrl = HttpUrl.get(httpUrl.url()); 274 if (!toAndFromJavaNetUrl.equals(httpUrl)) { 275 fail(String.format("Encoding %s %#x using %s", component, codePoint, encoding)); 276 } 277 } 278 279 private void testUri( 280 int codePoint, Encoding encoding, Component component, boolean uriEscaped) { 281 String string = new String(new int[] { codePoint }, 0, 1); 282 String encoded = encoding.encode(codePoint); 283 HttpUrl httpUrl = HttpUrl.parse(component.urlString(encoded)); 284 URI uri = httpUrl.uri(); 285 HttpUrl toAndFromUri = HttpUrl.get(uri); 286 if (uriEscaped) { 287 // The URI has more escaping than the HttpURL. Check that the decoded values still match. 288 if (uri.toString().equals(httpUrl.toString())) { 289 fail(String.format("Encoding %s %#x using %s", component, codePoint, encoding)); 290 } 291 if (!component.get(toAndFromUri).equals(string)) { 292 fail(String.format("Encoding %s %#x using %s", component, codePoint, encoding)); 293 } 294 } else { 295 // Check that the URI and HttpURL have the exact same escaping. 296 if (!toAndFromUri.equals(httpUrl)) { 297 fail(String.format("Encoding %s %#x using %s", component, codePoint, encoding)); 298 } 299 if (!uri.toString().equals(httpUrl.toString())) { 300 fail(String.format("Encoding %s %#x using %s", component, codePoint, encoding)); 301 } 302 } 303 } 304 305 public enum Encoding { 306 IDENTITY { 307 public String encode(int codePoint) { 308 return new String(new int[] { codePoint }, 0, 1); 309 } 310 }, 311 312 PERCENT { 313 public String encode(int codePoint) { 314 ByteString utf8 = ByteString.encodeUtf8(IDENTITY.encode(codePoint)); 315 Buffer percentEncoded = new Buffer(); 316 for (int i = 0; i < utf8.size(); i++) { 317 percentEncoded.writeUtf8(String.format("%%%02X", utf8.getByte(i) & 0xff)); 318 } 319 return percentEncoded.readUtf8(); 320 } 321 }, 322 323 SKIP; 324 325 public String encode(int codePoint) { 326 throw new UnsupportedOperationException(); 327 } 328 } 329 330 public enum Component { 331 USER { 332 @Override public String urlString(String value) { 333 return "http://" + value + "@example.com/"; 334 } 335 @Override public String encodedValue(HttpUrl url) { 336 return url.encodedUsername(); 337 } 338 @Override public void set(HttpUrl.Builder builder, String value) { 339 builder.username(value); 340 } 341 @Override public String get(HttpUrl url) { 342 return url.username(); 343 } 344 }, 345 PASSWORD { 346 @Override public String urlString(String value) { 347 return "http://:" + value + "@example.com/"; 348 } 349 @Override public String encodedValue(HttpUrl url) { 350 return url.encodedPassword(); 351 } 352 @Override public void set(HttpUrl.Builder builder, String value) { 353 builder.password(value); 354 } 355 @Override public String get(HttpUrl url) { 356 return url.password(); 357 } 358 }, 359 PATH { 360 @Override public String urlString(String value) { 361 return "http://example.com/a" + value + "z/"; 362 } 363 @Override public String encodedValue(HttpUrl url) { 364 String path = url.encodedPath(); 365 return path.substring(2, path.length() - 2); 366 } 367 @Override public void set(HttpUrl.Builder builder, String value) { 368 builder.addPathSegment("a" + value + "z"); 369 } 370 @Override public String get(HttpUrl url) { 371 String pathSegment = url.pathSegments().get(0); 372 return pathSegment.substring(1, pathSegment.length() - 1); 373 } 374 }, 375 QUERY { 376 @Override public String urlString(String value) { 377 return "http://example.com/?a" + value + "z"; 378 } 379 @Override public String encodedValue(HttpUrl url) { 380 String query = url.encodedQuery(); 381 return query.substring(1, query.length() - 1); 382 } 383 @Override public void set(HttpUrl.Builder builder, String value) { 384 builder.query("a" + value + "z"); 385 } 386 @Override public String get(HttpUrl url) { 387 String query = url.query(); 388 return query.substring(1, query.length() - 1); 389 } 390 }, 391 FRAGMENT { 392 @Override public String urlString(String value) { 393 return "http://example.com/#a" + value + "z"; 394 } 395 @Override public String encodedValue(HttpUrl url) { 396 String fragment = url.encodedFragment(); 397 return fragment.substring(1, fragment.length() - 1); 398 } 399 @Override public void set(HttpUrl.Builder builder, String value) { 400 builder.fragment("a" + value + "z"); 401 } 402 @Override public String get(HttpUrl url) { 403 String fragment = url.fragment(); 404 return fragment.substring(1, fragment.length() - 1); 405 } 406 }; 407 408 public abstract String urlString(String value); 409 410 public abstract String encodedValue(HttpUrl url); 411 412 public abstract void set(HttpUrl.Builder builder, String value); 413 414 public abstract String get(HttpUrl url); 415 } 416 } 417