1 /* 2 * Copyright (C) 2015 Square, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 package com.squareup.okhttp; 17 18 import java.net.IDN; 19 import java.net.InetAddress; 20 import java.net.MalformedURLException; 21 import java.net.URI; 22 import java.net.URISyntaxException; 23 import java.net.URL; 24 import java.net.UnknownHostException; 25 import java.util.ArrayList; 26 import java.util.Arrays; 27 import java.util.Collections; 28 import java.util.LinkedHashSet; 29 import java.util.List; 30 import java.util.Locale; 31 import java.util.Set; 32 import okio.Buffer; 33 34 /** 35 * A uniform resource locator (URL) with a scheme of either {@code http} or {@code https}. Use this 36 * class to compose and decompose Internet addresses. 
For example, this code will compose and print 37 * a URL for Google search: <pre> {@code 38 * 39 * HttpUrl url = new HttpUrl.Builder() 40 * .scheme("https") 41 * .host("www.google.com") 42 * .addPathSegment("search") 43 * .addQueryParameter("q", "polar bears") 44 * .build(); 45 * System.out.println(url); 46 * }</pre> 47 * 48 * which prints: <pre> {@code 49 * 50 * https://www.google.com/search?q=polar%20bears 51 * }</pre> 52 * 53 * As another example, this code prints the human-readable query parameters of a Twitter search: 54 * <pre> {@code 55 * 56 * HttpUrl url = HttpUrl.parse("https://twitter.com/search?q=cute%20%23puppies&f=images"); 57 * for (int i = 0, size = url.querySize(); i < size; i++) { 58 * System.out.println(url.queryParameterName(i) + ": " + url.queryParameterValue(i)); 59 * } 60 * }</pre> 61 * 62 * which prints: <pre> {@code 63 * 64 * q: cute #puppies 65 * f: images 66 * }</pre> 67 * 68 * In addition to composing URLs from their component parts and decomposing URLs into their 69 * component parts, this class implements relative URL resolution: what address you'd reach by 70 * clicking a relative link on a specified page. For example: <pre> {@code 71 * 72 * HttpUrl base = HttpUrl.parse("https://www.youtube.com/user/WatchTheDaily/videos"); 73 * HttpUrl link = base.resolve("../../watch?v=cbP2N1BQdYc"); 74 * System.out.println(link); 75 * }</pre> 76 * 77 * which prints: <pre> {@code 78 * 79 * https://www.youtube.com/watch?v=cbP2N1BQdYc 80 * }</pre> 81 * 82 * <h3>What's in a URL?</h3> 83 * 84 * A URL has several components. 85 * 86 * <h4>Scheme</h4> 87 * Sometimes referred to as <i>protocol</i>, A URL's scheme describes what mechanism should be used 88 * to retrieve the resource. Although URLs have many schemes ({@code mailto}, {@code file}, {@code 89 * ftp}), this class only supports {@code http} and {@code https}. Use {@link URI java.net.URI} for 90 * URLs with arbitrary schemes. 
91 * 92 * <h4>Username and Password</h4> 93 * Username and password are either present, or the empty string {@code ""} if absent. This class 94 * offers no mechanism to differentiate empty from absent. Neither of these components are popular 95 * in practice. Typically HTTP applications use other mechanisms for user identification and 96 * authentication. 97 * 98 * <h4>Host</h4> 99 * The host identifies the webserver that serves the URL's resource. It is either a hostname like 100 * {@code square.com} or {@code localhost}, an IPv4 address like {@code 192.168.0.1}, or an IPv6 101 * address like {@code ::1}. 102 * 103 * <p>Usually a webserver is reachable with multiple identifiers: its IP addresses, registered 104 * domain names, and even {@code localhost} when connecting from the server itself. Each of a 105 * webserver's names is a distinct URL and they are not interchangeable. For example, even if 106 * {@code http://square.github.io/dagger} and {@code http://google.github.io/dagger} are served by 107 * the same IP address, the two URLs identify different resources. 108 * 109 * <h4>Port</h4> 110 * The port used to connect to the webserver. By default this is 80 for HTTP and 443 for HTTPS. This 111 * class never returns -1 for the port: if no port is explicitly specified in the URL then the 112 * scheme's default is used. 113 * 114 * <h4>Path</h4> 115 * The path identifies a specific resource on the host. Paths have a hierarchical structure like 116 * "/square/okhttp/issues/1486". Each path segment is prefixed with "/". This class offers methods 117 * to compose and decompose paths by segment. If a path's last segment is the empty string, then the 118 * path ends with "/". This class always builds non-empty paths: if the path is omitted it defaults 119 * to "/", which is a path whose only segment is the empty string. 120 * 121 * <h4>Query</h4> 122 * The query is optional: it can be null, empty, or non-empty. 
For many HTTP URLs the query string 123 * is subdivided into a collection of name-value parameters. This class offers methods to set the 124 * query as the single string, or as individual name-value parameters. With name-value parameters 125 * the values are optional and names may be repeated. 126 * 127 * <h4>Fragment</h4> 128 * The fragment is optional: it can be null, empty, or non-empty. Unlike host, port, path, and query 129 * the fragment is not sent to the webserver: it's private to the client. 130 * 131 * <h3>Encoding</h3> 132 * Each component must be encoded before it is embedded in the complete URL. As we saw above, the 133 * string {@code cute #puppies} is encoded as {@code cute%20%23puppies} when used as a query 134 * parameter value. 135 * 136 * <h4>Percent encoding</h4> 137 * Percent encoding replaces a character (like {@code \ud83c\udf69}) with its UTF-8 hex bytes (like 138 * {@code %F0%9F%8D%A9}). This approach works for whitespace characters, control characters, 139 * non-ASCII characters, and characters that already have another meaning in a particular context. 140 * 141 * <p>Percent encoding is used in every URL component except for the hostname. But the set of 142 * characters that need to be encoded is different for each component. For example, the path 143 * component must escape all of its {@code ?} characters, otherwise it could be interpreted as the 144 * start of the URL's query. But within the query and fragment components, the {@code ?} character 145 * doesn't delimit anything and doesn't need to be escaped. 
<pre> {@code 146 * 147 * HttpUrl url = HttpUrl.parse("http://who-let-the-dogs.out").newBuilder() 148 * .addPathSegment("_Who?_") 149 * .query("_Who?_") 150 * .fragment("_Who?_") 151 * .build(); 152 * System.out.println(url); 153 * }</pre> 154 * 155 * This prints: <pre> {@code 156 * 157 * http://who-let-the-dogs.out/_Who%3F_?_Who?_#_Who?_ 158 * }</pre> 159 * 160 * When parsing URLs that lack percent encoding where it is required, this class will percent encode 161 * the offending characters. 162 * 163 * <h4>IDNA Mapping and Punycode encoding</h4> 164 * Hostnames have different requirements and use a different encoding scheme. It consists of IDNA 165 * mapping and Punycode encoding. 166 * 167 * <p>In order to avoid confusion and discourage phishing attacks, 168 * <a href="http://www.unicode.org/reports/tr46/#ToASCII">IDNA Mapping</a> transforms names to avoid 169 * confusing characters. This includes basic case folding: transforming shouting {@code SQUARE.COM} 170 * into cool and casual {@code square.com}. It also handles more exotic characters. For example, the 171 * Unicode trademark sign (&trade;) could be confused for the letters "TM" in <code>http://ho&trade;mail.com</code>. 172 * To mitigate this, the single character (&trade;) maps to the string (tm). There is a similar policy for 173 * all of the 1.1 million Unicode code points. Note that some code points such as "\ud83c\udf69" are 174 * not mapped and cannot be used in a hostname. 175 * 176 * <p><a href="http://ietf.org/rfc/rfc3492.txt">Punycode</a> converts a Unicode string to an ASCII 177 * string to make international domain names work everywhere. For example, "&sigma;" encodes as 178 * "xn--4xa". The encoded string is not human readable, but can be used with classes like {@link 179 * InetAddress} to establish connections. 180 * 181 * <h3>Why another URL model?</h3> 182 * Java includes both {@link URL java.net.URL} and {@link URI java.net.URI}. We offer a new URL 183 * model to address problems that the others don't. 
184 * 185 * <h4>Different URLs should be different</h4> 186 * Although they have different content, {@code java.net.URL} considers the following two URLs 187 * equal, and the {@link Object#equals equals()} method between them returns true: 188 * <ul> 189 * <li>http://square.github.io/ 190 * <li>http://google.github.io/ 191 * </ul> 192 * This is because those two hosts share the same IP address. This is an old, bad design decision 193 * that makes {@code java.net.URL} unusable for many things. It shouldn't be used as a {@link 194 * java.util.Map Map} key or in a {@link Set}. Doing so is both inefficient because equality may 195 * require a DNS lookup, and incorrect because unequal URLs may be equal because of how they are 196 * hosted. 197 * 198 * <h4>Equal URLs should be equal</h4> 199 * These two URLs are semantically identical, but {@code java.net.URI} disagrees: 200 * <ul> 201 * <li>http://host:80/ 202 * <li>http://host 203 * </ul> 204 * Both the unnecessary port specification ({@code :80}) and the absent trailing slash ({@code /}) 205 * cause URI to bucket the two URLs separately. This harms URI's usefulness in collections. Any 206 * application that stores information-per-URL will need to either canonicalize manually, or suffer 207 * unnecessary redundancy for such URLs. 208 * 209 * <p>Because they don't attempt canonical form, these classes are surprisingly difficult to use 210 * securely. Suppose you're building a webservice that checks that incoming paths are prefixed 211 * "/static/images/" before serving the corresponding assets from the filesystem. <pre> {@code 212 * 213 * String attack = "http://example.com/static/images/../../../../../etc/passwd"; 214 * System.out.println(new URL(attack).getPath()); 215 * System.out.println(new URI(attack).getPath()); 216 * System.out.println(HttpUrl.parse(attack).encodedPath()); 217 * }</pre> 218 * 219 * By not canonicalizing the input paths, {@code URL} and {@code URI} are complicit in directory traversal attacks. 
Code that 220 * checks only the path prefix may suffer! 221 * <pre> {@code 222 * 223 * /static/images/../../../../../etc/passwd 224 * /static/images/../../../../../etc/passwd 225 * /etc/passwd 226 * }</pre> 227 * 228 * <h4>If it works on the web, it should work in your application</h4> 229 * The {@code java.net.URI} class is strict around what URLs it accepts. It rejects URLs like 230 * "http://example.com/abc|def" because the '|' character is unsupported. This class is more 231 * forgiving: it will automatically percent-encode the '|', yielding "http://example.com/abc%7Cdef". 232 * This kind of behavior is consistent with web browsers. {@code HttpUrl} prefers consistency with 233 * major web browsers over consistency with obsolete specifications. 234 * 235 * <h4>Paths and Queries should decompose</h4> 236 * Neither of the built-in URL models offers direct access to path segments or query parameters. 237 * Manually using {@code StringBuilder} to assemble these components is cumbersome: do '+' 238 * characters get silently replaced with spaces? If a query parameter contains a '&amp;', does that 239 * get escaped? By offering methods to read and write individual query parameters directly, 240 * application developers are saved from the hassles of encoding and decoding. 241 * 242 * <h4>Plus a modern API</h4> 243 * The URL (JDK1.0) and URI (Java 1.4) classes predate builders and instead use telescoping 244 * constructors. For example, there's no API to compose a URI with a custom port without also 245 * providing a query and fragment. 246 * 247 * <p>Instances of {@link HttpUrl} are well-formed and always have a scheme, host, and path. With 248 * {@code java.net.URL} it's possible to create an awkward URL like {@code http:/} with scheme and 249 * path but no hostname. Building APIs that consume such malformed values is difficult! 250 * 251 * <p>This class has a modern API. 
It avoids punitive checked exceptions: {@link #parse parse()}
 * returns null if the input is an invalid URL. You can even be explicit about whether each
 * component has been encoded already.
 */
public final class HttpUrl {
  /** Upper-case hexadecimal digits; the digit for value {@code i} is at index {@code i}. */
  private static final char[] HEX_DIGITS =
      { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };

  // Character sets handed to canonicalize() as the "encode set" for the named URL component
  // (see the Builder setters, which pass the matching constant).
  // NOTE(review): canonicalize() and the users of the *_URI and FORM sets are not visible from
  // here; presumably the *_URI sets list the extra characters escaped when re-encoding for
  // java.net.URI. Confirm before relying on that.
  static final String USERNAME_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#";
  static final String PASSWORD_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#";
  static final String PATH_SEGMENT_ENCODE_SET = " \"<>^`{}|/\\?#";
  static final String PATH_SEGMENT_ENCODE_SET_URI = "[]";
  static final String QUERY_ENCODE_SET = " \"'<>#";
  static final String QUERY_COMPONENT_ENCODE_SET = " \"'<>#&=";
  static final String QUERY_COMPONENT_ENCODE_SET_URI = "\\^`{|}";
  static final String FORM_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#&!$(),~";
  static final String FRAGMENT_ENCODE_SET = "";
  static final String FRAGMENT_ENCODE_SET_URI = " \"#<>\\^`{|}";

  /** Either "http" or "https". */
  private final String scheme;

  /** Decoded username. */
  private final String username;

  /** Decoded password. */
  private final String password;

  /** Canonical hostname. */
  private final String host;

  /** Either 80, 443 or a user-specified port. In range [1..65535]. */
  private final int port;

  /**
   * A list of canonical path segments. This list always contains at least one element, which may
   * be the empty string. Each segment is formatted with a leading '/', so if path segments were
   * ["a", "b", ""], then the encoded path would be "/a/b/".
   */
  private final List<String> pathSegments;

  /**
   * Alternating, decoded query names and values, or null for no query. Names may be empty or
   * non-empty, but never null. Values are null if the name has no corresponding '=' separator, or
   * empty, or non-empty.
295 */ 296 private final List<String> queryNamesAndValues; 297 298 /** Decoded fragment. */ 299 private final String fragment; 300 301 /** Canonical URL. */ 302 private final String url; 303 304 private HttpUrl(Builder builder) { 305 this.scheme = builder.scheme; 306 this.username = percentDecode(builder.encodedUsername, false); 307 this.password = percentDecode(builder.encodedPassword, false); 308 this.host = builder.host; 309 this.port = builder.effectivePort(); 310 this.pathSegments = percentDecode(builder.encodedPathSegments, false); 311 this.queryNamesAndValues = builder.encodedQueryNamesAndValues != null 312 ? percentDecode(builder.encodedQueryNamesAndValues, true) 313 : null; 314 this.fragment = builder.encodedFragment != null 315 ? percentDecode(builder.encodedFragment, false) 316 : null; 317 this.url = builder.toString(); 318 } 319 320 /** Returns this URL as a {@link URL java.net.URL}. */ 321 public URL url() { 322 try { 323 return new URL(url); 324 } catch (MalformedURLException e) { 325 throw new RuntimeException(e); // Unexpected! 326 } 327 } 328 329 /** 330 * Returns this URL as a {@link URI java.net.URI}. Because {@code URI} is more strict than this 331 * class, the returned URI may be semantically different from this URL: 332 * <ul> 333 * <li>Characters forbidden by URI like {@code [} and {@code |} will be escaped. 334 * <li>Invalid percent-encoded sequences like {@code %xx} will be encoded like {@code %25xx}. 335 * <li>Whitespace and control characters in the fragment will be stripped. 336 * </ul> 337 * 338 * <p>These differences may have a significant consequence when the URI is interpretted by a 339 * webserver. For this reason the {@linkplain URI URI class} and this method should be avoided. 340 */ 341 public URI uri() { 342 String uri = newBuilder().reencodeForUri().toString(); 343 try { 344 return new URI(uri); 345 } catch (URISyntaxException e) { 346 // Unlikely edge case: the URI has a forbidden character in the fragment. Strip it & retry. 
347 try { 348 String stripped = uri.replaceAll("[\\u0000-\\u001F\\u007F-\\u009F\\p{javaWhitespace}]", ""); 349 return URI.create(stripped); 350 } catch (Exception e1) { 351 throw new RuntimeException(e); // Unexpected! 352 } 353 } 354 } 355 356 /** Returns either "http" or "https". */ 357 public String scheme() { 358 return scheme; 359 } 360 361 public boolean isHttps() { 362 return scheme.equals("https"); 363 } 364 365 /** Returns the username, or an empty string if none is set. */ 366 public String encodedUsername() { 367 if (username.isEmpty()) return ""; 368 int usernameStart = scheme.length() + 3; // "://".length() == 3. 369 int usernameEnd = delimiterOffset(url, usernameStart, url.length(), ":@"); 370 return url.substring(usernameStart, usernameEnd); 371 } 372 373 public String username() { 374 return username; 375 } 376 377 /** Returns the password, or an empty string if none is set. */ 378 public String encodedPassword() { 379 if (password.isEmpty()) return ""; 380 int passwordStart = url.indexOf(':', scheme.length() + 3) + 1; 381 int passwordEnd = url.indexOf('@'); 382 return url.substring(passwordStart, passwordEnd); 383 } 384 385 /** Returns the decoded password, or an empty string if none is present. */ 386 public String password() { 387 return password; 388 } 389 390 /** 391 * Returns the host address suitable for use with {@link InetAddress#getAllByName(String)}. May 392 * be: 393 * <ul> 394 * <li>A regular host name, like {@code android.com}. 395 * <li>An IPv4 address, like {@code 127.0.0.1}. 396 * <li>An IPv6 address, like {@code ::1}. Note that there are no square braces. 397 * <li>An encoded IDN, like {@code xn--n3h.net}. 398 * </ul> 399 */ 400 public String host() { 401 return host; 402 } 403 404 /** 405 * Same as {@link #host} except that literal IPv6 addresses are surrounding by square 406 * braces. For example, this method will return {@code [::1]} where {@code host} returns 407 * {@code ::1}. 
408 */ 409 public String rfc2732host() { 410 if (host.indexOf(':') == -1) { 411 return host; 412 } 413 414 return "[" + host + "]"; 415 } 416 417 /** 418 * Returns the explicitly-specified port if one was provided, or the default port for this URL's 419 * scheme. For example, this returns 8443 for {@code https://square.com:8443/} and 443 for {@code 420 * https://square.com/}. The result is in {@code [1..65535]}. 421 */ 422 public int port() { 423 return port; 424 } 425 426 /** 427 * Returns 80 if {@code scheme.equals("http")}, 443 if {@code scheme.equals("https")} and -1 428 * otherwise. 429 */ 430 public static int defaultPort(String scheme) { 431 if (scheme.equals("http")) { 432 return 80; 433 } else if (scheme.equals("https")) { 434 return 443; 435 } else { 436 return -1; 437 } 438 } 439 440 public int pathSize() { 441 return pathSegments.size(); 442 } 443 444 /** 445 * Returns the entire path of this URL, encoded for use in HTTP resource resolution. 446 // ANDROID-BEGIN: http://b/29983827 447 // * The returned path is always nonempty and is prefixed with {@code /}. 448 // ANDROID-END: http://b/29983827 449 */ 450 public String encodedPath() { 451 int pathStart = url.indexOf('/', scheme.length() + 3); // "://".length() == 3. 
452 // ANDROID-BEGIN: http://b/29983827 453 if (pathStart == -1) { 454 return ""; 455 } 456 // ANDROID-END: http://b/29983827 457 int pathEnd = delimiterOffset(url, pathStart, url.length(), "?#"); 458 return url.substring(pathStart, pathEnd); 459 } 460 461 static void pathSegmentsToString(StringBuilder out, List<String> pathSegments) { 462 for (int i = 0, size = pathSegments.size(); i < size; i++) { 463 out.append('/'); 464 out.append(pathSegments.get(i)); 465 } 466 } 467 468 public List<String> encodedPathSegments() { 469 int pathStart = url.indexOf('/', scheme.length() + 3); 470 // ANDROID-BEGIN: http://b/29983827 471 if (pathStart == -1) { 472 return new ArrayList<>(); 473 } 474 // ANDROID-END: http://b/29983827 475 476 int pathEnd = delimiterOffset(url, pathStart, url.length(), "?#"); 477 List<String> result = new ArrayList<>(); 478 for (int i = pathStart; i < pathEnd; ) { 479 i++; // Skip the '/'. 480 int segmentEnd = delimiterOffset(url, i, pathEnd, "/"); 481 result.add(url.substring(i, segmentEnd)); 482 i = segmentEnd; 483 } 484 return result; 485 } 486 487 public List<String> pathSegments() { 488 return pathSegments; 489 } 490 491 /** 492 * Returns the query of this URL, encoded for use in HTTP resource resolution. The returned string 493 * may be null (for URLs with no query), empty (for URLs with an empty query) or non-empty (all 494 * other URLs). 495 */ 496 public String encodedQuery() { 497 if (queryNamesAndValues == null) return null; // No query. 
498 int queryStart = url.indexOf('?') + 1; 499 int queryEnd = delimiterOffset(url, queryStart + 1, url.length(), "#"); 500 return url.substring(queryStart, queryEnd); 501 } 502 503 static void namesAndValuesToQueryString(StringBuilder out, List<String> namesAndValues) { 504 for (int i = 0, size = namesAndValues.size(); i < size; i += 2) { 505 String name = namesAndValues.get(i); 506 String value = namesAndValues.get(i + 1); 507 if (i > 0) out.append('&'); 508 out.append(name); 509 if (value != null) { 510 out.append('='); 511 out.append(value); 512 } 513 } 514 } 515 516 /** 517 * Cuts {@code encodedQuery} up into alternating parameter names and values. This divides a 518 * query string like {@code subject=math&easy&problem=5-2=3} into the list {@code ["subject", 519 * "math", "easy", null, "problem", "5-2=3"]}. Note that values may be null and may contain 520 * '=' characters. 521 */ 522 static List<String> queryStringToNamesAndValues(String encodedQuery) { 523 List<String> result = new ArrayList<>(); 524 for (int pos = 0; pos <= encodedQuery.length(); ) { 525 int ampersandOffset = encodedQuery.indexOf('&', pos); 526 if (ampersandOffset == -1) ampersandOffset = encodedQuery.length(); 527 528 int equalsOffset = encodedQuery.indexOf('=', pos); 529 if (equalsOffset == -1 || equalsOffset > ampersandOffset) { 530 result.add(encodedQuery.substring(pos, ampersandOffset)); 531 result.add(null); // No value for this name. 532 } else { 533 result.add(encodedQuery.substring(pos, equalsOffset)); 534 result.add(encodedQuery.substring(equalsOffset + 1, ampersandOffset)); 535 } 536 pos = ampersandOffset + 1; 537 } 538 return result; 539 } 540 541 public String query() { 542 if (queryNamesAndValues == null) return null; // No query. 543 StringBuilder result = new StringBuilder(); 544 namesAndValuesToQueryString(result, queryNamesAndValues); 545 return result.toString(); 546 } 547 548 public int querySize() { 549 return queryNamesAndValues != null ? 
queryNamesAndValues.size() / 2 : 0; 550 } 551 552 /** 553 * Returns the first query parameter named {@code name} decoded using UTF-8, or null if there is 554 * no such query parameter. 555 */ 556 public String queryParameter(String name) { 557 if (queryNamesAndValues == null) return null; 558 for (int i = 0, size = queryNamesAndValues.size(); i < size; i += 2) { 559 if (name.equals(queryNamesAndValues.get(i))) { 560 return queryNamesAndValues.get(i + 1); 561 } 562 } 563 return null; 564 } 565 566 public Set<String> queryParameterNames() { 567 if (queryNamesAndValues == null) return Collections.emptySet(); 568 Set<String> result = new LinkedHashSet<>(); 569 for (int i = 0, size = queryNamesAndValues.size(); i < size; i += 2) { 570 result.add(queryNamesAndValues.get(i)); 571 } 572 return Collections.unmodifiableSet(result); 573 } 574 575 public List<String> queryParameterValues(String name) { 576 if (queryNamesAndValues == null) return Collections.emptyList(); 577 List<String> result = new ArrayList<>(); 578 for (int i = 0, size = queryNamesAndValues.size(); i < size; i += 2) { 579 if (name.equals(queryNamesAndValues.get(i))) { 580 result.add(queryNamesAndValues.get(i + 1)); 581 } 582 } 583 return Collections.unmodifiableList(result); 584 } 585 586 public String queryParameterName(int index) { 587 return queryNamesAndValues.get(index * 2); 588 } 589 590 public String queryParameterValue(int index) { 591 return queryNamesAndValues.get(index * 2 + 1); 592 } 593 594 public String encodedFragment() { 595 if (fragment == null) return null; 596 int fragmentStart = url.indexOf('#') + 1; 597 return url.substring(fragmentStart); 598 } 599 600 public String fragment() { 601 return fragment; 602 } 603 604 /** Returns the URL that would be retrieved by following {@code link} from this URL. 
*/ 605 public HttpUrl resolve(String link) { 606 // ANDROID-BEGIN: http://b/29983827 607 // Builder builder = new Builder(); 608 Builder builder = new Builder(false); 609 // ANDROID-END: http://b/29983827 610 Builder.ParseResult result = builder.parse(this, link); 611 return result == Builder.ParseResult.SUCCESS ? builder.build() : null; 612 } 613 614 public Builder newBuilder() { 615 // ANDROID-BEGIN: http://b/29983827 616 // Builder builder = new Builder(); 617 Builder result = new Builder(false); 618 // ANDROID-END: http://b/29983827 619 result.scheme = scheme; 620 result.encodedUsername = encodedUsername(); 621 result.encodedPassword = encodedPassword(); 622 result.host = host; 623 // If we're set to a default port, unset it in case of a scheme change. 624 result.port = port != defaultPort(scheme) ? port : -1; 625 result.encodedPathSegments.clear(); 626 result.encodedPathSegments.addAll(encodedPathSegments()); 627 result.encodedQuery(encodedQuery()); 628 result.encodedFragment = encodedFragment(); 629 return result; 630 } 631 632 /** 633 * Returns a new {@code HttpUrl} representing {@code url} if it is a well-formed HTTP or HTTPS 634 * URL, or null if it isn't. 635 */ 636 public static HttpUrl parse(String url) { 637 // ANDROID-BEGIN: http://b/29983827 638 // Builder builder = new Builder(); 639 Builder builder = new Builder(false); 640 // ANDROID-END: http://b/29983827 641 Builder.ParseResult result = builder.parse(null, url); 642 return result == Builder.ParseResult.SUCCESS ? builder.build() : null; 643 } 644 645 /** 646 * Returns an {@link HttpUrl} for {@code url} if its protocol is {@code http} or {@code https}, or 647 * null if it has any other protocol. 648 */ 649 public static HttpUrl get(URL url) { 650 return parse(url.toString()); 651 } 652 653 /** 654 * Returns a new {@code HttpUrl} representing {@code url} if it is a well-formed HTTP or HTTPS 655 * URL, or throws an exception if it isn't. 
656 * 657 * @throws MalformedURLException if there was a non-host related URL issue 658 * @throws UnknownHostException if the host was invalid 659 */ 660 static HttpUrl getChecked(String url) throws MalformedURLException, UnknownHostException { 661 // ANDROID-END: http://b/29983827 662 // Builder builder = new Builder(); 663 Builder builder = new Builder(false); 664 // ANDROID-END: http://b/29983827 665 Builder.ParseResult result = builder.parse(null, url); 666 switch (result) { 667 case SUCCESS: 668 return builder.build(); 669 case INVALID_HOST: 670 throw new UnknownHostException("Invalid host: " + url); 671 case UNSUPPORTED_SCHEME: 672 case MISSING_SCHEME: 673 case INVALID_PORT: 674 default: 675 throw new MalformedURLException("Invalid URL: " + result + " for " + url); 676 } 677 } 678 679 public static HttpUrl get(URI uri) { 680 return parse(uri.toString()); 681 } 682 683 @Override public boolean equals(Object o) { 684 return o instanceof HttpUrl && ((HttpUrl) o).url.equals(url); 685 } 686 687 @Override public int hashCode() { 688 return url.hashCode(); 689 } 690 691 @Override public String toString() { 692 return url; 693 } 694 695 public static final class Builder { 696 String scheme; 697 String encodedUsername = ""; 698 String encodedPassword = ""; 699 String host; 700 int port = -1; 701 final List<String> encodedPathSegments = new ArrayList<>(); 702 List<String> encodedQueryNamesAndValues; 703 String encodedFragment; 704 705 // ANDROID-BEGIN: http://b/29983827 706 // public Builder() { 707 // encodedPathSegments.add(""); // The default path is '/' which needs a trailing space. 708 // } 709 710 public Builder() { 711 this(true); // // The default path is '/' which needs a trailing space. 
712 } 713 714 private Builder(boolean startWithSlash) { 715 if (startWithSlash) { 716 encodedPathSegments.add(""); 717 } 718 } 719 // ANDROID-END: http://b/29983827 720 721 public Builder scheme(String scheme) { 722 if (scheme == null) { 723 throw new IllegalArgumentException("scheme == null"); 724 } else if (scheme.equalsIgnoreCase("http")) { 725 this.scheme = "http"; 726 } else if (scheme.equalsIgnoreCase("https")) { 727 this.scheme = "https"; 728 } else { 729 throw new IllegalArgumentException("unexpected scheme: " + scheme); 730 } 731 return this; 732 } 733 734 public Builder username(String username) { 735 if (username == null) throw new IllegalArgumentException("username == null"); 736 this.encodedUsername = canonicalize(username, USERNAME_ENCODE_SET, false, false, false, true); 737 return this; 738 } 739 740 public Builder encodedUsername(String encodedUsername) { 741 if (encodedUsername == null) throw new IllegalArgumentException("encodedUsername == null"); 742 this.encodedUsername = canonicalize( 743 encodedUsername, USERNAME_ENCODE_SET, true, false, false, true); 744 return this; 745 } 746 747 public Builder password(String password) { 748 if (password == null) throw new IllegalArgumentException("password == null"); 749 this.encodedPassword = canonicalize(password, PASSWORD_ENCODE_SET, false, false, false, true); 750 return this; 751 } 752 753 public Builder encodedPassword(String encodedPassword) { 754 if (encodedPassword == null) throw new IllegalArgumentException("encodedPassword == null"); 755 this.encodedPassword = canonicalize( 756 encodedPassword, PASSWORD_ENCODE_SET, true, false, false, true); 757 return this; 758 } 759 760 /** 761 * @param host either a regular hostname, International Domain Name, IPv4 address, or IPv6 762 * address. 
763 */ 764 public Builder host(String host) { 765 if (host == null) throw new IllegalArgumentException("host == null"); 766 String encoded = canonicalizeHost(host, 0, host.length()); 767 if (encoded == null) throw new IllegalArgumentException("unexpected host: " + host); 768 this.host = encoded; 769 return this; 770 } 771 772 public Builder port(int port) { 773 if (port <= 0 || port > 65535) throw new IllegalArgumentException("unexpected port: " + port); 774 this.port = port; 775 return this; 776 } 777 778 int effectivePort() { 779 return port != -1 ? port : defaultPort(scheme); 780 } 781 782 public Builder addPathSegment(String pathSegment) { 783 if (pathSegment == null) throw new IllegalArgumentException("pathSegment == null"); 784 push(pathSegment, 0, pathSegment.length(), false, false); 785 return this; 786 } 787 788 public Builder addEncodedPathSegment(String encodedPathSegment) { 789 if (encodedPathSegment == null) { 790 throw new IllegalArgumentException("encodedPathSegment == null"); 791 } 792 push(encodedPathSegment, 0, encodedPathSegment.length(), false, true); 793 return this; 794 } 795 796 public Builder setPathSegment(int index, String pathSegment) { 797 if (pathSegment == null) throw new IllegalArgumentException("pathSegment == null"); 798 String canonicalPathSegment = canonicalize( 799 pathSegment, 0, pathSegment.length(), PATH_SEGMENT_ENCODE_SET, false, false, false, true); 800 if (isDot(canonicalPathSegment) || isDotDot(canonicalPathSegment)) { 801 throw new IllegalArgumentException("unexpected path segment: " + pathSegment); 802 } 803 encodedPathSegments.set(index, canonicalPathSegment); 804 return this; 805 } 806 807 public Builder setEncodedPathSegment(int index, String encodedPathSegment) { 808 if (encodedPathSegment == null) { 809 throw new IllegalArgumentException("encodedPathSegment == null"); 810 } 811 String canonicalPathSegment = canonicalize(encodedPathSegment, 812 0, encodedPathSegment.length(), PATH_SEGMENT_ENCODE_SET, true, false, 
false, true); 813 encodedPathSegments.set(index, canonicalPathSegment); 814 if (isDot(canonicalPathSegment) || isDotDot(canonicalPathSegment)) { 815 throw new IllegalArgumentException("unexpected path segment: " + encodedPathSegment); 816 } 817 return this; 818 } 819 820 public Builder removePathSegment(int index) { 821 encodedPathSegments.remove(index); 822 // ANDROID-BEGIN: http://b/29983827. Note this method only used from tests. 823 // Only changed for consistency. 824 // if (encodedPathSegments.isEmpty()) { 825 // encodedPathSegments.add(""); // Always leave at least one '/'. 826 // } 827 // ANDROID-END: http://b/29983827 - only used from tests 828 return this; 829 } 830 831 public Builder encodedPath(String encodedPath) { 832 if (encodedPath == null) throw new IllegalArgumentException("encodedPath == null"); 833 if (!encodedPath.startsWith("/")) { 834 throw new IllegalArgumentException("unexpected encodedPath: " + encodedPath); 835 } 836 resolvePath(encodedPath, 0, encodedPath.length()); 837 return this; 838 } 839 840 public Builder query(String query) { 841 this.encodedQueryNamesAndValues = query != null 842 ? queryStringToNamesAndValues(canonicalize( 843 query, QUERY_ENCODE_SET, false, false, true, true)) 844 : null; 845 return this; 846 } 847 848 public Builder encodedQuery(String encodedQuery) { 849 this.encodedQueryNamesAndValues = encodedQuery != null 850 ? queryStringToNamesAndValues( 851 canonicalize(encodedQuery, QUERY_ENCODE_SET, true, false, true, true)) 852 : null; 853 return this; 854 } 855 856 /** Encodes the query parameter using UTF-8 and adds it to this URL's query string. 
*/ 857 public Builder addQueryParameter(String name, String value) { 858 if (name == null) throw new IllegalArgumentException("name == null"); 859 if (encodedQueryNamesAndValues == null) encodedQueryNamesAndValues = new ArrayList<>(); 860 encodedQueryNamesAndValues.add( 861 canonicalize(name, QUERY_COMPONENT_ENCODE_SET, false, false, true, true)); 862 encodedQueryNamesAndValues.add(value != null 863 ? canonicalize(value, QUERY_COMPONENT_ENCODE_SET, false, false, true, true) 864 : null); 865 return this; 866 } 867 868 /** Adds the pre-encoded query parameter to this URL's query string. */ 869 public Builder addEncodedQueryParameter(String encodedName, String encodedValue) { 870 if (encodedName == null) throw new IllegalArgumentException("encodedName == null"); 871 if (encodedQueryNamesAndValues == null) encodedQueryNamesAndValues = new ArrayList<>(); 872 encodedQueryNamesAndValues.add( 873 canonicalize(encodedName, QUERY_COMPONENT_ENCODE_SET, true, false, true, true)); 874 encodedQueryNamesAndValues.add(encodedValue != null 875 ? 
canonicalize(encodedValue, QUERY_COMPONENT_ENCODE_SET, true, false, true, true) 876 : null); 877 return this; 878 } 879 880 public Builder setQueryParameter(String name, String value) { 881 removeAllQueryParameters(name); 882 addQueryParameter(name, value); 883 return this; 884 } 885 886 public Builder setEncodedQueryParameter(String encodedName, String encodedValue) { 887 removeAllEncodedQueryParameters(encodedName); 888 addEncodedQueryParameter(encodedName, encodedValue); 889 return this; 890 } 891 892 public Builder removeAllQueryParameters(String name) { 893 if (name == null) throw new IllegalArgumentException("name == null"); 894 if (encodedQueryNamesAndValues == null) return this; 895 String nameToRemove = canonicalize( 896 name, QUERY_COMPONENT_ENCODE_SET, false, false, true, true); 897 removeAllCanonicalQueryParameters(nameToRemove); 898 return this; 899 } 900 901 public Builder removeAllEncodedQueryParameters(String encodedName) { 902 if (encodedName == null) throw new IllegalArgumentException("encodedName == null"); 903 if (encodedQueryNamesAndValues == null) return this; 904 removeAllCanonicalQueryParameters( 905 canonicalize(encodedName, QUERY_COMPONENT_ENCODE_SET, true, false, true, true)); 906 return this; 907 } 908 909 private void removeAllCanonicalQueryParameters(String canonicalName) { 910 for (int i = encodedQueryNamesAndValues.size() - 2; i >= 0; i -= 2) { 911 if (canonicalName.equals(encodedQueryNamesAndValues.get(i))) { 912 encodedQueryNamesAndValues.remove(i + 1); 913 encodedQueryNamesAndValues.remove(i); 914 if (encodedQueryNamesAndValues.isEmpty()) { 915 encodedQueryNamesAndValues = null; 916 return; 917 } 918 } 919 } 920 } 921 922 public Builder fragment(String fragment) { 923 this.encodedFragment = fragment != null 924 ? 
canonicalize(fragment, FRAGMENT_ENCODE_SET, false, false, false, false) 925 : null; 926 return this; 927 } 928 929 public Builder encodedFragment(String encodedFragment) { 930 this.encodedFragment = encodedFragment != null 931 ? canonicalize(encodedFragment, FRAGMENT_ENCODE_SET, true, false, false, false) 932 : null; 933 return this; 934 } 935 936 /** 937 * Re-encodes the components of this URL so that it satisfies (obsolete) RFC 2396, which is 938 * particularly strict for certain components. 939 */ 940 Builder reencodeForUri() { 941 for (int i = 0, size = encodedPathSegments.size(); i < size; i++) { 942 String pathSegment = encodedPathSegments.get(i); 943 encodedPathSegments.set(i, 944 canonicalize(pathSegment, PATH_SEGMENT_ENCODE_SET_URI, true, true, false, true)); 945 } 946 if (encodedQueryNamesAndValues != null) { 947 for (int i = 0, size = encodedQueryNamesAndValues.size(); i < size; i++) { 948 String component = encodedQueryNamesAndValues.get(i); 949 if (component != null) { 950 encodedQueryNamesAndValues.set(i, 951 canonicalize(component, QUERY_COMPONENT_ENCODE_SET_URI, true, true, true, true)); 952 } 953 } 954 } 955 if (encodedFragment != null) { 956 encodedFragment = canonicalize( 957 encodedFragment, FRAGMENT_ENCODE_SET_URI, true, true, false, false); 958 } 959 return this; 960 } 961 962 public HttpUrl build() { 963 if (scheme == null) throw new IllegalStateException("scheme == null"); 964 if (host == null) throw new IllegalStateException("host == null"); 965 return new HttpUrl(this); 966 } 967 968 @Override public String toString() { 969 StringBuilder result = new StringBuilder(); 970 result.append(scheme); 971 result.append("://"); 972 973 if (!encodedUsername.isEmpty() || !encodedPassword.isEmpty()) { 974 result.append(encodedUsername); 975 if (!encodedPassword.isEmpty()) { 976 result.append(':'); 977 result.append(encodedPassword); 978 } 979 result.append('@'); 980 } 981 982 if (host.indexOf(':') != -1) { 983 // Host is an IPv6 address. 
984 result.append('['); 985 result.append(host); 986 result.append(']'); 987 } else { 988 result.append(host); 989 } 990 991 int effectivePort = effectivePort(); 992 if (effectivePort != defaultPort(scheme)) { 993 result.append(':'); 994 result.append(effectivePort); 995 } 996 997 pathSegmentsToString(result, encodedPathSegments); 998 999 if (encodedQueryNamesAndValues != null) { 1000 result.append('?'); 1001 namesAndValuesToQueryString(result, encodedQueryNamesAndValues); 1002 } 1003 1004 if (encodedFragment != null) { 1005 result.append('#'); 1006 result.append(encodedFragment); 1007 } 1008 1009 return result.toString(); 1010 } 1011 1012 enum ParseResult { 1013 SUCCESS, 1014 MISSING_SCHEME, 1015 UNSUPPORTED_SCHEME, 1016 INVALID_PORT, 1017 INVALID_HOST, 1018 } 1019 1020 ParseResult parse(HttpUrl base, String input) { 1021 int pos = skipLeadingAsciiWhitespace(input, 0, input.length()); 1022 int limit = skipTrailingAsciiWhitespace(input, pos, input.length()); 1023 1024 // Scheme. 1025 int schemeDelimiterOffset = schemeDelimiterOffset(input, pos, limit); 1026 if (schemeDelimiterOffset != -1) { 1027 if (input.regionMatches(true, pos, "https:", 0, 6)) { 1028 this.scheme = "https"; 1029 pos += "https:".length(); 1030 } else if (input.regionMatches(true, pos, "http:", 0, 5)) { 1031 this.scheme = "http"; 1032 pos += "http:".length(); 1033 } else { 1034 return ParseResult.UNSUPPORTED_SCHEME; // Not an HTTP scheme. 1035 } 1036 } else if (base != null) { 1037 this.scheme = base.scheme; 1038 } else { 1039 return ParseResult.MISSING_SCHEME; // No scheme. 1040 } 1041 1042 // Authority. 1043 boolean hasUsername = false; 1044 boolean hasPassword = false; 1045 int slashCount = slashCount(input, pos, limit); 1046 if (slashCount >= 2 || base == null || !base.scheme.equals(this.scheme)) { 1047 // Read an authority if either: 1048 // * The input starts with 2 or more slashes. These follow the scheme if it exists. 
1049 // * The input scheme exists and is different from the base URL's scheme. 1050 // 1051 // The structure of an authority is: 1052 // username:password@host:port 1053 // 1054 // Username, password and port are optional. 1055 // [username[:password]@]host[:port] 1056 pos += slashCount; 1057 authority: 1058 while (true) { 1059 int componentDelimiterOffset = delimiterOffset(input, pos, limit, "@/\\?#"); 1060 int c = componentDelimiterOffset != limit 1061 ? input.charAt(componentDelimiterOffset) 1062 : -1; 1063 switch (c) { 1064 case '@': 1065 // User info precedes. 1066 if (!hasPassword) { 1067 int passwordColonOffset = delimiterOffset( 1068 input, pos, componentDelimiterOffset, ":"); 1069 String canonicalUsername = canonicalize( 1070 input, pos, passwordColonOffset, USERNAME_ENCODE_SET, true, false, false, true); 1071 this.encodedUsername = hasUsername 1072 ? this.encodedUsername + "%40" + canonicalUsername 1073 : canonicalUsername; 1074 if (passwordColonOffset != componentDelimiterOffset) { 1075 hasPassword = true; 1076 this.encodedPassword = canonicalize(input, passwordColonOffset + 1, 1077 componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, false, true); 1078 } 1079 hasUsername = true; 1080 } else { 1081 this.encodedPassword = this.encodedPassword + "%40" + canonicalize(input, pos, 1082 componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, false, true); 1083 } 1084 pos = componentDelimiterOffset + 1; 1085 break; 1086 1087 case -1: 1088 case '/': 1089 case '\\': 1090 case '?': 1091 case '#': 1092 // Host info precedes. 1093 int portColonOffset = portColonOffset(input, pos, componentDelimiterOffset); 1094 if (portColonOffset + 1 < componentDelimiterOffset) { 1095 this.host = canonicalizeHost(input, pos, portColonOffset); 1096 this.port = parsePort(input, portColonOffset + 1, componentDelimiterOffset); 1097 if (this.port == -1) return ParseResult.INVALID_PORT; // Invalid port. 
1098 } else { 1099 this.host = canonicalizeHost(input, pos, portColonOffset); 1100 this.port = defaultPort(this.scheme); 1101 } 1102 if (this.host == null) return ParseResult.INVALID_HOST; // Invalid host. 1103 pos = componentDelimiterOffset; 1104 break authority; 1105 } 1106 } 1107 } else { 1108 // This is a relative link. Copy over all authority components. Also maybe the path & query. 1109 this.encodedUsername = base.encodedUsername(); 1110 this.encodedPassword = base.encodedPassword(); 1111 this.host = base.host; 1112 this.port = base.port; 1113 this.encodedPathSegments.clear(); 1114 this.encodedPathSegments.addAll(base.encodedPathSegments()); 1115 if (pos == limit || input.charAt(pos) == '#') { 1116 encodedQuery(base.encodedQuery()); 1117 } 1118 } 1119 1120 // Resolve the relative path. 1121 int pathDelimiterOffset = delimiterOffset(input, pos, limit, "?#"); 1122 resolvePath(input, pos, pathDelimiterOffset); 1123 pos = pathDelimiterOffset; 1124 1125 // Query. 1126 if (pos < limit && input.charAt(pos) == '?') { 1127 int queryDelimiterOffset = delimiterOffset(input, pos, limit, "#"); 1128 this.encodedQueryNamesAndValues = queryStringToNamesAndValues(canonicalize( 1129 input, pos + 1, queryDelimiterOffset, QUERY_ENCODE_SET, true, false, true, true)); 1130 pos = queryDelimiterOffset; 1131 } 1132 1133 // Fragment. 1134 if (pos < limit && input.charAt(pos) == '#') { 1135 this.encodedFragment = canonicalize( 1136 input, pos + 1, limit, FRAGMENT_ENCODE_SET, true, false, false, false); 1137 } 1138 1139 return ParseResult.SUCCESS; 1140 } 1141 1142 private void resolvePath(String input, int pos, int limit) { 1143 // Read a delimiter. 1144 if (pos == limit) { 1145 // Empty path: keep the base path as-is. 1146 return; 1147 } 1148 char c = input.charAt(pos); 1149 if (c == '/' || c == '\\') { 1150 // Absolute path: reset to the default "/". 
1151 encodedPathSegments.clear(); 1152 encodedPathSegments.add(""); 1153 pos++; 1154 } else { 1155 // ANDROID-BEGIN: http://b/29983827 1156 // // Relative path: clear everything after the last '/'. 1157 // encodedPathSegments.set(encodedPathSegments.size() - 1, ""); 1158 // Relative path: clear everything after the last '/' (if there is one). 1159 if (!encodedPathSegments.isEmpty()) { 1160 encodedPathSegments.set(encodedPathSegments.size() - 1, ""); 1161 } 1162 // ANDROID-END: http://b/29983827 1163 } 1164 1165 // Read path segments. 1166 for (int i = pos; i < limit; ) { 1167 int pathSegmentDelimiterOffset = delimiterOffset(input, i, limit, "/\\"); 1168 boolean segmentHasTrailingSlash = pathSegmentDelimiterOffset < limit; 1169 push(input, i, pathSegmentDelimiterOffset, segmentHasTrailingSlash, true); 1170 i = pathSegmentDelimiterOffset; 1171 if (segmentHasTrailingSlash) i++; 1172 } 1173 } 1174 1175 /** Adds a path segment. If the input is ".." or equivalent, this pops a path segment. */ 1176 private void push(String input, int pos, int limit, boolean addTrailingSlash, 1177 boolean alreadyEncoded) { 1178 String segment = canonicalize( 1179 input, pos, limit, PATH_SEGMENT_ENCODE_SET, alreadyEncoded, false, false, true); 1180 if (isDot(segment)) { 1181 return; // Skip '.' path segments. 1182 } 1183 if (isDotDot(segment)) { 1184 pop(); 1185 return; 1186 } 1187 1188 // ANDROID-BEGIN: http://b/29983827 1189 // If the encodedPathSegments doesn't even include "/" then add the leading "/" before 1190 // pushing more segments or modifying existing segments. 
1191 if (encodedPathSegments.isEmpty()) { 1192 encodedPathSegments.add(""); 1193 } 1194 // ANDROID-END: http://b/29983827 1195 1196 if (encodedPathSegments.get(encodedPathSegments.size() - 1).isEmpty()) { 1197 encodedPathSegments.set(encodedPathSegments.size() - 1, segment); 1198 } else { 1199 encodedPathSegments.add(segment); 1200 } 1201 if (addTrailingSlash) { 1202 encodedPathSegments.add(""); 1203 } 1204 } 1205 1206 private boolean isDot(String input) { 1207 return input.equals(".") || input.equalsIgnoreCase("%2e"); 1208 } 1209 1210 private boolean isDotDot(String input) { 1211 return input.equals("..") 1212 || input.equalsIgnoreCase("%2e.") 1213 || input.equalsIgnoreCase(".%2e") 1214 || input.equalsIgnoreCase("%2e%2e"); 1215 } 1216 1217 /** 1218 * Removes a path segment. When this method returns the last segment is always "", which means 1219 * the encoded path will have a trailing '/'. 1220 * 1221 * <p>Popping "/a/b/c/" yields "/a/b/". In this case the list of path segments goes from 1222 * ["a", "b", "c", ""] to ["a", "b", ""]. 1223 * 1224 * <p>Popping "/a/b/c" also yields "/a/b/". The list of path segments goes from ["a", "b", "c"] 1225 * to ["a", "b", ""]. 1226 */ 1227 private void pop() { 1228 // ANDROID-BEGIN: http://b/29983827 1229 // Cannot pop() if there isn't even a "/". Leave the path as is. This method is only used 1230 // from push(). push() handles the empty case explicitly. 1231 if (encodedPathSegments.isEmpty()) { 1232 return; 1233 } 1234 // ANDROID-END: http://b/29983827 1235 1236 String removed = encodedPathSegments.remove(encodedPathSegments.size() - 1); 1237 1238 // Make sure the path ends with a '/' by either adding an empty string or clearing a segment. 1239 if (removed.isEmpty() && !encodedPathSegments.isEmpty()) { 1240 encodedPathSegments.set(encodedPathSegments.size() - 1, ""); 1241 } else { 1242 encodedPathSegments.add(""); 1243 } 1244 } 1245 1246 /** 1247 * Increments {@code pos} until {@code input[pos]} is not ASCII whitespace. 
Stops at {@code 1248 * limit}. 1249 */ 1250 private int skipLeadingAsciiWhitespace(String input, int pos, int limit) { 1251 for (int i = pos; i < limit; i++) { 1252 switch (input.charAt(i)) { 1253 case '\t': 1254 case '\n': 1255 case '\f': 1256 case '\r': 1257 case ' ': 1258 continue; 1259 default: 1260 return i; 1261 } 1262 } 1263 return limit; 1264 } 1265 1266 /** 1267 * Decrements {@code limit} until {@code input[limit - 1]} is not ASCII whitespace. Stops at 1268 * {@code pos}. 1269 */ 1270 private int skipTrailingAsciiWhitespace(String input, int pos, int limit) { 1271 for (int i = limit - 1; i >= pos; i--) { 1272 switch (input.charAt(i)) { 1273 case '\t': 1274 case '\n': 1275 case '\f': 1276 case '\r': 1277 case ' ': 1278 continue; 1279 default: 1280 return i + 1; 1281 } 1282 } 1283 return pos; 1284 } 1285 1286 /** 1287 * Returns the index of the ':' in {@code input} that is after scheme characters. Returns -1 if 1288 * {@code input} does not have a scheme that starts at {@code pos}. 1289 */ 1290 private static int schemeDelimiterOffset(String input, int pos, int limit) { 1291 if (limit - pos < 2) return -1; 1292 1293 char c0 = input.charAt(pos); 1294 if ((c0 < 'a' || c0 > 'z') && (c0 < 'A' || c0 > 'Z')) return -1; // Not a scheme start char. 1295 1296 for (int i = pos + 1; i < limit; i++) { 1297 char c = input.charAt(i); 1298 1299 if ((c >= 'a' && c <= 'z') 1300 || (c >= 'A' && c <= 'Z') 1301 || (c >= '0' && c <= '9') 1302 || c == '+' 1303 || c == '-' 1304 || c == '.') { 1305 continue; // Scheme character. Keep going. 1306 } else if (c == ':') { 1307 return i; // Scheme prefix! 1308 } else { 1309 return -1; // Non-scheme character before the first ':'. 1310 } 1311 } 1312 1313 return -1; // No ':'; doesn't start with a scheme. 1314 } 1315 1316 /** Returns the number of '/' and '\' slashes in {@code input}, starting at {@code pos}. 
*/ 1317 private static int slashCount(String input, int pos, int limit) { 1318 int slashCount = 0; 1319 while (pos < limit) { 1320 char c = input.charAt(pos); 1321 if (c == '\\' || c == '/') { 1322 slashCount++; 1323 pos++; 1324 } else { 1325 break; 1326 } 1327 } 1328 return slashCount; 1329 } 1330 1331 /** Finds the first ':' in {@code input}, skipping characters between square braces "[...]". */ 1332 private static int portColonOffset(String input, int pos, int limit) { 1333 for (int i = pos; i < limit; i++) { 1334 switch (input.charAt(i)) { 1335 case '[': 1336 while (++i < limit) { 1337 if (input.charAt(i) == ']') break; 1338 } 1339 break; 1340 case ':': 1341 return i; 1342 } 1343 } 1344 return limit; // No colon. 1345 } 1346 1347 private static String canonicalizeHost(String input, int pos, int limit) { 1348 // Start by percent decoding the host. The WHATWG spec suggests doing this only after we've 1349 // checked for IPv6 square braces. But Chrome does it first, and that's more lenient. 1350 String percentDecoded = percentDecode(input, pos, limit, false); 1351 1352 // If the input is encased in square braces "[...]", drop 'em. We have an IPv6 address. 1353 if (percentDecoded.startsWith("[") && percentDecoded.endsWith("]")) { 1354 InetAddress inetAddress = decodeIpv6(percentDecoded, 1, percentDecoded.length() - 1); 1355 if (inetAddress == null) return null; 1356 byte[] address = inetAddress.getAddress(); 1357 if (address.length == 16) return inet6AddressToAscii(address); 1358 throw new AssertionError(); 1359 } 1360 1361 return domainToAscii(percentDecoded); 1362 } 1363 1364 /** Decodes an IPv6 address like 1111:2222:3333:4444:5555:6666:7777:8888 or ::1. */ 1365 private static InetAddress decodeIpv6(String input, int pos, int limit) { 1366 byte[] address = new byte[16]; 1367 int b = 0; 1368 int compress = -1; 1369 int groupOffset = -1; 1370 1371 for (int i = pos; i < limit; ) { 1372 if (b == address.length) return null; // Too many groups. 
1373 1374 // Read a delimiter. 1375 if (i + 2 <= limit && input.regionMatches(i, "::", 0, 2)) { 1376 // Compression "::" delimiter, which is anywhere in the input, including its prefix. 1377 if (compress != -1) return null; // Multiple "::" delimiters. 1378 i += 2; 1379 b += 2; 1380 compress = b; 1381 if (i == limit) break; 1382 } else if (b != 0) { 1383 // Group separator ":" delimiter. 1384 if (input.regionMatches(i, ":", 0, 1)) { 1385 i++; 1386 } else if (input.regionMatches(i, ".", 0, 1)) { 1387 // If we see a '.', rewind to the beginning of the previous group and parse as IPv4. 1388 if (!decodeIpv4Suffix(input, groupOffset, limit, address, b - 2)) return null; 1389 b += 2; // We rewound two bytes and then added four. 1390 break; 1391 } else { 1392 return null; // Wrong delimiter. 1393 } 1394 } 1395 1396 // Read a group, one to four hex digits. 1397 int value = 0; 1398 groupOffset = i; 1399 for (; i < limit; i++) { 1400 char c = input.charAt(i); 1401 int hexDigit = decodeHexDigit(c); 1402 if (hexDigit == -1) break; 1403 value = (value << 4) + hexDigit; 1404 } 1405 int groupLength = i - groupOffset; 1406 if (groupLength == 0 || groupLength > 4) return null; // Group is the wrong size. 1407 1408 // We've successfully read a group. Assign its value to our byte array. 1409 address[b++] = (byte) ((value >>> 8) & 0xff); 1410 address[b++] = (byte) (value & 0xff); 1411 } 1412 1413 // All done. If compression happened, we need to move bytes to the right place in the 1414 // address. Here's a sample: 1415 // 1416 // input: "1111:2222:3333::7777:8888" 1417 // before: { 11, 11, 22, 22, 33, 33, 00, 00, 77, 77, 88, 88, 00, 00, 00, 00 } 1418 // compress: 6 1419 // b: 10 1420 // after: { 11, 11, 22, 22, 33, 33, 00, 00, 00, 00, 00, 00, 77, 77, 88, 88 } 1421 // 1422 if (b != address.length) { 1423 if (compress == -1) return null; // Address didn't have compression or enough groups. 
1424 System.arraycopy(address, compress, address, address.length - (b - compress), b - compress); 1425 Arrays.fill(address, compress, compress + (address.length - b), (byte) 0); 1426 } 1427 1428 try { 1429 return InetAddress.getByAddress(address); 1430 } catch (UnknownHostException e) { 1431 throw new AssertionError(); 1432 } 1433 } 1434 1435 /** Decodes an IPv4 address suffix of an IPv6 address, like 1111::5555:6666:192.168.0.1. */ 1436 private static boolean decodeIpv4Suffix( 1437 String input, int pos, int limit, byte[] address, int addressOffset) { 1438 int b = addressOffset; 1439 1440 for (int i = pos; i < limit; ) { 1441 if (b == address.length) return false; // Too many groups. 1442 1443 // Read a delimiter. 1444 if (b != addressOffset) { 1445 if (input.charAt(i) != '.') return false; // Wrong delimiter. 1446 i++; 1447 } 1448 1449 // Read 1 or more decimal digits for a value in 0..255. 1450 int value = 0; 1451 int groupOffset = i; 1452 for (; i < limit; i++) { 1453 char c = input.charAt(i); 1454 if (c < '0' || c > '9') break; 1455 if (value == 0 && groupOffset != i) return false; // Reject unnecessary leading '0's. 1456 value = (value * 10) + c - '0'; 1457 if (value > 255) return false; // Value out of range. 1458 } 1459 int groupLength = i - groupOffset; 1460 if (groupLength == 0) return false; // No digits. 1461 1462 // We've successfully read a byte. 1463 address[b++] = (byte) value; 1464 } 1465 1466 if (b != addressOffset + 4) return false; // Too few groups. We wanted exactly four. 1467 return true; // Success. 1468 } 1469 1470 /** 1471 * Performs IDN ToASCII encoding and canonicalize the result to lowercase. e.g. This converts 1472 * {@code .net} to {@code xn--n3h.net}, and {@code WwW.GoOgLe.cOm} to {@code www.google.com}. 1473 * {@code null} will be returned if the input cannot be ToASCII encoded or if the result 1474 * contains unsupported ASCII characters. 
1475 */ 1476 private static String domainToAscii(String input) { 1477 try { 1478 String result = IDN.toASCII(input).toLowerCase(Locale.US); 1479 if (result.isEmpty()) return null; 1480 1481 // Confirm that the IDN ToASCII result doesn't contain any illegal characters. 1482 if (containsInvalidHostnameAsciiCodes(result)) { 1483 return null; 1484 } 1485 // TODO: implement all label limits. 1486 return result; 1487 } catch (IllegalArgumentException e) { 1488 return null; 1489 } 1490 } 1491 1492 private static boolean containsInvalidHostnameAsciiCodes(String hostnameAscii) { 1493 for (int i = 0; i < hostnameAscii.length(); i++) { 1494 char c = hostnameAscii.charAt(i); 1495 // The WHATWG Host parsing rules accepts some character codes which are invalid by 1496 // definition for OkHttp's host header checks (and the WHATWG Host syntax definition). Here 1497 // we rule out characters that would cause problems in host headers. 1498 if (c <= '\u001f' || c >= '\u007f') { 1499 return true; 1500 } 1501 // Check for the characters mentioned in the WHATWG Host parsing spec: 1502 // U+0000, U+0009, U+000A, U+000D, U+0020, "#", "%", "/", ":", "?", "@", "[", "\", and "]" 1503 // (excluding the characters covered above). 1504 if (" #%/:?@[\\]".indexOf(c) != -1) { 1505 return true; 1506 } 1507 } 1508 return false; 1509 } 1510 1511 private static String inet6AddressToAscii(byte[] address) { 1512 // Go through the address looking for the longest run of 0s. Each group is 2-bytes. 1513 int longestRunOffset = -1; 1514 int longestRunLength = 0; 1515 for (int i = 0; i < address.length; i += 2) { 1516 int currentRunOffset = i; 1517 while (i < 16 && address[i] == 0 && address[i + 1] == 0) { 1518 i += 2; 1519 } 1520 int currentRunLength = i - currentRunOffset; 1521 if (currentRunLength > longestRunLength) { 1522 longestRunOffset = currentRunOffset; 1523 longestRunLength = currentRunLength; 1524 } 1525 } 1526 1527 // Emit each 2-byte group in hex, separated by ':'. 
The longest run of zeroes is "::". 1528 Buffer result = new Buffer(); 1529 for (int i = 0; i < address.length; ) { 1530 if (i == longestRunOffset) { 1531 result.writeByte(':'); 1532 i += longestRunLength; 1533 if (i == 16) result.writeByte(':'); 1534 } else { 1535 if (i > 0) result.writeByte(':'); 1536 int group = (address[i] & 0xff) << 8 | address[i + 1] & 0xff; 1537 result.writeHexadecimalUnsignedLong(group); 1538 i += 2; 1539 } 1540 } 1541 return result.readUtf8(); 1542 } 1543 1544 private static int parsePort(String input, int pos, int limit) { 1545 try { 1546 // Canonicalize the port string to skip '\n' etc. 1547 String portString = canonicalize(input, pos, limit, "", false, false, false, true); 1548 int i = Integer.parseInt(portString); 1549 if (i > 0 && i <= 65535) return i; 1550 return -1; 1551 } catch (NumberFormatException e) { 1552 return -1; // Invalid port. 1553 } 1554 } 1555 } 1556 1557 /** 1558 * Returns the index of the first character in {@code input} that contains a character in {@code 1559 * delimiters}. Returns limit if there is no such character. 1560 */ 1561 private static int delimiterOffset(String input, int pos, int limit, String delimiters) { 1562 for (int i = pos; i < limit; i++) { 1563 if (delimiters.indexOf(input.charAt(i)) != -1) return i; 1564 } 1565 return limit; 1566 } 1567 1568 static String percentDecode(String encoded, boolean plusIsSpace) { 1569 return percentDecode(encoded, 0, encoded.length(), plusIsSpace); 1570 } 1571 1572 private List<String> percentDecode(List<String> list, boolean plusIsSpace) { 1573 List<String> result = new ArrayList<>(list.size()); 1574 for (String s : list) { 1575 result.add(s != null ? 
percentDecode(s, plusIsSpace) : null); 1576 } 1577 return Collections.unmodifiableList(result); 1578 } 1579 1580 static String percentDecode(String encoded, int pos, int limit, boolean plusIsSpace) { 1581 for (int i = pos; i < limit; i++) { 1582 char c = encoded.charAt(i); 1583 if (c == '%' || (c == '+' && plusIsSpace)) { 1584 // Slow path: the character at i requires decoding! 1585 Buffer out = new Buffer(); 1586 out.writeUtf8(encoded, pos, i); 1587 percentDecode(out, encoded, i, limit, plusIsSpace); 1588 return out.readUtf8(); 1589 } 1590 } 1591 1592 // Fast path: no characters in [pos..limit) required decoding. 1593 return encoded.substring(pos, limit); 1594 } 1595 1596 static void percentDecode(Buffer out, String encoded, int pos, int limit, boolean plusIsSpace) { 1597 int codePoint; 1598 for (int i = pos; i < limit; i += Character.charCount(codePoint)) { 1599 codePoint = encoded.codePointAt(i); 1600 if (codePoint == '%' && i + 2 < limit) { 1601 int d1 = decodeHexDigit(encoded.charAt(i + 1)); 1602 int d2 = decodeHexDigit(encoded.charAt(i + 2)); 1603 if (d1 != -1 && d2 != -1) { 1604 out.writeByte((d1 << 4) + d2); 1605 i += 2; 1606 continue; 1607 } 1608 } else if (codePoint == '+' && plusIsSpace) { 1609 out.writeByte(' '); 1610 continue; 1611 } 1612 out.writeUtf8CodePoint(codePoint); 1613 } 1614 } 1615 1616 static boolean percentEncoded(String encoded, int pos, int limit) { 1617 return pos + 2 < limit 1618 && encoded.charAt(pos) == '%' 1619 && decodeHexDigit(encoded.charAt(pos + 1)) != -1 1620 && decodeHexDigit(encoded.charAt(pos + 2)) != -1; 1621 } 1622 1623 static int decodeHexDigit(char c) { 1624 if (c >= '0' && c <= '9') return c - '0'; 1625 if (c >= 'a' && c <= 'f') return c - 'a' + 10; 1626 if (c >= 'A' && c <= 'F') return c - 'A' + 10; 1627 return -1; 1628 } 1629 1630 /** 1631 * Returns a substring of {@code input} on the range {@code [pos..limit)} with the following 1632 * transformations: 1633 * <ul> 1634 * <li>Tabs, newlines, form feeds and carriage 
returns are skipped. 1635 * <li>In queries, ' ' is encoded to '+' and '+' is encoded to "%2B". 1636 * <li>Characters in {@code encodeSet} are percent-encoded. 1637 * <li>Control characters and non-ASCII characters are percent-encoded. 1638 * <li>All other characters are copied without transformation. 1639 * </ul> 1640 * 1641 * @param alreadyEncoded true to leave '%' as-is; false to convert it to '%25'. 1642 * @param strict true to encode '%' if it is not the prefix of a valid percent encoding. 1643 * @param plusIsSpace true to encode '+' as "%2B" if it is not already encoded 1644 * @param asciiOnly true to encode all non-ASCII codepoints. 1645 */ 1646 static String canonicalize(String input, int pos, int limit, String encodeSet, 1647 boolean alreadyEncoded, boolean strict, boolean plusIsSpace, boolean asciiOnly) { 1648 int codePoint; 1649 for (int i = pos; i < limit; i += Character.charCount(codePoint)) { 1650 codePoint = input.codePointAt(i); 1651 if (codePoint < 0x20 1652 || codePoint == 0x7f 1653 || codePoint >= 0x80 && asciiOnly 1654 || encodeSet.indexOf(codePoint) != -1 1655 || codePoint == '%' && (!alreadyEncoded || strict && !percentEncoded(input, i, limit)) 1656 || codePoint == '+' && plusIsSpace) { 1657 // Slow path: the character at i requires encoding! 1658 Buffer out = new Buffer(); 1659 out.writeUtf8(input, pos, i); 1660 canonicalize(out, input, i, limit, encodeSet, alreadyEncoded, strict, plusIsSpace, 1661 asciiOnly); 1662 return out.readUtf8(); 1663 } 1664 } 1665 1666 // Fast path: no characters in [pos..limit) required encoding. 1667 return input.substring(pos, limit); 1668 } 1669 1670 static void canonicalize(Buffer out, String input, int pos, int limit, String encodeSet, 1671 boolean alreadyEncoded, boolean strict, boolean plusIsSpace, boolean asciiOnly) { 1672 Buffer utf8Buffer = null; // Lazily allocated. 
1673 int codePoint; 1674 for (int i = pos; i < limit; i += Character.charCount(codePoint)) { 1675 codePoint = input.codePointAt(i); 1676 if (alreadyEncoded 1677 && (codePoint == '\t' || codePoint == '\n' || codePoint == '\f' || codePoint == '\r')) { 1678 // Skip this character. 1679 } else if (codePoint == '+' && plusIsSpace) { 1680 // Encode '+' as '%2B' since we permit ' ' to be encoded as either '+' or '%20'. 1681 out.writeUtf8(alreadyEncoded ? "+" : "%2B"); 1682 } else if (codePoint < 0x20 1683 || codePoint == 0x7f 1684 || codePoint >= 0x80 && asciiOnly 1685 || encodeSet.indexOf(codePoint) != -1 1686 || codePoint == '%' && (!alreadyEncoded || strict && !percentEncoded(input, i, limit))) { 1687 // Percent encode this character. 1688 if (utf8Buffer == null) { 1689 utf8Buffer = new Buffer(); 1690 } 1691 utf8Buffer.writeUtf8CodePoint(codePoint); 1692 while (!utf8Buffer.exhausted()) { 1693 int b = utf8Buffer.readByte() & 0xff; 1694 out.writeByte('%'); 1695 out.writeByte(HEX_DIGITS[(b >> 4) & 0xf]); 1696 out.writeByte(HEX_DIGITS[b & 0xf]); 1697 } 1698 } else { 1699 // This character doesn't need encoding. Just copy it over. 1700 out.writeUtf8CodePoint(codePoint); 1701 } 1702 } 1703 } 1704 1705 static String canonicalize(String input, String encodeSet, boolean alreadyEncoded, boolean strict, 1706 boolean plusIsSpace, boolean asciiOnly) { 1707 return canonicalize( 1708 input, 0, input.length(), encodeSet, alreadyEncoded, strict, plusIsSpace, asciiOnly); 1709 } 1710 } 1711