Home | History | Annotate | Download | only in okhttp
      1 /*
      2  * Copyright (C) 2015 Square, Inc.
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 package com.squareup.okhttp;
     17 
     18 import java.net.IDN;
     19 import java.net.InetAddress;
     20 import java.net.MalformedURLException;
     21 import java.net.URI;
     22 import java.net.URISyntaxException;
     23 import java.net.URL;
     24 import java.net.UnknownHostException;
     25 import java.util.ArrayList;
     26 import java.util.Arrays;
     27 import java.util.Collections;
     28 import java.util.LinkedHashSet;
     29 import java.util.List;
     30 import java.util.Locale;
     31 import java.util.Set;
     32 import okio.Buffer;
     33 
     34 /**
     35  * A uniform resource locator (URL) with a scheme of either {@code http} or {@code https}. Use this
     36  * class to compose and decompose Internet addresses. For example, this code will compose and print
     37  * a URL for Google search: <pre>   {@code
     38  *
     39  *   HttpUrl url = new HttpUrl.Builder()
     40  *       .scheme("https")
     41  *       .host("www.google.com")
     42  *       .addPathSegment("search")
     43  *       .addQueryParameter("q", "polar bears")
     44  *       .build();
     45  *   System.out.println(url);
     46  * }</pre>
     47  *
     48  * which prints: <pre>   {@code
     49  *
     50  *     https://www.google.com/search?q=polar%20bears
     51  * }</pre>
     52  *
     53  * As another example, this code prints the human-readable query parameters of a Twitter search:
     54  * <pre>   {@code
     55  *
     56  *   HttpUrl url = HttpUrl.parse("https://twitter.com/search?q=cute%20%23puppies&f=images");
     57  *   for (int i = 0, size = url.querySize(); i < size; i++) {
     58  *     System.out.println(url.queryParameterName(i) + ": " + url.queryParameterValue(i));
     59  *   }
     60  * }</pre>
     61  *
     62  * which prints: <pre>   {@code
     63  *
     64  *   q: cute #puppies
     65  *   f: images
     66  * }</pre>
     67  *
     68  * In addition to composing URLs from their component parts and decomposing URLs into their
     69  * component parts, this class implements relative URL resolution: what address you'd reach by
     70  * clicking a relative link on a specified page. For example: <pre>   {@code
     71  *
     72  *   HttpUrl base = HttpUrl.parse("https://www.youtube.com/user/WatchTheDaily/videos");
     73  *   HttpUrl link = base.resolve("../../watch?v=cbP2N1BQdYc");
     74  *   System.out.println(link);
     75  * }</pre>
     76  *
     77  * which prints: <pre>   {@code
     78  *
     79  *   https://www.youtube.com/watch?v=cbP2N1BQdYc
     80  * }</pre>
     81  *
     82  * <h3>What's in a URL?</h3>
     83  *
     84  * A URL has several components.
     85  *
     86  * <h4>Scheme</h4>
      87  * Sometimes referred to as <i>protocol</i>, a URL's scheme describes what mechanism should be used
     88  * to retrieve the resource. Although URLs have many schemes ({@code mailto}, {@code file}, {@code
     89  * ftp}), this class only supports {@code http} and {@code https}. Use {@link URI java.net.URI} for
     90  * URLs with arbitrary schemes.
     91  *
     92  * <h4>Username and Password</h4>
     93  * Username and password are either present, or the empty string {@code ""} if absent. This class
     94  * offers no mechanism to differentiate empty from absent. Neither of these components are popular
     95  * in practice. Typically HTTP applications use other mechanisms for user identification and
     96  * authentication.
     97  *
     98  * <h4>Host</h4>
     99  * The host identifies the webserver that serves the URL's resource. It is either a hostname like
    100  * {@code square.com} or {@code localhost}, an IPv4 address like {@code 192.168.0.1}, or an IPv6
    101  * address like {@code ::1}.
    102  *
    103  * <p>Usually a webserver is reachable with multiple identifiers: its IP addresses, registered
    104  * domain names, and even {@code localhost} when connecting from the server itself. Each of a
    105  * webserver's names is a distinct URL and they are not interchangeable. For example, even if
    106  * {@code http://square.github.io/dagger} and {@code http://google.github.io/dagger} are served by
    107  * the same IP address, the two URLs identify different resources.
    108  *
    109  * <h4>Port</h4>
    110  * The port used to connect to the webserver. By default this is 80 for HTTP and 443 for HTTPS. This
    111  * class never returns -1 for the port: if no port is explicitly specified in the URL then the
    112  * scheme's default is used.
    113  *
    114  * <h4>Path</h4>
    115  * The path identifies a specific resource on the host. Paths have a hierarchical structure like
    116  * "/square/okhttp/issues/1486". Each path segment is prefixed with "/". This class offers methods
    117  * to compose and decompose paths by segment. If a path's last segment is the empty string, then the
    118  * path ends with "/". This class always builds non-empty paths: if the path is omitted it defaults
    119  * to "/", which is a path whose only segment is the empty string.
    120  *
    121  * <h4>Query</h4>
    122  * The query is optional: it can be null, empty, or non-empty. For many HTTP URLs the query string
    123  * is subdivided into a collection of name-value parameters. This class offers methods to set the
    124  * query as the single string, or as individual name-value parameters. With name-value parameters
    125  * the values are optional and names may be repeated.
    126  *
    127  * <h4>Fragment</h4>
    128  * The fragment is optional: it can be null, empty, or non-empty. Unlike host, port, path, and query
    129  * the fragment is not sent to the webserver: it's private to the client.
    130  *
    131  * <h3>Encoding</h3>
    132  * Each component must be encoded before it is embedded in the complete URL. As we saw above, the
    133  * string {@code cute #puppies} is encoded as {@code cute%20%23puppies} when used as a query
    134  * parameter value.
    135  *
    136  * <h4>Percent encoding</h4>
    137  * Percent encoding replaces a character (like {@code \ud83c\udf69}) with its UTF-8 hex bytes (like
    138  * {@code %F0%9F%8D%A9}). This approach works for whitespace characters, control characters,
    139  * non-ASCII characters, and characters that already have another meaning in a particular context.
    140  *
    141  * <p>Percent encoding is used in every URL component except for the hostname. But the set of
    142  * characters that need to be encoded is different for each component. For example, the path
    143  * component must escape all of its {@code ?} characters, otherwise it could be interpreted as the
    144  * start of the URL's query. But within the query and fragment components, the {@code ?} character
    145  * doesn't delimit anything and doesn't need to be escaped. <pre>   {@code
    146  *
    147  *   HttpUrl url = HttpUrl.parse("http://who-let-the-dogs.out").newBuilder()
    148  *       .addPathSegment("_Who?_")
    149  *       .query("_Who?_")
    150  *       .fragment("_Who?_")
    151  *       .build();
    152  *   System.out.println(url);
    153  * }</pre>
    154  *
    155  * This prints: <pre>   {@code
    156  *
    157  *   http://who-let-the-dogs.out/_Who%3F_?_Who?_#_Who?_
    158  * }</pre>
    159  *
    160  * When parsing URLs that lack percent encoding where it is required, this class will percent encode
    161  * the offending characters.
    162  *
    163  * <h4>IDNA Mapping and Punycode encoding</h4>
    164  * Hostnames have different requirements and use a different encoding scheme. It consists of IDNA
    165  * mapping and Punycode encoding.
    166  *
    167  * <p>In order to avoid confusion and discourage phishing attacks,
    168  * <a href="http://www.unicode.org/reports/tr46/#ToASCII">IDNA Mapping</a> transforms names to avoid
    169  * confusing characters. This includes basic case folding: transforming shouting {@code SQUARE.COM}
    170  * into cool and casual {@code square.com}. It also handles more exotic characters. For example, the
     171  * Unicode trademark sign (™) could be confused for the letters "TM" in {@code http://ho™mail.com}.
     172  * To mitigate this, the single character (™) maps to the string (tm). There is similar policy for
    173  * all of the 1.1 million Unicode code points. Note that some code points such as "\ud83c\udf69" are
    174  * not mapped and cannot be used in a hostname.
    175  *
    176  * <p><a href="http://ietf.org/rfc/rfc3492.txt">Punycode</a> converts a Unicode string to an ASCII
     177  * string to make international domain names work everywhere. For example, "σ" encodes as
    178  * "xn--4xa". The encoded string is not human readable, but can be used with classes like {@link
    179  * InetAddress} to establish connections.
    180  *
    181  * <h3>Why another URL model?</h3>
    182  * Java includes both {@link URL java.net.URL} and {@link URI java.net.URI}. We offer a new URL
    183  * model to address problems that the others don't.
    184  *
    185  * <h4>Different URLs should be different</h4>
    186  * Although they have different content, {@code java.net.URL} considers the following two URLs
    187  * equal, and the {@link Object#equals equals()} method between them returns true:
    188  * <ul>
    189  *   <li>http://square.github.io/
    190  *   <li>http://google.github.io/
    191  * </ul>
    192  * This is because those two hosts share the same IP address. This is an old, bad design decision
    193  * that makes {@code java.net.URL} unusable for many things. It shouldn't be used as a {@link
    194  * java.util.Map Map} key or in a {@link Set}. Doing so is both inefficient because equality may
    195  * require a DNS lookup, and incorrect because unequal URLs may be equal because of how they are
    196  * hosted.
    197  *
    198  * <h4>Equal URLs should be equal</h4>
    199  * These two URLs are semantically identical, but {@code java.net.URI} disagrees:
    200  * <ul>
    201  *   <li>http://host:80/
    202  *   <li>http://host
    203  * </ul>
    204  * Both the unnecessary port specification ({@code :80}) and the absent trailing slash ({@code /})
    205  * cause URI to bucket the two URLs separately. This harms URI's usefulness in collections. Any
    206  * application that stores information-per-URL will need to either canonicalize manually, or suffer
    207  * unnecessary redundancy for such URLs.
    208  *
    209  * <p>Because they don't attempt canonical form, these classes are surprisingly difficult to use
    210  * securely. Suppose you're building a webservice that checks that incoming paths are prefixed
    211  * "/static/images/" before serving the corresponding assets from the filesystem. <pre>   {@code
    212  *
    213  *   String attack = "http://example.com/static/images/../../../../../etc/passwd";
    214  *   System.out.println(new URL(attack).getPath());
    215  *   System.out.println(new URI(attack).getPath());
    216  *   System.out.println(HttpUrl.parse(attack).path());
    217  * }</pre>
    218  *
     219  * By not canonicalizing the input paths, they are complicit in directory traversal attacks. Code that
    220  * checks only the path prefix may suffer!
    221  * <pre>   {@code
    222  *
    223  *    /static/images/../../../../../etc/passwd
    224  *    /static/images/../../../../../etc/passwd
    225  *    /etc/passwd
    226  * }</pre>
    227  *
    228  * <h4>If it works on the web, it should work in your application</h4>
    229  * The {@code java.net.URI} class is strict around what URLs it accepts. It rejects URLs like
    230  * "http://example.com/abc|def" because the '|' character is unsupported. This class is more
    231  * forgiving: it will automatically percent-encode the '|', yielding "http://example.com/abc%7Cdef".
     232  * This kind of behavior is consistent with web browsers. {@code HttpUrl} prefers consistency with
    233  * major web browsers over consistency with obsolete specifications.
    234  *
    235  * <h4>Paths and Queries should decompose</h4>
    236  * Neither of the built-in URL models offer direct access to path segments or query parameters.
    237  * Manually using {@code StringBuilder} to assemble these components is cumbersome: do '+'
    238  * characters get silently replaced with spaces? If a query parameter contains a '&amp;', does that
    239  * get escaped? By offering methods to read and write individual query parameters directly,
    240  * application developers are saved from the hassles of encoding and decoding.
    241  *
    242  * <h4>Plus a modern API</h4>
    243  * The URL (JDK1.0) and URI (Java 1.4) classes predate builders and instead use telescoping
    244  * constructors. For example, there's no API to compose a URI with a custom port without also
    245  * providing a query and fragment.
    246  *
    247  * <p>Instances of {@link HttpUrl} are well-formed and always have a scheme, host, and path. With
    248  * {@code java.net.URL} it's possible to create an awkward URL like {@code http:/} with scheme and
    249  * path but no hostname. Building APIs that consume such malformed values is difficult!
    250  *
    251  * <p>This class has a modern API. It avoids punitive checked exceptions: {@link #parse parse()}
    252  * returns null if the input is an invalid URL. You can even be explicit about whether each
    253  * component has been encoded already.
    254  */
    255 public final class HttpUrl {
  /** Uppercase hexadecimal digits; the element at index {@code i} is the digit for value {@code i}. */
  private static final char[] HEX_DIGITS =
      { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };

  // Per-component character sets.
  // NOTE(review): judging by the names, each set lists the characters that are percent-encoded in
  // the corresponding URL component, and the *_URI variants list extra characters escaped only
  // when re-encoding for java.net.URI. The code that consumes these sets is outside this view;
  // confirm before relying on this description.
  static final String USERNAME_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#";
  static final String PASSWORD_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#";
  static final String PATH_SEGMENT_ENCODE_SET = " \"<>^`{}|/\\?#";
  static final String PATH_SEGMENT_ENCODE_SET_URI = "[]";
  static final String QUERY_ENCODE_SET = " \"'<>#";
  static final String QUERY_COMPONENT_ENCODE_SET = " \"'<>#&=";
  static final String QUERY_COMPONENT_ENCODE_SET_URI = "\\^`{|}";
  static final String FORM_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#&!$(),~";
  static final String FRAGMENT_ENCODE_SET = "";
  static final String FRAGMENT_ENCODE_SET_URI = " \"#<>\\^`{|}";

  /** Either "http" or "https". */
  private final String scheme;

  /** Decoded username; the empty string when the URL has none. */
  private final String username;

  /** Decoded password; the empty string when the URL has none. */
  private final String password;

  /** Canonical hostname. */
  private final String host;

  /** Either 80, 443 or a user-specified port. In range [1..65535]. */
  private final int port;

  /**
   * A list of canonical path segments. This list always contains at least one element, which may
   * be the empty string. Each segment is formatted with a leading '/', so if path segments were
   * ["a", "b", ""], then the encoded path would be "/a/b/".
   */
  private final List<String> pathSegments;

  /**
   * Alternating, decoded query names and values, or null for no query. Names may be empty or
   * non-empty, but never null. Values are null if the name has no corresponding '=' separator, or
   * empty, or non-empty.
   */
  private final List<String> queryNamesAndValues;

  /** Decoded fragment, or null when the URL has no fragment. */
  private final String fragment;

  /** Canonical URL. This string backs {@code equals}, {@code hashCode} and {@code toString}. */
  private final String url;
    303 
    304   private HttpUrl(Builder builder) {
    305     this.scheme = builder.scheme;
    306     this.username = percentDecode(builder.encodedUsername, false);
    307     this.password = percentDecode(builder.encodedPassword, false);
    308     this.host = builder.host;
    309     this.port = builder.effectivePort();
    310     this.pathSegments = percentDecode(builder.encodedPathSegments, false);
    311     this.queryNamesAndValues = builder.encodedQueryNamesAndValues != null
    312         ? percentDecode(builder.encodedQueryNamesAndValues, true)
    313         : null;
    314     this.fragment = builder.encodedFragment != null
    315         ? percentDecode(builder.encodedFragment, false)
    316         : null;
    317     this.url = builder.toString();
    318   }
    319 
    320   /** Returns this URL as a {@link URL java.net.URL}. */
    321   public URL url() {
    322     try {
    323       return new URL(url);
    324     } catch (MalformedURLException e) {
    325       throw new RuntimeException(e); // Unexpected!
    326     }
    327   }
    328 
    329   /**
    330    * Returns this URL as a {@link URI java.net.URI}. Because {@code URI} is more strict than this
    331    * class, the returned URI may be semantically different from this URL:
    332    * <ul>
    333    *   <li>Characters forbidden by URI like {@code [} and {@code |} will be escaped.
    334    *   <li>Invalid percent-encoded sequences like {@code %xx} will be encoded like {@code %25xx}.
    335    *   <li>Whitespace and control characters in the fragment will be stripped.
    336    * </ul>
    337    *
    338    * <p>These differences may have a significant consequence when the URI is interpretted by a
    339    * webserver. For this reason the {@linkplain URI URI class} and this method should be avoided.
    340    */
    341   public URI uri() {
    342     String uri = newBuilder().reencodeForUri().toString();
    343     try {
    344       return new URI(uri);
    345     } catch (URISyntaxException e) {
    346       // Unlikely edge case: the URI has a forbidden character in the fragment. Strip it & retry.
    347       try {
    348         String stripped = uri.replaceAll("[\\u0000-\\u001F\\u007F-\\u009F\\p{javaWhitespace}]", "");
    349         return URI.create(stripped);
    350       } catch (Exception e1) {
    351         throw new RuntimeException(e); // Unexpected!
    352       }
    353     }
    354   }
    355 
    356   /** Returns either "http" or "https". */
    357   public String scheme() {
    358     return scheme;
    359   }
    360 
    361   public boolean isHttps() {
    362     return scheme.equals("https");
    363   }
    364 
    365   /** Returns the username, or an empty string if none is set. */
    366   public String encodedUsername() {
    367     if (username.isEmpty()) return "";
    368     int usernameStart = scheme.length() + 3; // "://".length() == 3.
    369     int usernameEnd = delimiterOffset(url, usernameStart, url.length(), ":@");
    370     return url.substring(usernameStart, usernameEnd);
    371   }
    372 
    373   public String username() {
    374     return username;
    375   }
    376 
    377   /** Returns the password, or an empty string if none is set. */
    378   public String encodedPassword() {
    379     if (password.isEmpty()) return "";
    380     int passwordStart = url.indexOf(':', scheme.length() + 3) + 1;
    381     int passwordEnd = url.indexOf('@');
    382     return url.substring(passwordStart, passwordEnd);
    383   }
    384 
    385   /** Returns the decoded password, or an empty string if none is present. */
    386   public String password() {
    387     return password;
    388   }
    389 
    390   /**
    391    * Returns the host address suitable for use with {@link InetAddress#getAllByName(String)}. May
    392    * be:
    393    * <ul>
    394    *   <li>A regular host name, like {@code android.com}.
    395    *   <li>An IPv4 address, like {@code 127.0.0.1}.
    396    *   <li>An IPv6 address, like {@code ::1}. Note that there are no square braces.
    397    *   <li>An encoded IDN, like {@code xn--n3h.net}.
    398    * </ul>
    399    */
    400   public String host() {
    401     return host;
    402   }
    403 
    404   /**
    405    * Same as {@link #host} except that literal IPv6 addresses are surrounding by square
    406    * braces. For example, this method will return {@code [::1]} where {@code host} returns
    407    * {@code ::1}.
    408    */
    409   public String rfc2732host() {
    410     if (host.indexOf(':') == -1) {
    411       return host;
    412     }
    413 
    414     return "[" + host + "]";
    415   }
    416 
    417   /**
    418    * Returns the explicitly-specified port if one was provided, or the default port for this URL's
    419    * scheme. For example, this returns 8443 for {@code https://square.com:8443/} and 443 for {@code
    420    * https://square.com/}. The result is in {@code [1..65535]}.
    421    */
    422   public int port() {
    423     return port;
    424   }
    425 
    426   /**
    427    * Returns 80 if {@code scheme.equals("http")}, 443 if {@code scheme.equals("https")} and -1
    428    * otherwise.
    429    */
    430   public static int defaultPort(String scheme) {
    431     if (scheme.equals("http")) {
    432       return 80;
    433     } else if (scheme.equals("https")) {
    434       return 443;
    435     } else {
    436       return -1;
    437     }
    438   }
    439 
    440   public int pathSize() {
    441     return pathSegments.size();
    442   }
    443 
    444   /**
    445    * Returns the entire path of this URL, encoded for use in HTTP resource resolution.
    446    // ANDROID-BEGIN: http://b/29983827
    447    //   * The returned path is always nonempty and is prefixed with {@code /}.
    448    // ANDROID-END: http://b/29983827
    449    */
    450   public String encodedPath() {
    451     int pathStart = url.indexOf('/', scheme.length() + 3); // "://".length() == 3.
    452     // ANDROID-BEGIN: http://b/29983827
    453     if (pathStart == -1) {
    454       return "";
    455     }
    456     // ANDROID-END: http://b/29983827
    457     int pathEnd = delimiterOffset(url, pathStart, url.length(), "?#");
    458     return url.substring(pathStart, pathEnd);
    459   }
    460 
    461   static void pathSegmentsToString(StringBuilder out, List<String> pathSegments) {
    462     for (int i = 0, size = pathSegments.size(); i < size; i++) {
    463       out.append('/');
    464       out.append(pathSegments.get(i));
    465     }
    466   }
    467 
    468   public List<String> encodedPathSegments() {
    469     int pathStart = url.indexOf('/', scheme.length() + 3);
    470     // ANDROID-BEGIN: http://b/29983827
    471     if (pathStart == -1) {
    472       return new ArrayList<>();
    473     }
    474     // ANDROID-END: http://b/29983827
    475 
    476     int pathEnd = delimiterOffset(url, pathStart, url.length(), "?#");
    477     List<String> result = new ArrayList<>();
    478     for (int i = pathStart; i < pathEnd; ) {
    479       i++; // Skip the '/'.
    480       int segmentEnd = delimiterOffset(url, i, pathEnd, "/");
    481       result.add(url.substring(i, segmentEnd));
    482       i = segmentEnd;
    483     }
    484     return result;
    485   }
    486 
    487   public List<String> pathSegments() {
    488     return pathSegments;
    489   }
    490 
    491   /**
    492    * Returns the query of this URL, encoded for use in HTTP resource resolution. The returned string
    493    * may be null (for URLs with no query), empty (for URLs with an empty query) or non-empty (all
    494    * other URLs).
    495    */
    496   public String encodedQuery() {
    497     if (queryNamesAndValues == null) return null; // No query.
    498     int queryStart = url.indexOf('?') + 1;
    499     int queryEnd = delimiterOffset(url, queryStart + 1, url.length(), "#");
    500     return url.substring(queryStart, queryEnd);
    501   }
    502 
    503   static void namesAndValuesToQueryString(StringBuilder out, List<String> namesAndValues) {
    504     for (int i = 0, size = namesAndValues.size(); i < size; i += 2) {
    505       String name = namesAndValues.get(i);
    506       String value = namesAndValues.get(i + 1);
    507       if (i > 0) out.append('&');
    508       out.append(name);
    509       if (value != null) {
    510         out.append('=');
    511         out.append(value);
    512       }
    513     }
    514   }
    515 
    516   /**
    517    * Cuts {@code encodedQuery} up into alternating parameter names and values. This divides a
    518    * query string like {@code subject=math&easy&problem=5-2=3} into the list {@code ["subject",
    519    * "math", "easy", null, "problem", "5-2=3"]}. Note that values may be null and may contain
    520    * '=' characters.
    521    */
    522   static List<String> queryStringToNamesAndValues(String encodedQuery) {
    523     List<String> result = new ArrayList<>();
    524     for (int pos = 0; pos <= encodedQuery.length(); ) {
    525       int ampersandOffset = encodedQuery.indexOf('&', pos);
    526       if (ampersandOffset == -1) ampersandOffset = encodedQuery.length();
    527 
    528       int equalsOffset = encodedQuery.indexOf('=', pos);
    529       if (equalsOffset == -1 || equalsOffset > ampersandOffset) {
    530         result.add(encodedQuery.substring(pos, ampersandOffset));
    531         result.add(null); // No value for this name.
    532       } else {
    533         result.add(encodedQuery.substring(pos, equalsOffset));
    534         result.add(encodedQuery.substring(equalsOffset + 1, ampersandOffset));
    535       }
    536       pos = ampersandOffset + 1;
    537     }
    538     return result;
    539   }
    540 
    541   public String query() {
    542     if (queryNamesAndValues == null) return null; // No query.
    543     StringBuilder result = new StringBuilder();
    544     namesAndValuesToQueryString(result, queryNamesAndValues);
    545     return result.toString();
    546   }
    547 
    548   public int querySize() {
    549     return queryNamesAndValues != null ? queryNamesAndValues.size() / 2 : 0;
    550   }
    551 
    552   /**
    553    * Returns the first query parameter named {@code name} decoded using UTF-8, or null if there is
    554    * no such query parameter.
    555    */
    556   public String queryParameter(String name) {
    557     if (queryNamesAndValues == null) return null;
    558     for (int i = 0, size = queryNamesAndValues.size(); i < size; i += 2) {
    559       if (name.equals(queryNamesAndValues.get(i))) {
    560         return queryNamesAndValues.get(i + 1);
    561       }
    562     }
    563     return null;
    564   }
    565 
    566   public Set<String> queryParameterNames() {
    567     if (queryNamesAndValues == null) return Collections.emptySet();
    568     Set<String> result = new LinkedHashSet<>();
    569     for (int i = 0, size = queryNamesAndValues.size(); i < size; i += 2) {
    570       result.add(queryNamesAndValues.get(i));
    571     }
    572     return Collections.unmodifiableSet(result);
    573   }
    574 
    575   public List<String> queryParameterValues(String name) {
    576     if (queryNamesAndValues == null) return Collections.emptyList();
    577     List<String> result = new ArrayList<>();
    578     for (int i = 0, size = queryNamesAndValues.size(); i < size; i += 2) {
    579       if (name.equals(queryNamesAndValues.get(i))) {
    580         result.add(queryNamesAndValues.get(i + 1));
    581       }
    582     }
    583     return Collections.unmodifiableList(result);
    584   }
    585 
    586   public String queryParameterName(int index) {
    587     return queryNamesAndValues.get(index * 2);
    588   }
    589 
    590   public String queryParameterValue(int index) {
    591     return queryNamesAndValues.get(index * 2 + 1);
    592   }
    593 
    594   public String encodedFragment() {
    595     if (fragment == null) return null;
    596     int fragmentStart = url.indexOf('#') + 1;
    597     return url.substring(fragmentStart);
    598   }
    599 
    600   public String fragment() {
    601     return fragment;
    602   }
    603 
    604   /** Returns the URL that would be retrieved by following {@code link} from this URL. */
    605   public HttpUrl resolve(String link) {
    606     // ANDROID-BEGIN: http://b/29983827
    607     // Builder builder = new Builder();
    608     Builder builder = new Builder(false);
    609     // ANDROID-END: http://b/29983827
    610     Builder.ParseResult result = builder.parse(this, link);
    611     return result == Builder.ParseResult.SUCCESS ? builder.build() : null;
    612   }
    613 
    614   public Builder newBuilder() {
    615     // ANDROID-BEGIN: http://b/29983827
    616     // Builder builder = new Builder();
    617     Builder result = new Builder(false);
    618     // ANDROID-END: http://b/29983827
    619     result.scheme = scheme;
    620     result.encodedUsername = encodedUsername();
    621     result.encodedPassword = encodedPassword();
    622     result.host = host;
    623     // If we're set to a default port, unset it in case of a scheme change.
    624     result.port = port != defaultPort(scheme) ? port : -1;
    625     result.encodedPathSegments.clear();
    626     result.encodedPathSegments.addAll(encodedPathSegments());
    627     result.encodedQuery(encodedQuery());
    628     result.encodedFragment = encodedFragment();
    629     return result;
    630   }
    631 
    632   /**
    633    * Returns a new {@code HttpUrl} representing {@code url} if it is a well-formed HTTP or HTTPS
    634    * URL, or null if it isn't.
    635    */
    636   public static HttpUrl parse(String url) {
    637     // ANDROID-BEGIN: http://b/29983827
    638     // Builder builder = new Builder();
    639     Builder builder = new Builder(false);
    640     // ANDROID-END: http://b/29983827
    641     Builder.ParseResult result = builder.parse(null, url);
    642     return result == Builder.ParseResult.SUCCESS ? builder.build() : null;
    643   }
    644 
    645   /**
    646    * Returns an {@link HttpUrl} for {@code url} if its protocol is {@code http} or {@code https}, or
    647    * null if it has any other protocol.
    648    */
    649   public static HttpUrl get(URL url) {
    650     return parse(url.toString());
    651   }
    652 
    653   /**
    654    * Returns a new {@code HttpUrl} representing {@code url} if it is a well-formed HTTP or HTTPS
    655    * URL, or throws an exception if it isn't.
    656    *
    657    * @throws MalformedURLException if there was a non-host related URL issue
    658    * @throws UnknownHostException if the host was invalid
    659    */
    660   static HttpUrl getChecked(String url) throws MalformedURLException, UnknownHostException {
    661     // ANDROID-END: http://b/29983827
    662     // Builder builder = new Builder();
    663     Builder builder = new Builder(false);
    664     // ANDROID-END: http://b/29983827
    665     Builder.ParseResult result = builder.parse(null, url);
    666     switch (result) {
    667       case SUCCESS:
    668         return builder.build();
    669       case INVALID_HOST:
    670         throw new UnknownHostException("Invalid host: " + url);
    671       case UNSUPPORTED_SCHEME:
    672       case MISSING_SCHEME:
    673       case INVALID_PORT:
    674       default:
    675         throw new MalformedURLException("Invalid URL: " + result + " for " + url);
    676     }
    677   }
    678 
    679   public static HttpUrl get(URI uri) {
    680     return parse(uri.toString());
    681   }
    682 
    683   @Override public boolean equals(Object o) {
    684     return o instanceof HttpUrl && ((HttpUrl) o).url.equals(url);
    685   }
    686 
  /** Hashes the {@code url} string, which is the same field {@code equals} compares. */
  @Override public int hashCode() {
    return url.hashCode();
  }
    690 
  /** Returns the string form of this URL, as held in the {@code url} field. */
  @Override public String toString() {
    return url;
  }
    694 
    695   public static final class Builder {
    // scheme() stores only "http" or "https" here; build() rejects a null scheme.
    String scheme;
    // User info in encoded form. toString() omits the user info when both are empty.
    String encodedUsername = "";
    String encodedPassword = "";
    // Canonicalized host; build() rejects a null host.
    String host;
    // -1 means "not set": effectivePort() then falls back to the scheme's default port.
    int port = -1;
    // Encoded path segments. An empty last element stands for a trailing '/'.
    final List<String> encodedPathSegments = new ArrayList<>();
    // Alternating encoded names and values (a value may be null); null when there is no query.
    List<String> encodedQueryNamesAndValues;
    // Encoded fragment; null when there is no fragment.
    String encodedFragment;
    704 
    705     // ANDROID-BEGIN: http://b/29983827
    706     // public Builder() {
    707     //   encodedPathSegments.add(""); // The default path is '/' which needs a trailing space.
    708     // }
    709 
    /** Creates a builder whose path starts out as "/", i.e. a single empty path segment. */
    public Builder() {
      this(true); // The default path is '/', represented by one empty path segment.
    }
    713 
    /**
     * Creates a builder, optionally seeding the path with a single empty segment (the path "/").
     *
     * @param startWithSlash true to start with the path "/", false to start with no path segments
     */
    private Builder(boolean startWithSlash) {
      if (!startWithSlash) {
        return;
      }
      encodedPathSegments.add("");
    }
    719     // ANDROID-END: http://b/29983827
    720 
    721     public Builder scheme(String scheme) {
    722       if (scheme == null) {
    723         throw new IllegalArgumentException("scheme == null");
    724       } else if (scheme.equalsIgnoreCase("http")) {
    725         this.scheme = "http";
    726       } else if (scheme.equalsIgnoreCase("https")) {
    727         this.scheme = "https";
    728       } else {
    729         throw new IllegalArgumentException("unexpected scheme: " + scheme);
    730       }
    731       return this;
    732     }
    733 
    734     public Builder username(String username) {
    735       if (username == null) throw new IllegalArgumentException("username == null");
    736       this.encodedUsername = canonicalize(username, USERNAME_ENCODE_SET, false, false, false, true);
    737       return this;
    738     }
    739 
    740     public Builder encodedUsername(String encodedUsername) {
    741       if (encodedUsername == null) throw new IllegalArgumentException("encodedUsername == null");
    742       this.encodedUsername = canonicalize(
    743           encodedUsername, USERNAME_ENCODE_SET, true, false, false, true);
    744       return this;
    745     }
    746 
    747     public Builder password(String password) {
    748       if (password == null) throw new IllegalArgumentException("password == null");
    749       this.encodedPassword = canonicalize(password, PASSWORD_ENCODE_SET, false, false, false, true);
    750       return this;
    751     }
    752 
    753     public Builder encodedPassword(String encodedPassword) {
    754       if (encodedPassword == null) throw new IllegalArgumentException("encodedPassword == null");
    755       this.encodedPassword = canonicalize(
    756           encodedPassword, PASSWORD_ENCODE_SET, true, false, false, true);
    757       return this;
    758     }
    759 
    760     /**
    761      * @param host either a regular hostname, International Domain Name, IPv4 address, or IPv6
    762      *     address.
    763      */
    764     public Builder host(String host) {
    765       if (host == null) throw new IllegalArgumentException("host == null");
    766       String encoded = canonicalizeHost(host, 0, host.length());
    767       if (encoded == null) throw new IllegalArgumentException("unexpected host: " + host);
    768       this.host = encoded;
    769       return this;
    770     }
    771 
    772     public Builder port(int port) {
    773       if (port <= 0 || port > 65535) throw new IllegalArgumentException("unexpected port: " + port);
    774       this.port = port;
    775       return this;
    776     }
    777 
    778     int effectivePort() {
    779       return port != -1 ? port : defaultPort(scheme);
    780     }
    781 
    782     public Builder addPathSegment(String pathSegment) {
    783       if (pathSegment == null) throw new IllegalArgumentException("pathSegment == null");
    784       push(pathSegment, 0, pathSegment.length(), false, false);
    785       return this;
    786     }
    787 
    788     public Builder addEncodedPathSegment(String encodedPathSegment) {
    789       if (encodedPathSegment == null) {
    790         throw new IllegalArgumentException("encodedPathSegment == null");
    791       }
    792       push(encodedPathSegment, 0, encodedPathSegment.length(), false, true);
    793       return this;
    794     }
    795 
    796     public Builder setPathSegment(int index, String pathSegment) {
    797       if (pathSegment == null) throw new IllegalArgumentException("pathSegment == null");
    798       String canonicalPathSegment = canonicalize(
    799           pathSegment, 0, pathSegment.length(), PATH_SEGMENT_ENCODE_SET, false, false, false, true);
    800       if (isDot(canonicalPathSegment) || isDotDot(canonicalPathSegment)) {
    801         throw new IllegalArgumentException("unexpected path segment: " + pathSegment);
    802       }
    803       encodedPathSegments.set(index, canonicalPathSegment);
    804       return this;
    805     }
    806 
    807     public Builder setEncodedPathSegment(int index, String encodedPathSegment) {
    808       if (encodedPathSegment == null) {
    809         throw new IllegalArgumentException("encodedPathSegment == null");
    810       }
    811       String canonicalPathSegment = canonicalize(encodedPathSegment,
    812           0, encodedPathSegment.length(), PATH_SEGMENT_ENCODE_SET, true, false, false, true);
    813       encodedPathSegments.set(index, canonicalPathSegment);
    814       if (isDot(canonicalPathSegment) || isDotDot(canonicalPathSegment)) {
    815         throw new IllegalArgumentException("unexpected path segment: " + encodedPathSegment);
    816       }
    817       return this;
    818     }
    819 
    /**
     * Removes the path segment at {@code index}. In this Android variant the list may become
     * empty afterwards; the upstream re-add of an empty segment is disabled below.
     */
    public Builder removePathSegment(int index) {
      encodedPathSegments.remove(index);
      // ANDROID-BEGIN: http://b/29983827. Note this method only used from tests.
      // Only changed for consistency.
      //      if (encodedPathSegments.isEmpty()) {
      //        encodedPathSegments.add(""); // Always leave at least one '/'.
      //      }
      // ANDROID-END: http://b/29983827 - only used from tests
      return this;
    }
    830 
    831     public Builder encodedPath(String encodedPath) {
    832       if (encodedPath == null) throw new IllegalArgumentException("encodedPath == null");
    833       if (!encodedPath.startsWith("/")) {
    834         throw new IllegalArgumentException("unexpected encodedPath: " + encodedPath);
    835       }
    836       resolvePath(encodedPath, 0, encodedPath.length());
    837       return this;
    838     }
    839 
    840     public Builder query(String query) {
    841       this.encodedQueryNamesAndValues = query != null
    842           ? queryStringToNamesAndValues(canonicalize(
    843               query, QUERY_ENCODE_SET, false, false, true, true))
    844           : null;
    845       return this;
    846     }
    847 
    848     public Builder encodedQuery(String encodedQuery) {
    849       this.encodedQueryNamesAndValues = encodedQuery != null
    850           ? queryStringToNamesAndValues(
    851               canonicalize(encodedQuery, QUERY_ENCODE_SET, true, false, true, true))
    852           : null;
    853       return this;
    854     }
    855 
    856     /** Encodes the query parameter using UTF-8 and adds it to this URL's query string. */
    857     public Builder addQueryParameter(String name, String value) {
    858       if (name == null) throw new IllegalArgumentException("name == null");
    859       if (encodedQueryNamesAndValues == null) encodedQueryNamesAndValues = new ArrayList<>();
    860       encodedQueryNamesAndValues.add(
    861           canonicalize(name, QUERY_COMPONENT_ENCODE_SET, false, false, true, true));
    862       encodedQueryNamesAndValues.add(value != null
    863           ? canonicalize(value, QUERY_COMPONENT_ENCODE_SET, false, false, true, true)
    864           : null);
    865       return this;
    866     }
    867 
    868     /** Adds the pre-encoded query parameter to this URL's query string. */
    869     public Builder addEncodedQueryParameter(String encodedName, String encodedValue) {
    870       if (encodedName == null) throw new IllegalArgumentException("encodedName == null");
    871       if (encodedQueryNamesAndValues == null) encodedQueryNamesAndValues = new ArrayList<>();
    872       encodedQueryNamesAndValues.add(
    873           canonicalize(encodedName, QUERY_COMPONENT_ENCODE_SET, true, false, true, true));
    874       encodedQueryNamesAndValues.add(encodedValue != null
    875           ? canonicalize(encodedValue, QUERY_COMPONENT_ENCODE_SET, true, false, true, true)
    876           : null);
    877       return this;
    878     }
    879 
    880     public Builder setQueryParameter(String name, String value) {
    881       removeAllQueryParameters(name);
    882       addQueryParameter(name, value);
    883       return this;
    884     }
    885 
    886     public Builder setEncodedQueryParameter(String encodedName, String encodedValue) {
    887       removeAllEncodedQueryParameters(encodedName);
    888       addEncodedQueryParameter(encodedName, encodedValue);
    889       return this;
    890     }
    891 
    892     public Builder removeAllQueryParameters(String name) {
    893       if (name == null) throw new IllegalArgumentException("name == null");
    894       if (encodedQueryNamesAndValues == null) return this;
    895       String nameToRemove = canonicalize(
    896           name, QUERY_COMPONENT_ENCODE_SET, false, false, true, true);
    897       removeAllCanonicalQueryParameters(nameToRemove);
    898       return this;
    899     }
    900 
    901     public Builder removeAllEncodedQueryParameters(String encodedName) {
    902       if (encodedName == null) throw new IllegalArgumentException("encodedName == null");
    903       if (encodedQueryNamesAndValues == null) return this;
    904       removeAllCanonicalQueryParameters(
    905           canonicalize(encodedName, QUERY_COMPONENT_ENCODE_SET, true, false, true, true));
    906       return this;
    907     }
    908 
    909     private void removeAllCanonicalQueryParameters(String canonicalName) {
    910       for (int i = encodedQueryNamesAndValues.size() - 2; i >= 0; i -= 2) {
    911         if (canonicalName.equals(encodedQueryNamesAndValues.get(i))) {
    912           encodedQueryNamesAndValues.remove(i + 1);
    913           encodedQueryNamesAndValues.remove(i);
    914           if (encodedQueryNamesAndValues.isEmpty()) {
    915             encodedQueryNamesAndValues = null;
    916             return;
    917           }
    918         }
    919       }
    920     }
    921 
    922     public Builder fragment(String fragment) {
    923       this.encodedFragment = fragment != null
    924           ? canonicalize(fragment, FRAGMENT_ENCODE_SET, false, false, false, false)
    925           : null;
    926       return this;
    927     }
    928 
    929     public Builder encodedFragment(String encodedFragment) {
    930       this.encodedFragment = encodedFragment != null
    931           ? canonicalize(encodedFragment, FRAGMENT_ENCODE_SET, true, false, false, false)
    932           : null;
    933       return this;
    934     }
    935 
    936     /**
    937      * Re-encodes the components of this URL so that it satisfies (obsolete) RFC 2396, which is
    938      * particularly strict for certain components.
    939      */
    940     Builder reencodeForUri() {
    941       for (int i = 0, size = encodedPathSegments.size(); i < size; i++) {
    942         String pathSegment = encodedPathSegments.get(i);
    943         encodedPathSegments.set(i,
    944             canonicalize(pathSegment, PATH_SEGMENT_ENCODE_SET_URI, true, true, false, true));
    945       }
    946       if (encodedQueryNamesAndValues != null) {
    947         for (int i = 0, size = encodedQueryNamesAndValues.size(); i < size; i++) {
    948           String component = encodedQueryNamesAndValues.get(i);
    949           if (component != null) {
    950             encodedQueryNamesAndValues.set(i,
    951                 canonicalize(component, QUERY_COMPONENT_ENCODE_SET_URI, true, true, true, true));
    952           }
    953         }
    954       }
    955       if (encodedFragment != null) {
    956         encodedFragment = canonicalize(
    957             encodedFragment, FRAGMENT_ENCODE_SET_URI, true, true, false, false);
    958       }
    959       return this;
    960     }
    961 
    962     public HttpUrl build() {
    963       if (scheme == null) throw new IllegalStateException("scheme == null");
    964       if (host == null) throw new IllegalStateException("host == null");
    965       return new HttpUrl(this);
    966     }
    967 
    968     @Override public String toString() {
    969       StringBuilder result = new StringBuilder();
    970       result.append(scheme);
    971       result.append("://");
    972 
    973       if (!encodedUsername.isEmpty() || !encodedPassword.isEmpty()) {
    974         result.append(encodedUsername);
    975         if (!encodedPassword.isEmpty()) {
    976           result.append(':');
    977           result.append(encodedPassword);
    978         }
    979         result.append('@');
    980       }
    981 
    982       if (host.indexOf(':') != -1) {
    983         // Host is an IPv6 address.
    984         result.append('[');
    985         result.append(host);
    986         result.append(']');
    987       } else {
    988         result.append(host);
    989       }
    990 
    991       int effectivePort = effectivePort();
    992       if (effectivePort != defaultPort(scheme)) {
    993         result.append(':');
    994         result.append(effectivePort);
    995       }
    996 
    997       pathSegmentsToString(result, encodedPathSegments);
    998 
    999       if (encodedQueryNamesAndValues != null) {
   1000         result.append('?');
   1001         namesAndValuesToQueryString(result, encodedQueryNamesAndValues);
   1002       }
   1003 
   1004       if (encodedFragment != null) {
   1005         result.append('#');
   1006         result.append(encodedFragment);
   1007       }
   1008 
   1009       return result.toString();
   1010     }
   1011 
    /** Outcome of {@code parse(HttpUrl, String)}. */
    enum ParseResult {
      // The input was parsed into the builder's fields.
      SUCCESS,
      // The input has no scheme and there is no base URL to take one from.
      MISSING_SCHEME,
      // The input has a scheme prefix other than "http:" or "https:".
      UNSUPPORTED_SCHEME,
      // The port text after the host was rejected by parsePort.
      INVALID_PORT,
      // The host text was rejected by canonicalizeHost.
      INVALID_HOST,
    }
   1019 
    /**
     * Parses {@code input} into this builder's fields, resolving it against {@code base} when one
     * is given. Leading and trailing ASCII whitespace in {@code input} is ignored.
     *
     * <p>Components are read in order: scheme, authority (user info, host, port), path, query,
     * fragment. Parsing stops at the first failure, so fields assigned before that point keep
     * their new values.
     *
     * @param base the URL to resolve against, or null when {@code input} must be absolute
     * @param input the text to parse; must not be null
     * @return {@link ParseResult#SUCCESS}, or the reason parsing stopped
     */
    ParseResult parse(HttpUrl base, String input) {
      int pos = skipLeadingAsciiWhitespace(input, 0, input.length());
      int limit = skipTrailingAsciiWhitespace(input, pos, input.length());

      // Scheme.
      int schemeDelimiterOffset = schemeDelimiterOffset(input, pos, limit);
      if (schemeDelimiterOffset != -1) {
        if (input.regionMatches(true, pos, "https:", 0, 6)) {
          this.scheme = "https";
          pos += "https:".length();
        } else if (input.regionMatches(true, pos, "http:", 0, 5)) {
          this.scheme = "http";
          pos += "http:".length();
        } else {
          return ParseResult.UNSUPPORTED_SCHEME; // Not an HTTP scheme.
        }
      } else if (base != null) {
        this.scheme = base.scheme;
      } else {
        return ParseResult.MISSING_SCHEME; // No scheme.
      }

      // Authority.
      boolean hasUsername = false;
      boolean hasPassword = false;
      int slashCount = slashCount(input, pos, limit);
      if (slashCount >= 2 || base == null || !base.scheme.equals(this.scheme)) {
        // Read an authority if either:
        //  * The input starts with 2 or more slashes. These follow the scheme if it exists.
        //  * The input scheme exists and is different from the base URL's scheme.
        //
        // The structure of an authority is:
        //   username:password@host:port
        //
        // Username, password and port are optional.
        //   [username[:password]@]host[:port]
        pos += slashCount;
        authority:
        while (true) {
          int componentDelimiterOffset = delimiterOffset(input, pos, limit, "@/\\?#");
          // -1 stands for "ran into limit without finding a delimiter".
          int c = componentDelimiterOffset != limit
              ? input.charAt(componentDelimiterOffset)
              : -1;
          switch (c) {
            case '@':
              // User info precedes.
              if (!hasPassword) {
                int passwordColonOffset = delimiterOffset(
                    input, pos, componentDelimiterOffset, ":");
                String canonicalUsername = canonicalize(
                    input, pos, passwordColonOffset, USERNAME_ENCODE_SET, true, false, false, true);
                // A second '@' before any password joins the pieces with an encoded '@'.
                this.encodedUsername = hasUsername
                    ? this.encodedUsername + "%40" + canonicalUsername
                    : canonicalUsername;
                if (passwordColonOffset != componentDelimiterOffset) {
                  hasPassword = true;
                  this.encodedPassword = canonicalize(input, passwordColonOffset + 1,
                      componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, false, true);
                }
                hasUsername = true;
              } else {
                // Once a password has started, further '@'-separated text extends the password.
                this.encodedPassword = this.encodedPassword + "%40" + canonicalize(input, pos,
                    componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, false, true);
              }
              pos = componentDelimiterOffset + 1;
              break;

            case -1:
            case '/':
            case '\\':
            case '?':
            case '#':
              // Host info precedes.
              int portColonOffset = portColonOffset(input, pos, componentDelimiterOffset);
              if (portColonOffset + 1 < componentDelimiterOffset) {
                this.host = canonicalizeHost(input, pos, portColonOffset);
                this.port = parsePort(input, portColonOffset + 1, componentDelimiterOffset);
                if (this.port == -1) return ParseResult.INVALID_PORT; // Invalid port.
              } else {
                // No port text (either no ':' or nothing after it): use the scheme's default.
                this.host = canonicalizeHost(input, pos, portColonOffset);
                this.port = defaultPort(this.scheme);
              }
              if (this.host == null) return ParseResult.INVALID_HOST; // Invalid host.
              pos = componentDelimiterOffset;
              break authority;
          }
        }
      } else {
        // This is a relative link. Copy over all authority components. Also maybe the path & query.
        this.encodedUsername = base.encodedUsername();
        this.encodedPassword = base.encodedPassword();
        this.host = base.host;
        this.port = base.port;
        this.encodedPathSegments.clear();
        this.encodedPathSegments.addAll(base.encodedPathSegments());
        // The base query is kept only when the input is empty or is just a fragment.
        if (pos == limit || input.charAt(pos) == '#') {
          encodedQuery(base.encodedQuery());
        }
      }

      // Resolve the relative path.
      int pathDelimiterOffset = delimiterOffset(input, pos, limit, "?#");
      resolvePath(input, pos, pathDelimiterOffset);
      pos = pathDelimiterOffset;

      // Query.
      if (pos < limit && input.charAt(pos) == '?') {
        int queryDelimiterOffset = delimiterOffset(input, pos, limit, "#");
        this.encodedQueryNamesAndValues = queryStringToNamesAndValues(canonicalize(
            input, pos + 1, queryDelimiterOffset, QUERY_ENCODE_SET, true, false, true, true));
        pos = queryDelimiterOffset;
      }

      // Fragment.
      if (pos < limit && input.charAt(pos) == '#') {
        this.encodedFragment = canonicalize(
            input, pos + 1, limit, FRAGMENT_ENCODE_SET, true, false, false, false);
      }

      return ParseResult.SUCCESS;
    }
   1141 
   1142     private void resolvePath(String input, int pos, int limit) {
   1143       // Read a delimiter.
   1144       if (pos == limit) {
   1145         // Empty path: keep the base path as-is.
   1146         return;
   1147       }
   1148       char c = input.charAt(pos);
   1149       if (c == '/' || c == '\\') {
   1150         // Absolute path: reset to the default "/".
   1151         encodedPathSegments.clear();
   1152         encodedPathSegments.add("");
   1153         pos++;
   1154       } else {
   1155         // ANDROID-BEGIN: http://b/29983827
   1156         // // Relative path: clear everything after the last '/'.
   1157         // encodedPathSegments.set(encodedPathSegments.size() - 1, "");
   1158         // Relative path: clear everything after the last '/' (if there is one).
   1159         if (!encodedPathSegments.isEmpty()) {
   1160           encodedPathSegments.set(encodedPathSegments.size() - 1, "");
   1161         }
   1162         // ANDROID-END: http://b/29983827
   1163       }
   1164 
   1165       // Read path segments.
   1166       for (int i = pos; i < limit; ) {
   1167         int pathSegmentDelimiterOffset = delimiterOffset(input, i, limit, "/\\");
   1168         boolean segmentHasTrailingSlash = pathSegmentDelimiterOffset < limit;
   1169         push(input, i, pathSegmentDelimiterOffset, segmentHasTrailingSlash, true);
   1170         i = pathSegmentDelimiterOffset;
   1171         if (segmentHasTrailingSlash) i++;
   1172       }
   1173     }
   1174 
   1175     /** Adds a path segment. If the input is ".." or equivalent, this pops a path segment. */
   1176     private void push(String input, int pos, int limit, boolean addTrailingSlash,
   1177         boolean alreadyEncoded) {
   1178       String segment = canonicalize(
   1179           input, pos, limit, PATH_SEGMENT_ENCODE_SET, alreadyEncoded, false, false, true);
   1180       if (isDot(segment)) {
   1181         return; // Skip '.' path segments.
   1182       }
   1183       if (isDotDot(segment)) {
   1184         pop();
   1185         return;
   1186       }
   1187 
   1188       // ANDROID-BEGIN: http://b/29983827
   1189       // If the encodedPathSegments doesn't even include "/" then add the leading "/" before
   1190       // pushing more segments or modifying existing segments.
   1191       if (encodedPathSegments.isEmpty()) {
   1192         encodedPathSegments.add("");
   1193       }
   1194       // ANDROID-END: http://b/29983827
   1195 
   1196       if (encodedPathSegments.get(encodedPathSegments.size() - 1).isEmpty()) {
   1197         encodedPathSegments.set(encodedPathSegments.size() - 1, segment);
   1198       } else {
   1199         encodedPathSegments.add(segment);
   1200       }
   1201       if (addTrailingSlash) {
   1202         encodedPathSegments.add("");
   1203       }
   1204     }
   1205 
   1206     private boolean isDot(String input) {
   1207       return input.equals(".") || input.equalsIgnoreCase("%2e");
   1208     }
   1209 
   1210     private boolean isDotDot(String input) {
   1211       return input.equals("..")
   1212           || input.equalsIgnoreCase("%2e.")
   1213           || input.equalsIgnoreCase(".%2e")
   1214           || input.equalsIgnoreCase("%2e%2e");
   1215     }
   1216 
   1217     /**
   1218      * Removes a path segment. When this method returns the last segment is always "", which means
   1219      * the encoded path will have a trailing '/'.
   1220      *
   1221      * <p>Popping "/a/b/c/" yields "/a/b/". In this case the list of path segments goes from
   1222      * ["a", "b", "c", ""] to ["a", "b", ""].
   1223      *
   1224      * <p>Popping "/a/b/c" also yields "/a/b/". The list of path segments goes from ["a", "b", "c"]
   1225      * to ["a", "b", ""].
   1226      */
   1227     private void pop() {
   1228       // ANDROID-BEGIN: http://b/29983827
   1229       // Cannot pop() if there isn't even a "/". Leave the path as is. This method is only used
   1230       // from push(). push() handles the empty case explicitly.
   1231       if (encodedPathSegments.isEmpty()) {
   1232         return;
   1233       }
   1234       // ANDROID-END: http://b/29983827
   1235 
   1236       String removed = encodedPathSegments.remove(encodedPathSegments.size() - 1);
   1237 
   1238       // Make sure the path ends with a '/' by either adding an empty string or clearing a segment.
   1239       if (removed.isEmpty() && !encodedPathSegments.isEmpty()) {
   1240         encodedPathSegments.set(encodedPathSegments.size() - 1, "");
   1241       } else {
   1242         encodedPathSegments.add("");
   1243       }
   1244     }
   1245 
   1246     /**
   1247      * Increments {@code pos} until {@code input[pos]} is not ASCII whitespace. Stops at {@code
   1248      * limit}.
   1249      */
   1250     private int skipLeadingAsciiWhitespace(String input, int pos, int limit) {
   1251       for (int i = pos; i < limit; i++) {
   1252         switch (input.charAt(i)) {
   1253           case '\t':
   1254           case '\n':
   1255           case '\f':
   1256           case '\r':
   1257           case ' ':
   1258             continue;
   1259           default:
   1260             return i;
   1261         }
   1262       }
   1263       return limit;
   1264     }
   1265 
   1266     /**
   1267      * Decrements {@code limit} until {@code input[limit - 1]} is not ASCII whitespace. Stops at
   1268      * {@code pos}.
   1269      */
   1270     private int skipTrailingAsciiWhitespace(String input, int pos, int limit) {
   1271       for (int i = limit - 1; i >= pos; i--) {
   1272         switch (input.charAt(i)) {
   1273           case '\t':
   1274           case '\n':
   1275           case '\f':
   1276           case '\r':
   1277           case ' ':
   1278             continue;
   1279           default:
   1280             return i + 1;
   1281         }
   1282       }
   1283       return pos;
   1284     }
   1285 
   1286     /**
   1287      * Returns the index of the ':' in {@code input} that is after scheme characters. Returns -1 if
   1288      * {@code input} does not have a scheme that starts at {@code pos}.
   1289      */
   1290     private static int schemeDelimiterOffset(String input, int pos, int limit) {
   1291       if (limit - pos < 2) return -1;
   1292 
   1293       char c0 = input.charAt(pos);
   1294       if ((c0 < 'a' || c0 > 'z') && (c0 < 'A' || c0 > 'Z')) return -1; // Not a scheme start char.
   1295 
   1296       for (int i = pos + 1; i < limit; i++) {
   1297         char c = input.charAt(i);
   1298 
   1299         if ((c >= 'a' && c <= 'z')
   1300             || (c >= 'A' && c <= 'Z')
   1301             || (c >= '0' && c <= '9')
   1302             || c == '+'
   1303             || c == '-'
   1304             || c == '.') {
   1305           continue; // Scheme character. Keep going.
   1306         } else if (c == ':') {
   1307           return i; // Scheme prefix!
   1308         } else {
   1309           return -1; // Non-scheme character before the first ':'.
   1310         }
   1311       }
   1312 
   1313       return -1; // No ':'; doesn't start with a scheme.
   1314     }
   1315 
   1316     /** Returns the number of '/' and '\' slashes in {@code input}, starting at {@code pos}. */
   1317     private static int slashCount(String input, int pos, int limit) {
   1318       int slashCount = 0;
   1319       while (pos < limit) {
   1320         char c = input.charAt(pos);
   1321         if (c == '\\' || c == '/') {
   1322           slashCount++;
   1323           pos++;
   1324         } else {
   1325           break;
   1326         }
   1327       }
   1328       return slashCount;
   1329     }
   1330 
   1331     /** Finds the first ':' in {@code input}, skipping characters between square braces "[...]". */
   1332     private static int portColonOffset(String input, int pos, int limit) {
   1333       for (int i = pos; i < limit; i++) {
   1334         switch (input.charAt(i)) {
   1335           case '[':
   1336             while (++i < limit) {
   1337               if (input.charAt(i) == ']') break;
   1338             }
   1339             break;
   1340           case ':':
   1341             return i;
   1342         }
   1343       }
   1344       return limit; // No colon.
   1345     }
   1346 
   1347     private static String canonicalizeHost(String input, int pos, int limit) {
   1348       // Start by percent decoding the host. The WHATWG spec suggests doing this only after we've
   1349       // checked for IPv6 square braces. But Chrome does it first, and that's more lenient.
   1350       String percentDecoded = percentDecode(input, pos, limit, false);
   1351 
   1352       // If the input is encased in square braces "[...]", drop 'em. We have an IPv6 address.
   1353       if (percentDecoded.startsWith("[") && percentDecoded.endsWith("]")) {
   1354         InetAddress inetAddress = decodeIpv6(percentDecoded, 1, percentDecoded.length() - 1);
   1355         if (inetAddress == null) return null;
   1356         byte[] address = inetAddress.getAddress();
   1357         if (address.length == 16) return inet6AddressToAscii(address);
   1358         throw new AssertionError();
   1359       }
   1360 
   1361       return domainToAscii(percentDecoded);
   1362     }
   1363 
   1364     /** Decodes an IPv6 address like 1111:2222:3333:4444:5555:6666:7777:8888 or ::1. */
   1365     private static InetAddress decodeIpv6(String input, int pos, int limit) {
   1366       byte[] address = new byte[16];
   1367       int b = 0;
   1368       int compress = -1;
   1369       int groupOffset = -1;
   1370 
   1371       for (int i = pos; i < limit; ) {
   1372         if (b == address.length) return null; // Too many groups.
   1373 
   1374         // Read a delimiter.
   1375         if (i + 2 <= limit && input.regionMatches(i, "::", 0, 2)) {
   1376           // Compression "::" delimiter, which is anywhere in the input, including its prefix.
   1377           if (compress != -1) return null; // Multiple "::" delimiters.
   1378           i += 2;
   1379           b += 2;
   1380           compress = b;
   1381           if (i == limit) break;
   1382         } else if (b != 0) {
   1383           // Group separator ":" delimiter.
   1384           if (input.regionMatches(i, ":", 0, 1)) {
   1385             i++;
   1386           } else if (input.regionMatches(i, ".", 0, 1)) {
   1387             // If we see a '.', rewind to the beginning of the previous group and parse as IPv4.
   1388             if (!decodeIpv4Suffix(input, groupOffset, limit, address, b - 2)) return null;
   1389             b += 2; // We rewound two bytes and then added four.
   1390             break;
   1391           } else {
   1392             return null; // Wrong delimiter.
   1393           }
   1394         }
   1395 
   1396         // Read a group, one to four hex digits.
   1397         int value = 0;
   1398         groupOffset = i;
   1399         for (; i < limit; i++) {
   1400           char c = input.charAt(i);
   1401           int hexDigit = decodeHexDigit(c);
   1402           if (hexDigit == -1) break;
   1403           value = (value << 4) + hexDigit;
   1404         }
   1405         int groupLength = i - groupOffset;
   1406         if (groupLength == 0 || groupLength > 4) return null; // Group is the wrong size.
   1407 
   1408         // We've successfully read a group. Assign its value to our byte array.
   1409         address[b++] = (byte) ((value >>> 8) & 0xff);
   1410         address[b++] = (byte) (value & 0xff);
   1411       }
   1412 
   1413       // All done. If compression happened, we need to move bytes to the right place in the
   1414       // address. Here's a sample:
   1415       //
   1416       //      input: "1111:2222:3333::7777:8888"
   1417       //     before: { 11, 11, 22, 22, 33, 33, 00, 00, 77, 77, 88, 88, 00, 00, 00, 00  }
   1418       //   compress: 6
   1419       //          b: 10
   1420       //      after: { 11, 11, 22, 22, 33, 33, 00, 00, 00, 00, 00, 00, 77, 77, 88, 88 }
   1421       //
   1422       if (b != address.length) {
   1423         if (compress == -1) return null; // Address didn't have compression or enough groups.
   1424         System.arraycopy(address, compress, address, address.length - (b - compress), b - compress);
   1425         Arrays.fill(address, compress, compress + (address.length - b), (byte) 0);
   1426       }
   1427 
   1428       try {
   1429         return InetAddress.getByAddress(address);
   1430       } catch (UnknownHostException e) {
   1431         throw new AssertionError();
   1432       }
   1433     }
   1434 
   1435     /** Decodes an IPv4 address suffix of an IPv6 address, like 1111::5555:6666:192.168.0.1. */
   1436     private static boolean decodeIpv4Suffix(
   1437         String input, int pos, int limit, byte[] address, int addressOffset) {
   1438       int b = addressOffset;
   1439 
   1440       for (int i = pos; i < limit; ) {
   1441         if (b == address.length) return false; // Too many groups.
   1442 
   1443         // Read a delimiter.
   1444         if (b != addressOffset) {
   1445           if (input.charAt(i) != '.') return false; // Wrong delimiter.
   1446           i++;
   1447         }
   1448 
   1449         // Read 1 or more decimal digits for a value in 0..255.
   1450         int value = 0;
   1451         int groupOffset = i;
   1452         for (; i < limit; i++) {
   1453           char c = input.charAt(i);
   1454           if (c < '0' || c > '9') break;
   1455           if (value == 0 && groupOffset != i) return false; // Reject unnecessary leading '0's.
   1456           value = (value * 10) + c - '0';
   1457           if (value > 255) return false; // Value out of range.
   1458         }
   1459         int groupLength = i - groupOffset;
   1460         if (groupLength == 0) return false; // No digits.
   1461 
   1462         // We've successfully read a byte.
   1463         address[b++] = (byte) value;
   1464       }
   1465 
   1466       if (b != addressOffset + 4) return false; // Too few groups. We wanted exactly four.
   1467       return true; // Success.
   1468     }
   1469 
   1470     /**
   1471      * Performs IDN ToASCII encoding and canonicalize the result to lowercase. e.g. This converts
   1472      * {@code .net} to {@code xn--n3h.net}, and {@code WwW.GoOgLe.cOm} to {@code www.google.com}.
   1473      * {@code null} will be returned if the input cannot be ToASCII encoded or if the result
   1474      * contains unsupported ASCII characters.
   1475      */
   1476     private static String domainToAscii(String input) {
   1477       try {
   1478         String result = IDN.toASCII(input).toLowerCase(Locale.US);
   1479         if (result.isEmpty()) return null;
   1480 
   1481         // Confirm that the IDN ToASCII result doesn't contain any illegal characters.
   1482         if (containsInvalidHostnameAsciiCodes(result)) {
   1483           return null;
   1484         }
   1485         // TODO: implement all label limits.
   1486         return result;
   1487       } catch (IllegalArgumentException e) {
   1488         return null;
   1489       }
   1490     }
   1491 
   1492     private static boolean containsInvalidHostnameAsciiCodes(String hostnameAscii) {
   1493       for (int i = 0; i < hostnameAscii.length(); i++) {
   1494         char c = hostnameAscii.charAt(i);
   1495         // The WHATWG Host parsing rules accepts some character codes which are invalid by
   1496         // definition for OkHttp's host header checks (and the WHATWG Host syntax definition). Here
   1497         // we rule out characters that would cause problems in host headers.
   1498         if (c <= '\u001f' || c >= '\u007f') {
   1499           return true;
   1500         }
   1501         // Check for the characters mentioned in the WHATWG Host parsing spec:
   1502         // U+0000, U+0009, U+000A, U+000D, U+0020, "#", "%", "/", ":", "?", "@", "[", "\", and "]"
   1503         // (excluding the characters covered above).
   1504         if (" #%/:?@[\\]".indexOf(c) != -1) {
   1505           return true;
   1506         }
   1507       }
   1508       return false;
   1509     }
   1510 
   1511     private static String inet6AddressToAscii(byte[] address) {
   1512       // Go through the address looking for the longest run of 0s. Each group is 2-bytes.
   1513       int longestRunOffset = -1;
   1514       int longestRunLength = 0;
   1515       for (int i = 0; i < address.length; i += 2) {
   1516         int currentRunOffset = i;
   1517         while (i < 16 && address[i] == 0 && address[i + 1] == 0) {
   1518           i += 2;
   1519         }
   1520         int currentRunLength = i - currentRunOffset;
   1521         if (currentRunLength > longestRunLength) {
   1522           longestRunOffset = currentRunOffset;
   1523           longestRunLength = currentRunLength;
   1524         }
   1525       }
   1526 
   1527       // Emit each 2-byte group in hex, separated by ':'. The longest run of zeroes is "::".
   1528       Buffer result = new Buffer();
   1529       for (int i = 0; i < address.length; ) {
   1530         if (i == longestRunOffset) {
   1531           result.writeByte(':');
   1532           i += longestRunLength;
   1533           if (i == 16) result.writeByte(':');
   1534         } else {
   1535           if (i > 0) result.writeByte(':');
   1536           int group = (address[i] & 0xff) << 8 | address[i + 1] & 0xff;
   1537           result.writeHexadecimalUnsignedLong(group);
   1538           i += 2;
   1539         }
   1540       }
   1541       return result.readUtf8();
   1542     }
   1543 
   1544     private static int parsePort(String input, int pos, int limit) {
   1545       try {
   1546         // Canonicalize the port string to skip '\n' etc.
   1547         String portString = canonicalize(input, pos, limit, "", false, false, false, true);
   1548         int i = Integer.parseInt(portString);
   1549         if (i > 0 && i <= 65535) return i;
   1550         return -1;
   1551       } catch (NumberFormatException e) {
   1552         return -1; // Invalid port.
   1553       }
   1554     }
   1555   }
   1556 
   1557   /**
   1558    * Returns the index of the first character in {@code input} that contains a character in {@code
   1559    * delimiters}. Returns limit if there is no such character.
   1560    */
   1561   private static int delimiterOffset(String input, int pos, int limit, String delimiters) {
   1562     for (int i = pos; i < limit; i++) {
   1563       if (delimiters.indexOf(input.charAt(i)) != -1) return i;
   1564     }
   1565     return limit;
   1566   }
   1567 
   1568   static String percentDecode(String encoded, boolean plusIsSpace) {
   1569     return percentDecode(encoded, 0, encoded.length(), plusIsSpace);
   1570   }
   1571 
   1572   private List<String> percentDecode(List<String> list, boolean plusIsSpace) {
   1573     List<String> result = new ArrayList<>(list.size());
   1574     for (String s : list) {
   1575       result.add(s != null ? percentDecode(s, plusIsSpace) : null);
   1576     }
   1577     return Collections.unmodifiableList(result);
   1578   }
   1579 
   1580   static String percentDecode(String encoded, int pos, int limit, boolean plusIsSpace) {
   1581     for (int i = pos; i < limit; i++) {
   1582       char c = encoded.charAt(i);
   1583       if (c == '%' || (c == '+' && plusIsSpace)) {
   1584         // Slow path: the character at i requires decoding!
   1585         Buffer out = new Buffer();
   1586         out.writeUtf8(encoded, pos, i);
   1587         percentDecode(out, encoded, i, limit, plusIsSpace);
   1588         return out.readUtf8();
   1589       }
   1590     }
   1591 
   1592     // Fast path: no characters in [pos..limit) required decoding.
   1593     return encoded.substring(pos, limit);
   1594   }
   1595 
   1596   static void percentDecode(Buffer out, String encoded, int pos, int limit, boolean plusIsSpace) {
   1597     int codePoint;
   1598     for (int i = pos; i < limit; i += Character.charCount(codePoint)) {
   1599       codePoint = encoded.codePointAt(i);
   1600       if (codePoint == '%' && i + 2 < limit) {
   1601         int d1 = decodeHexDigit(encoded.charAt(i + 1));
   1602         int d2 = decodeHexDigit(encoded.charAt(i + 2));
   1603         if (d1 != -1 && d2 != -1) {
   1604           out.writeByte((d1 << 4) + d2);
   1605           i += 2;
   1606           continue;
   1607         }
   1608       } else if (codePoint == '+' && plusIsSpace) {
   1609         out.writeByte(' ');
   1610         continue;
   1611       }
   1612       out.writeUtf8CodePoint(codePoint);
   1613     }
   1614   }
   1615 
   1616   static boolean percentEncoded(String encoded, int pos, int limit) {
   1617     return pos + 2 < limit
   1618         && encoded.charAt(pos) == '%'
   1619         && decodeHexDigit(encoded.charAt(pos + 1)) != -1
   1620         && decodeHexDigit(encoded.charAt(pos + 2)) != -1;
   1621   }
   1622 
   1623   static int decodeHexDigit(char c) {
   1624     if (c >= '0' && c <= '9') return c - '0';
   1625     if (c >= 'a' && c <= 'f') return c - 'a' + 10;
   1626     if (c >= 'A' && c <= 'F') return c - 'A' + 10;
   1627     return -1;
   1628   }
   1629 
   1630   /**
   1631    * Returns a substring of {@code input} on the range {@code [pos..limit)} with the following
   1632    * transformations:
   1633    * <ul>
   1634    *   <li>Tabs, newlines, form feeds and carriage returns are skipped.
   1635    *   <li>In queries, ' ' is encoded to '+' and '+' is encoded to "%2B".
   1636    *   <li>Characters in {@code encodeSet} are percent-encoded.
   1637    *   <li>Control characters and non-ASCII characters are percent-encoded.
   1638    *   <li>All other characters are copied without transformation.
   1639    * </ul>
   1640    *
   1641    * @param alreadyEncoded true to leave '%' as-is; false to convert it to '%25'.
   1642    * @param strict true to encode '%' if it is not the prefix of a valid percent encoding.
   1643    * @param plusIsSpace true to encode '+' as "%2B" if it is not already encoded
   1644    * @param asciiOnly true to encode all non-ASCII codepoints.
   1645    */
   1646   static String canonicalize(String input, int pos, int limit, String encodeSet,
   1647       boolean alreadyEncoded, boolean strict, boolean plusIsSpace, boolean asciiOnly) {
   1648     int codePoint;
   1649     for (int i = pos; i < limit; i += Character.charCount(codePoint)) {
   1650       codePoint = input.codePointAt(i);
   1651       if (codePoint < 0x20
   1652           || codePoint == 0x7f
   1653           || codePoint >= 0x80 && asciiOnly
   1654           || encodeSet.indexOf(codePoint) != -1
   1655           || codePoint == '%' && (!alreadyEncoded || strict && !percentEncoded(input, i, limit))
   1656           || codePoint == '+' && plusIsSpace) {
   1657         // Slow path: the character at i requires encoding!
   1658         Buffer out = new Buffer();
   1659         out.writeUtf8(input, pos, i);
   1660         canonicalize(out, input, i, limit, encodeSet, alreadyEncoded, strict, plusIsSpace,
   1661             asciiOnly);
   1662         return out.readUtf8();
   1663       }
   1664     }
   1665 
   1666     // Fast path: no characters in [pos..limit) required encoding.
   1667     return input.substring(pos, limit);
   1668   }
   1669 
   1670   static void canonicalize(Buffer out, String input, int pos, int limit, String encodeSet,
   1671       boolean alreadyEncoded, boolean strict, boolean plusIsSpace, boolean asciiOnly) {
   1672     Buffer utf8Buffer = null; // Lazily allocated.
   1673     int codePoint;
   1674     for (int i = pos; i < limit; i += Character.charCount(codePoint)) {
   1675       codePoint = input.codePointAt(i);
   1676       if (alreadyEncoded
   1677           && (codePoint == '\t' || codePoint == '\n' || codePoint == '\f' || codePoint == '\r')) {
   1678         // Skip this character.
   1679       } else if (codePoint == '+' && plusIsSpace) {
   1680         // Encode '+' as '%2B' since we permit ' ' to be encoded as either '+' or '%20'.
   1681         out.writeUtf8(alreadyEncoded ? "+" : "%2B");
   1682       } else if (codePoint < 0x20
   1683           || codePoint == 0x7f
   1684           || codePoint >= 0x80 && asciiOnly
   1685           || encodeSet.indexOf(codePoint) != -1
   1686           || codePoint == '%' && (!alreadyEncoded || strict && !percentEncoded(input, i, limit))) {
   1687         // Percent encode this character.
   1688         if (utf8Buffer == null) {
   1689           utf8Buffer = new Buffer();
   1690         }
   1691         utf8Buffer.writeUtf8CodePoint(codePoint);
   1692         while (!utf8Buffer.exhausted()) {
   1693           int b = utf8Buffer.readByte() & 0xff;
   1694           out.writeByte('%');
   1695           out.writeByte(HEX_DIGITS[(b >> 4) & 0xf]);
   1696           out.writeByte(HEX_DIGITS[b & 0xf]);
   1697         }
   1698       } else {
   1699         // This character doesn't need encoding. Just copy it over.
   1700         out.writeUtf8CodePoint(codePoint);
   1701       }
   1702     }
   1703   }
   1704 
   1705   static String canonicalize(String input, String encodeSet, boolean alreadyEncoded, boolean strict,
   1706       boolean plusIsSpace, boolean asciiOnly) {
   1707     return canonicalize(
   1708         input, 0, input.length(), encodeSet, alreadyEncoded, strict, plusIsSpace, asciiOnly);
   1709   }
   1710 }
   1711