1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 package org.chromium.chrome.browser; 6 7 import android.text.TextUtils; 8 9 import org.chromium.base.CollectionUtil; 10 11 import java.net.URI; 12 import java.net.URISyntaxException; 13 import java.util.HashSet; 14 15 /** 16 * Utilities for working with URIs (and URLs). These methods may be used in security-sensitive 17 * contexts (after all, origins are the security boundary on the web), and so the correctness bar 18 * must be high. 19 */ 20 public class UrlUtilities { 21 /** 22 * URI schemes that ContentView can handle. 23 */ 24 private static final HashSet<String> ACCEPTED_SCHEMES = CollectionUtil.newHashSet( 25 "about", "data", "file", "http", "https", "inline", "javascript"); 26 27 /** 28 * URI schemes that Chrome can download. 29 */ 30 private static final HashSet<String> DOWNLOADABLE_SCHEMES = CollectionUtil.newHashSet( 31 "data", "filesystem", "http", "https"); 32 33 /** 34 * @param uri A URI. 35 * 36 * @return True if the URI's scheme is one that ContentView can handle. 37 */ 38 public static boolean isAcceptedScheme(URI uri) { 39 return ACCEPTED_SCHEMES.contains(uri.getScheme()); 40 } 41 42 /** 43 * @param uri A URI. 44 * 45 * @return True if the URI's scheme is one that ContentView can handle. 46 */ 47 public static boolean isAcceptedScheme(String uri) { 48 try { 49 return isAcceptedScheme(new URI(uri)); 50 } catch (URISyntaxException e) { 51 return false; 52 } 53 } 54 55 /** 56 * @param uri A URI. 57 * 58 * @return True if the URI's scheme is one that Chrome can download. 59 */ 60 public static boolean isDownloadableScheme(URI uri) { 61 return DOWNLOADABLE_SCHEMES.contains(uri.getScheme()); 62 } 63 64 /** 65 * @param uri A URI. 66 * 67 * @return True if the URI's scheme is one that Chrome can download. 68 */ 69 public static boolean isDownloadableScheme(String uri) { 70 try { 71 return isDownloadableScheme(new URI(uri)); 72 } catch (URISyntaxException e) { 73 return false; 74 } 75 } 76 77 /** 78 * @param uri A URI to repair. 79 * 80 * @return A String representation of a URI that will be valid for loading in a ContentView. 81 */ 82 public static String fixUrl(String uri) { 83 if (uri == null) return null; 84 85 try { 86 String fixedUri = uri.trim(); 87 if (fixedUri.indexOf("://") == 0) { 88 return "http" + fixedUri; 89 } 90 if (fixedUri.indexOf(":") == -1) { 91 return "http://" + fixedUri; 92 } 93 94 URI parsed = new URI(fixedUri); 95 if (parsed.getScheme() == null) { 96 parsed = new URI( 97 "http", 98 null, // userInfo 99 parsed.getHost(), 100 parsed.getPort(), 101 parsed.getRawPath(), 102 parsed.getRawQuery(), 103 parsed.getRawFragment()); 104 } 105 return parsed.toString(); 106 } catch (URISyntaxException e) { 107 // Can't do anything. 108 return uri; 109 } 110 } 111 112 /** 113 * Refer to UrlFixerUpper::FixupURL. 114 * 115 * Compare to {@link #fixUrl(String)}, This fixes URL more aggressively including Chrome 116 * specific cases. For example, "about:" becomes "chrome://version/". However, this is not a 117 * superset of {@link #fixUrl(String)} either. For example, this function doesn't do anything 118 * with "://mail.google.com:/", while the other one prepends "http". Also, for 119 * "//mail.google.com:/", this function prepends "file" while the other one prepends "http". 120 */ 121 public static String fixupUrl(String uri) { 122 return nativeFixupUrl(uri, null); 123 } 124 125 /** 126 * Builds a String that strips down the URL to the its scheme, host, and port. 127 * @param uri URI to break down. 128 * @param showScheme Whether or not to show the scheme. If the URL can't be parsed, this value 129 * is ignored. 130 * @return Stripped-down String containing the essential bits of the URL, or the original URL if 131 * it fails to parse it. 132 */ 133 public static String getOriginForDisplay(URI uri, boolean showScheme) { 134 String scheme = uri.getScheme(); 135 String host = uri.getHost(); 136 int port = uri.getPort(); 137 138 String displayUrl; 139 if (TextUtils.isEmpty(scheme) || TextUtils.isEmpty(host)) { 140 displayUrl = uri.toString(); 141 } else { 142 if (showScheme) { 143 scheme += "://"; 144 } else { 145 scheme = ""; 146 } 147 148 if (port == -1 || (port == 80 && "http".equals(scheme)) 149 || (port == 443 && "https".equals(scheme))) { 150 displayUrl = scheme + host; 151 } else { 152 displayUrl = scheme + host + ":" + port; 153 } 154 } 155 156 return displayUrl; 157 } 158 159 /** 160 * Determines whether or not the given URLs belong to the same broad domain or host. 161 * "Broad domain" is defined as the TLD + 1 or the host. 162 * 163 * For example, the TLD + 1 for http://news.google.com would be "google.com" and would be shared 164 * with other Google properties like http://finance.google.com. 165 * 166 * If {@code includePrivateRegistries} is marked as true, then private domain registries (like 167 * appspot.com) are considered "effective TLDs" -- all subdomains of appspot.com would be 168 * considered distinct (effective TLD = ".appspot.com" + 1). 169 * This means that http://chromiumreview.appspot.com and http://example.appspot.com would not 170 * belong to the same host. 171 * If {@code includePrivateRegistries} is false, all subdomains of appspot.com 172 * would be considered to be the same domain (TLD = ".com" + 1). 173 * 174 * @param primaryUrl First URL 175 * @param secondaryUrl Second URL 176 * @param includePrivateRegistries Whether or not to consider private registries. 177 * @return True iff the two URIs belong to the same domain or host. 178 */ 179 public static boolean sameDomainOrHost(String primaryUrl, String secondaryUrl, 180 boolean includePrivateRegistries) { 181 return nativeSameDomainOrHost(primaryUrl, secondaryUrl, includePrivateRegistries); 182 } 183 184 /** 185 * This function works by calling net::registry_controlled_domains::GetDomainAndRegistry 186 * 187 * @param uri A URI 188 * @param includePrivateRegistries Whether or not to consider private registries. 189 * 190 * @return The registered, organization-identifying host and all its registry information, but 191 * no subdomains, from the given URI. Returns an empty string if the URI is invalid, has no host 192 * (e.g. a file: URI), has multiple trailing dots, is an IP address, has only one subcomponent 193 * (i.e. no dots other than leading/trailing ones), or is itself a recognized registry 194 * identifier. 195 */ 196 public static String getDomainAndRegistry(String uri, boolean includePrivateRegistries) { 197 return nativeGetDomainAndRegistry(uri, includePrivateRegistries); 198 } 199 200 private static native boolean nativeSameDomainOrHost(String primaryUrl, String secondaryUrl, 201 boolean includePrivateRegistries); 202 private static native String nativeGetDomainAndRegistry(String url, 203 boolean includePrivateRegistries); 204 public static native boolean nativeIsGoogleSearchUrl(String url); 205 public static native boolean nativeIsGoogleHomePageUrl(String url); 206 public static native String nativeFixupUrl(String url, String desiredTld); 207 } 208