1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.browser; 18 19 import android.net.Uri; 20 import android.util.Patterns; 21 import android.webkit.URLUtil; 22 23 import java.util.regex.Matcher; 24 import java.util.regex.Pattern; 25 26 /** 27 * Utility methods for Url manipulation 28 */ 29 public class UrlUtils { 30 31 static final Pattern ACCEPTED_URI_SCHEMA = Pattern.compile( 32 "(?i)" + // switch on case insensitive matching 33 "(" + // begin group for schema 34 "(?:http|https|file):\\/\\/" + 35 "|(?:inline|data|about|javascript):" + 36 ")" + 37 "(.*)" ); 38 39 // Google search 40 private final static String QUICKSEARCH_G = "http://www.google.com/m?q=%s"; 41 private final static String QUERY_PLACE_HOLDER = "%s"; 42 43 // Regular expression to strip http:// and optionally 44 // the trailing slash 45 private static final Pattern STRIP_URL_PATTERN = 46 Pattern.compile("^http://(.*?)/?$"); 47 48 private UrlUtils() { /* cannot be instantiated */ } 49 50 /** 51 * Strips the provided url of preceding "http://" and any trailing "/". Does not 52 * strip "https://". If the provided string cannot be stripped, the original string 53 * is returned. 54 * 55 * TODO: Put this in TextUtils to be used by other packages doing something similar. 56 * 57 * @param url a url to strip, like "http://www.google.com/" 58 * @return a stripped url like "www.google.com", or the original string if it could 59 * not be stripped 60 */ 61 public static String stripUrl(String url) { 62 if (url == null) return null; 63 Matcher m = STRIP_URL_PATTERN.matcher(url); 64 if (m.matches()) { 65 return m.group(1); 66 } else { 67 return url; 68 } 69 } 70 71 protected static String smartUrlFilter(Uri inUri) { 72 if (inUri != null) { 73 return smartUrlFilter(inUri.toString()); 74 } 75 return null; 76 } 77 78 /** 79 * Attempts to determine whether user input is a URL or search 80 * terms. Anything with a space is passed to search. 81 * 82 * Converts to lowercase any mistakenly uppercased schema (i.e., 83 * "Http://" converts to "http://" 84 * 85 * @return Original or modified URL 86 * 87 */ 88 public static String smartUrlFilter(String url) { 89 return smartUrlFilter(url, true); 90 } 91 92 /** 93 * Attempts to determine whether user input is a URL or search 94 * terms. Anything with a space is passed to search if canBeSearch is true. 95 * 96 * Converts to lowercase any mistakenly uppercased schema (i.e., 97 * "Http://" converts to "http://" 98 * 99 * @param canBeSearch If true, will return a search url if it isn't a valid 100 * URL. If false, invalid URLs will return null 101 * @return Original or modified URL 102 * 103 */ 104 public static String smartUrlFilter(String url, boolean canBeSearch) { 105 String inUrl = url.trim(); 106 boolean hasSpace = inUrl.indexOf(' ') != -1; 107 108 Matcher matcher = ACCEPTED_URI_SCHEMA.matcher(inUrl); 109 if (matcher.matches()) { 110 // force scheme to lowercase 111 String scheme = matcher.group(1); 112 String lcScheme = scheme.toLowerCase(); 113 if (!lcScheme.equals(scheme)) { 114 inUrl = lcScheme + matcher.group(2); 115 } 116 if (hasSpace && Patterns.WEB_URL.matcher(inUrl).matches()) { 117 inUrl = inUrl.replace(" ", "%20"); 118 } 119 return inUrl; 120 } 121 if (!hasSpace) { 122 if (Patterns.WEB_URL.matcher(inUrl).matches()) { 123 return URLUtil.guessUrl(inUrl); 124 } 125 } 126 if (canBeSearch) { 127 return URLUtil.composeSearchUrl(inUrl, 128 QUICKSEARCH_G, QUERY_PLACE_HOLDER); 129 } 130 return null; 131 } 132 133 /* package */ static String fixUrl(String inUrl) { 134 // FIXME: Converting the url to lower case 135 // duplicates functionality in smartUrlFilter(). 136 // However, changing all current callers of fixUrl to 137 // call smartUrlFilter in addition may have unwanted 138 // consequences, and is deferred for now. 139 int colon = inUrl.indexOf(':'); 140 boolean allLower = true; 141 for (int index = 0; index < colon; index++) { 142 char ch = inUrl.charAt(index); 143 if (!Character.isLetter(ch)) { 144 break; 145 } 146 allLower &= Character.isLowerCase(ch); 147 if (index == colon - 1 && !allLower) { 148 inUrl = inUrl.substring(0, colon).toLowerCase() 149 + inUrl.substring(colon); 150 } 151 } 152 if (inUrl.startsWith("http://") || inUrl.startsWith("https://")) 153 return inUrl; 154 if (inUrl.startsWith("http:") || 155 inUrl.startsWith("https:")) { 156 if (inUrl.startsWith("http:/") || inUrl.startsWith("https:/")) { 157 inUrl = inUrl.replaceFirst("/", "//"); 158 } else inUrl = inUrl.replaceFirst(":", "://"); 159 } 160 return inUrl; 161 } 162 163 // Returns the filtered URL. Cannot return null, but can return an empty string 164 /* package */ static String filteredUrl(String inUrl) { 165 if (inUrl == null) { 166 return ""; 167 } 168 if (inUrl.startsWith("content:") 169 || inUrl.startsWith("browser:")) { 170 return ""; 171 } 172 return inUrl; 173 } 174 175 } 176