1 /* 2 * Copyright (C) 2006 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package android.webkit; 18 19 import java.io.UnsupportedEncodingException; 20 import java.util.regex.Matcher; 21 import java.util.regex.Pattern; 22 23 import android.net.Uri; 24 import android.net.ParseException; 25 import android.net.WebAddress; 26 import android.util.Log; 27 28 public final class URLUtil { 29 30 private static final String LOGTAG = "webkit"; 31 32 // to refer to bar.png under your package's asset/foo/ directory, use 33 // "file:///android_asset/foo/bar.png". 34 static final String ASSET_BASE = "file:///android_asset/"; 35 // to refer to bar.png under your package's res/drawable/ directory, use 36 // "file:///android_res/drawable/bar.png". Use "drawable" to refer to 37 // "drawable-hdpi" directory as well. 38 static final String RESOURCE_BASE = "file:///android_res/"; 39 static final String FILE_BASE = "file://"; 40 static final String PROXY_BASE = "file:///cookieless_proxy/"; 41 static final String CONTENT_BASE = "content:"; 42 43 /** 44 * Cleans up (if possible) user-entered web addresses 45 */ 46 public static String guessUrl(String inUrl) { 47 48 String retVal = inUrl; 49 WebAddress webAddress; 50 51 if (DebugFlags.URL_UTIL) Log.v(LOGTAG, "guessURL before queueRequest: " + inUrl); 52 53 if (inUrl.length() == 0) return inUrl; 54 if (inUrl.startsWith("about:")) return inUrl; 55 // Do not try to interpret data scheme URLs 56 if (inUrl.startsWith("data:")) return inUrl; 57 // Do not try to interpret file scheme URLs 58 if (inUrl.startsWith("file:")) return inUrl; 59 // Do not try to interpret javascript scheme URLs 60 if (inUrl.startsWith("javascript:")) return inUrl; 61 62 // bug 762454: strip period off end of url 63 if (inUrl.endsWith(".") == true) { 64 inUrl = inUrl.substring(0, inUrl.length() - 1); 65 } 66 67 try { 68 webAddress = new WebAddress(inUrl); 69 } catch (ParseException ex) { 70 71 if (DebugFlags.URL_UTIL) { 72 Log.v(LOGTAG, "smartUrlFilter: failed to parse url = " + inUrl); 73 } 74 return retVal; 75 } 76 77 // Check host 78 if (webAddress.getHost().indexOf('.') == -1) { 79 // no dot: user probably entered a bare domain. try .com 80 webAddress.setHost("www." + webAddress.getHost() + ".com"); 81 } 82 return webAddress.toString(); 83 } 84 85 public static String composeSearchUrl(String inQuery, String template, 86 String queryPlaceHolder) { 87 int placeHolderIndex = template.indexOf(queryPlaceHolder); 88 if (placeHolderIndex < 0) { 89 return null; 90 } 91 92 String query; 93 StringBuilder buffer = new StringBuilder(); 94 buffer.append(template.substring(0, placeHolderIndex)); 95 96 try { 97 query = java.net.URLEncoder.encode(inQuery, "utf-8"); 98 buffer.append(query); 99 } catch (UnsupportedEncodingException ex) { 100 return null; 101 } 102 103 buffer.append(template.substring( 104 placeHolderIndex + queryPlaceHolder.length())); 105 106 return buffer.toString(); 107 } 108 109 public static byte[] decode(byte[] url) throws IllegalArgumentException { 110 if (url.length == 0) { 111 return new byte[0]; 112 } 113 114 // Create a new byte array with the same length to ensure capacity 115 byte[] tempData = new byte[url.length]; 116 117 int tempCount = 0; 118 for (int i = 0; i < url.length; i++) { 119 byte b = url[i]; 120 if (b == '%') { 121 if (url.length - i > 2) { 122 b = (byte) (parseHex(url[i + 1]) * 16 123 + parseHex(url[i + 2])); 124 i += 2; 125 } else { 126 throw new IllegalArgumentException("Invalid format"); 127 } 128 } 129 tempData[tempCount++] = b; 130 } 131 byte[] retData = new byte[tempCount]; 132 System.arraycopy(tempData, 0, retData, 0, tempCount); 133 return retData; 134 } 135 136 /** 137 * @return True iff the url is correctly URL encoded 138 */ 139 static boolean verifyURLEncoding(String url) { 140 int count = url.length(); 141 if (count == 0) { 142 return false; 143 } 144 145 int index = url.indexOf('%'); 146 while (index >= 0 && index < count) { 147 if (index < count - 2) { 148 try { 149 parseHex((byte) url.charAt(++index)); 150 parseHex((byte) url.charAt(++index)); 151 } catch (IllegalArgumentException e) { 152 return false; 153 } 154 } else { 155 return false; 156 } 157 index = url.indexOf('%', index + 1); 158 } 159 return true; 160 } 161 162 private static int parseHex(byte b) { 163 if (b >= '0' && b <= '9') return (b - '0'); 164 if (b >= 'A' && b <= 'F') return (b - 'A' + 10); 165 if (b >= 'a' && b <= 'f') return (b - 'a' + 10); 166 167 throw new IllegalArgumentException("Invalid hex char '" + b + "'"); 168 } 169 170 /** 171 * @return True iff the url is an asset file. 172 */ 173 public static boolean isAssetUrl(String url) { 174 return (null != url) && url.startsWith(ASSET_BASE); 175 } 176 177 /** 178 * @return True iff the url is a resource file. 179 * @hide 180 */ 181 public static boolean isResourceUrl(String url) { 182 return (null != url) && url.startsWith(RESOURCE_BASE); 183 } 184 185 /** 186 * @return True iff the url is a proxy url to allow cookieless network 187 * requests from a file url. 188 * @deprecated Cookieless proxy is no longer supported. 189 */ 190 @Deprecated 191 public static boolean isCookielessProxyUrl(String url) { 192 return (null != url) && url.startsWith(PROXY_BASE); 193 } 194 195 /** 196 * @return True iff the url is a local file. 197 */ 198 public static boolean isFileUrl(String url) { 199 return (null != url) && (url.startsWith(FILE_BASE) && 200 !url.startsWith(ASSET_BASE) && 201 !url.startsWith(PROXY_BASE)); 202 } 203 204 /** 205 * @return True iff the url is an about: url. 206 */ 207 public static boolean isAboutUrl(String url) { 208 return (null != url) && url.startsWith("about:"); 209 } 210 211 /** 212 * @return True iff the url is a data: url. 213 */ 214 public static boolean isDataUrl(String url) { 215 return (null != url) && url.startsWith("data:"); 216 } 217 218 /** 219 * @return True iff the url is a javascript: url. 220 */ 221 public static boolean isJavaScriptUrl(String url) { 222 return (null != url) && url.startsWith("javascript:"); 223 } 224 225 /** 226 * @return True iff the url is an http: url. 227 */ 228 public static boolean isHttpUrl(String url) { 229 return (null != url) && 230 (url.length() > 6) && 231 url.substring(0, 7).equalsIgnoreCase("http://"); 232 } 233 234 /** 235 * @return True iff the url is an https: url. 236 */ 237 public static boolean isHttpsUrl(String url) { 238 return (null != url) && 239 (url.length() > 7) && 240 url.substring(0, 8).equalsIgnoreCase("https://"); 241 } 242 243 /** 244 * @return True iff the url is a network url. 245 */ 246 public static boolean isNetworkUrl(String url) { 247 if (url == null || url.length() == 0) { 248 return false; 249 } 250 return isHttpUrl(url) || isHttpsUrl(url); 251 } 252 253 /** 254 * @return True iff the url is a content: url. 255 */ 256 public static boolean isContentUrl(String url) { 257 return (null != url) && url.startsWith(CONTENT_BASE); 258 } 259 260 /** 261 * @return True iff the url is valid. 262 */ 263 public static boolean isValidUrl(String url) { 264 if (url == null || url.length() == 0) { 265 return false; 266 } 267 268 return (isAssetUrl(url) || 269 isResourceUrl(url) || 270 isFileUrl(url) || 271 isAboutUrl(url) || 272 isHttpUrl(url) || 273 isHttpsUrl(url) || 274 isJavaScriptUrl(url) || 275 isContentUrl(url)); 276 } 277 278 /** 279 * Strips the url of the anchor. 280 */ 281 public static String stripAnchor(String url) { 282 int anchorIndex = url.indexOf('#'); 283 if (anchorIndex != -1) { 284 return url.substring(0, anchorIndex); 285 } 286 return url; 287 } 288 289 /** 290 * Guesses canonical filename that a download would have, using 291 * the URL and contentDisposition. File extension, if not defined, 292 * is added based on the mimetype 293 * @param url Url to the content 294 * @param contentDisposition Content-Disposition HTTP header or null 295 * @param mimeType Mime-type of the content or null 296 * 297 * @return suggested filename 298 */ 299 public static final String guessFileName( 300 String url, 301 String contentDisposition, 302 String mimeType) { 303 String filename = null; 304 String extension = null; 305 306 // If we couldn't do anything with the hint, move toward the content disposition 307 if (filename == null && contentDisposition != null) { 308 filename = parseContentDisposition(contentDisposition); 309 if (filename != null) { 310 int index = filename.lastIndexOf('/') + 1; 311 if (index > 0) { 312 filename = filename.substring(index); 313 } 314 } 315 } 316 317 // If all the other http-related approaches failed, use the plain uri 318 if (filename == null) { 319 String decodedUrl = Uri.decode(url); 320 if (decodedUrl != null) { 321 int queryIndex = decodedUrl.indexOf('?'); 322 // If there is a query string strip it, same as desktop browsers 323 if (queryIndex > 0) { 324 decodedUrl = decodedUrl.substring(0, queryIndex); 325 } 326 if (!decodedUrl.endsWith("/")) { 327 int index = decodedUrl.lastIndexOf('/') + 1; 328 if (index > 0) { 329 filename = decodedUrl.substring(index); 330 } 331 } 332 } 333 } 334 335 // Finally, if couldn't get filename from URI, get a generic filename 336 if (filename == null) { 337 filename = "downloadfile"; 338 } 339 340 // Split filename between base and extension 341 // Add an extension if filename does not have one 342 int dotIndex = filename.indexOf('.'); 343 if (dotIndex < 0) { 344 if (mimeType != null) { 345 extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType); 346 if (extension != null) { 347 extension = "." + extension; 348 } 349 } 350 if (extension == null) { 351 if (mimeType != null && mimeType.toLowerCase().startsWith("text/")) { 352 if (mimeType.equalsIgnoreCase("text/html")) { 353 extension = ".html"; 354 } else { 355 extension = ".txt"; 356 } 357 } else { 358 extension = ".bin"; 359 } 360 } 361 } else { 362 if (mimeType != null) { 363 // Compare the last segment of the extension against the mime type. 364 // If there's a mismatch, discard the entire extension. 365 int lastDotIndex = filename.lastIndexOf('.'); 366 String typeFromExt = MimeTypeMap.getSingleton().getMimeTypeFromExtension( 367 filename.substring(lastDotIndex + 1)); 368 if (typeFromExt != null && !typeFromExt.equalsIgnoreCase(mimeType)) { 369 extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType); 370 if (extension != null) { 371 extension = "." + extension; 372 } 373 } 374 } 375 if (extension == null) { 376 extension = filename.substring(dotIndex); 377 } 378 filename = filename.substring(0, dotIndex); 379 } 380 381 return filename + extension; 382 } 383 384 /** Regex used to parse content-disposition headers */ 385 private static final Pattern CONTENT_DISPOSITION_PATTERN = 386 Pattern.compile("attachment;\\s*filename\\s*=\\s*(\"?)([^\"]*)\\1\\s*$", 387 Pattern.CASE_INSENSITIVE); 388 389 /* 390 * Parse the Content-Disposition HTTP Header. The format of the header 391 * is defined here: http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html 392 * This header provides a filename for content that is going to be 393 * downloaded to the file system. We only support the attachment type. 394 * Note that RFC 2616 specifies the filename value must be double-quoted. 395 * Unfortunately some servers do not quote the value so to maintain 396 * consistent behaviour with other browsers, we allow unquoted values too. 397 */ 398 static String parseContentDisposition(String contentDisposition) { 399 try { 400 Matcher m = CONTENT_DISPOSITION_PATTERN.matcher(contentDisposition); 401 if (m.find()) { 402 return m.group(2); 403 } 404 } catch (IllegalStateException ex) { 405 // This function is defined as returning null when it can't parse the header 406 } 407 return null; 408 } 409 } 410