1 /* 2 * Copyright (C) 2006 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package android.webkit; 18 19 import java.io.UnsupportedEncodingException; 20 import java.util.Locale; 21 import java.util.regex.Matcher; 22 import java.util.regex.Pattern; 23 24 import android.net.Uri; 25 import android.net.ParseException; 26 import android.net.WebAddress; 27 import android.util.Log; 28 29 public final class URLUtil { 30 31 private static final String LOGTAG = "webkit"; 32 private static final boolean TRACE = false; 33 34 // to refer to bar.png under your package's asset/foo/ directory, use 35 // "file:///android_asset/foo/bar.png". 36 static final String ASSET_BASE = "file:///android_asset/"; 37 // to refer to bar.png under your package's res/drawable/ directory, use 38 // "file:///android_res/drawable/bar.png". Use "drawable" to refer to 39 // "drawable-hdpi" directory as well. 40 static final String RESOURCE_BASE = "file:///android_res/"; 41 static final String FILE_BASE = "file://"; 42 static final String PROXY_BASE = "file:///cookieless_proxy/"; 43 static final String CONTENT_BASE = "content:"; 44 45 /** 46 * Cleans up (if possible) user-entered web addresses 47 */ 48 public static String guessUrl(String inUrl) { 49 50 String retVal = inUrl; 51 WebAddress webAddress; 52 53 if (TRACE) Log.v(LOGTAG, "guessURL before queueRequest: " + inUrl); 54 55 if (inUrl.length() == 0) return inUrl; 56 if (inUrl.startsWith("about:")) return inUrl; 57 // Do not try to interpret data scheme URLs 58 if (inUrl.startsWith("data:")) return inUrl; 59 // Do not try to interpret file scheme URLs 60 if (inUrl.startsWith("file:")) return inUrl; 61 // Do not try to interpret javascript scheme URLs 62 if (inUrl.startsWith("javascript:")) return inUrl; 63 64 // bug 762454: strip period off end of url 65 if (inUrl.endsWith(".") == true) { 66 inUrl = inUrl.substring(0, inUrl.length() - 1); 67 } 68 69 try { 70 webAddress = new WebAddress(inUrl); 71 } catch (ParseException ex) { 72 73 if (TRACE) { 74 Log.v(LOGTAG, "smartUrlFilter: failed to parse url = " + inUrl); 75 } 76 return retVal; 77 } 78 79 // Check host 80 if (webAddress.getHost().indexOf('.') == -1) { 81 // no dot: user probably entered a bare domain. try .com 82 webAddress.setHost("www." + webAddress.getHost() + ".com"); 83 } 84 return webAddress.toString(); 85 } 86 87 public static String composeSearchUrl(String inQuery, String template, 88 String queryPlaceHolder) { 89 int placeHolderIndex = template.indexOf(queryPlaceHolder); 90 if (placeHolderIndex < 0) { 91 return null; 92 } 93 94 String query; 95 StringBuilder buffer = new StringBuilder(); 96 buffer.append(template.substring(0, placeHolderIndex)); 97 98 try { 99 query = java.net.URLEncoder.encode(inQuery, "utf-8"); 100 buffer.append(query); 101 } catch (UnsupportedEncodingException ex) { 102 return null; 103 } 104 105 buffer.append(template.substring( 106 placeHolderIndex + queryPlaceHolder.length())); 107 108 return buffer.toString(); 109 } 110 111 public static byte[] decode(byte[] url) throws IllegalArgumentException { 112 if (url.length == 0) { 113 return new byte[0]; 114 } 115 116 // Create a new byte array with the same length to ensure capacity 117 byte[] tempData = new byte[url.length]; 118 119 int tempCount = 0; 120 for (int i = 0; i < url.length; i++) { 121 byte b = url[i]; 122 if (b == '%') { 123 if (url.length - i > 2) { 124 b = (byte) (parseHex(url[i + 1]) * 16 125 + parseHex(url[i + 2])); 126 i += 2; 127 } else { 128 throw new IllegalArgumentException("Invalid format"); 129 } 130 } 131 tempData[tempCount++] = b; 132 } 133 byte[] retData = new byte[tempCount]; 134 System.arraycopy(tempData, 0, retData, 0, tempCount); 135 return retData; 136 } 137 138 /** 139 * @return True iff the url is correctly URL encoded 140 */ 141 static boolean verifyURLEncoding(String url) { 142 int count = url.length(); 143 if (count == 0) { 144 return false; 145 } 146 147 int index = url.indexOf('%'); 148 while (index >= 0 && index < count) { 149 if (index < count - 2) { 150 try { 151 parseHex((byte) url.charAt(++index)); 152 parseHex((byte) url.charAt(++index)); 153 } catch (IllegalArgumentException e) { 154 return false; 155 } 156 } else { 157 return false; 158 } 159 index = url.indexOf('%', index + 1); 160 } 161 return true; 162 } 163 164 private static int parseHex(byte b) { 165 if (b >= '0' && b <= '9') return (b - '0'); 166 if (b >= 'A' && b <= 'F') return (b - 'A' + 10); 167 if (b >= 'a' && b <= 'f') return (b - 'a' + 10); 168 169 throw new IllegalArgumentException("Invalid hex char '" + b + "'"); 170 } 171 172 /** 173 * @return True iff the url is an asset file. 174 */ 175 public static boolean isAssetUrl(String url) { 176 return (null != url) && url.startsWith(ASSET_BASE); 177 } 178 179 /** 180 * @return True iff the url is a resource file. 181 * @hide 182 */ 183 public static boolean isResourceUrl(String url) { 184 return (null != url) && url.startsWith(RESOURCE_BASE); 185 } 186 187 /** 188 * @return True iff the url is a proxy url to allow cookieless network 189 * requests from a file url. 190 * @deprecated Cookieless proxy is no longer supported. 191 */ 192 @Deprecated 193 public static boolean isCookielessProxyUrl(String url) { 194 return (null != url) && url.startsWith(PROXY_BASE); 195 } 196 197 /** 198 * @return True iff the url is a local file. 199 */ 200 public static boolean isFileUrl(String url) { 201 return (null != url) && (url.startsWith(FILE_BASE) && 202 !url.startsWith(ASSET_BASE) && 203 !url.startsWith(PROXY_BASE)); 204 } 205 206 /** 207 * @return True iff the url is an about: url. 208 */ 209 public static boolean isAboutUrl(String url) { 210 return (null != url) && url.startsWith("about:"); 211 } 212 213 /** 214 * @return True iff the url is a data: url. 215 */ 216 public static boolean isDataUrl(String url) { 217 return (null != url) && url.startsWith("data:"); 218 } 219 220 /** 221 * @return True iff the url is a javascript: url. 222 */ 223 public static boolean isJavaScriptUrl(String url) { 224 return (null != url) && url.startsWith("javascript:"); 225 } 226 227 /** 228 * @return True iff the url is an http: url. 229 */ 230 public static boolean isHttpUrl(String url) { 231 return (null != url) && 232 (url.length() > 6) && 233 url.substring(0, 7).equalsIgnoreCase("http://"); 234 } 235 236 /** 237 * @return True iff the url is an https: url. 238 */ 239 public static boolean isHttpsUrl(String url) { 240 return (null != url) && 241 (url.length() > 7) && 242 url.substring(0, 8).equalsIgnoreCase("https://"); 243 } 244 245 /** 246 * @return True iff the url is a network url. 247 */ 248 public static boolean isNetworkUrl(String url) { 249 if (url == null || url.length() == 0) { 250 return false; 251 } 252 return isHttpUrl(url) || isHttpsUrl(url); 253 } 254 255 /** 256 * @return True iff the url is a content: url. 257 */ 258 public static boolean isContentUrl(String url) { 259 return (null != url) && url.startsWith(CONTENT_BASE); 260 } 261 262 /** 263 * @return True iff the url is valid. 264 */ 265 public static boolean isValidUrl(String url) { 266 if (url == null || url.length() == 0) { 267 return false; 268 } 269 270 return (isAssetUrl(url) || 271 isResourceUrl(url) || 272 isFileUrl(url) || 273 isAboutUrl(url) || 274 isHttpUrl(url) || 275 isHttpsUrl(url) || 276 isJavaScriptUrl(url) || 277 isContentUrl(url)); 278 } 279 280 /** 281 * Strips the url of the anchor. 282 */ 283 public static String stripAnchor(String url) { 284 int anchorIndex = url.indexOf('#'); 285 if (anchorIndex != -1) { 286 return url.substring(0, anchorIndex); 287 } 288 return url; 289 } 290 291 /** 292 * Guesses canonical filename that a download would have, using 293 * the URL and contentDisposition. File extension, if not defined, 294 * is added based on the mimetype 295 * @param url Url to the content 296 * @param contentDisposition Content-Disposition HTTP header or null 297 * @param mimeType Mime-type of the content or null 298 * 299 * @return suggested filename 300 */ 301 public static final String guessFileName( 302 String url, 303 String contentDisposition, 304 String mimeType) { 305 String filename = null; 306 String extension = null; 307 308 // If we couldn't do anything with the hint, move toward the content disposition 309 if (filename == null && contentDisposition != null) { 310 filename = parseContentDisposition(contentDisposition); 311 if (filename != null) { 312 int index = filename.lastIndexOf('/') + 1; 313 if (index > 0) { 314 filename = filename.substring(index); 315 } 316 } 317 } 318 319 // If all the other http-related approaches failed, use the plain uri 320 if (filename == null) { 321 String decodedUrl = Uri.decode(url); 322 if (decodedUrl != null) { 323 int queryIndex = decodedUrl.indexOf('?'); 324 // If there is a query string strip it, same as desktop browsers 325 if (queryIndex > 0) { 326 decodedUrl = decodedUrl.substring(0, queryIndex); 327 } 328 if (!decodedUrl.endsWith("/")) { 329 int index = decodedUrl.lastIndexOf('/') + 1; 330 if (index > 0) { 331 filename = decodedUrl.substring(index); 332 } 333 } 334 } 335 } 336 337 // Finally, if couldn't get filename from URI, get a generic filename 338 if (filename == null) { 339 filename = "downloadfile"; 340 } 341 342 // Split filename between base and extension 343 // Add an extension if filename does not have one 344 int dotIndex = filename.indexOf('.'); 345 if (dotIndex < 0) { 346 if (mimeType != null) { 347 extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType); 348 if (extension != null) { 349 extension = "." + extension; 350 } 351 } 352 if (extension == null) { 353 if (mimeType != null && mimeType.toLowerCase(Locale.ROOT).startsWith("text/")) { 354 if (mimeType.equalsIgnoreCase("text/html")) { 355 extension = ".html"; 356 } else { 357 extension = ".txt"; 358 } 359 } else { 360 extension = ".bin"; 361 } 362 } 363 } else { 364 if (mimeType != null) { 365 // Compare the last segment of the extension against the mime type. 366 // If there's a mismatch, discard the entire extension. 367 int lastDotIndex = filename.lastIndexOf('.'); 368 String typeFromExt = MimeTypeMap.getSingleton().getMimeTypeFromExtension( 369 filename.substring(lastDotIndex + 1)); 370 if (typeFromExt != null && !typeFromExt.equalsIgnoreCase(mimeType)) { 371 extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType); 372 if (extension != null) { 373 extension = "." + extension; 374 } 375 } 376 } 377 if (extension == null) { 378 extension = filename.substring(dotIndex); 379 } 380 filename = filename.substring(0, dotIndex); 381 } 382 383 return filename + extension; 384 } 385 386 /** Regex used to parse content-disposition headers */ 387 private static final Pattern CONTENT_DISPOSITION_PATTERN = 388 Pattern.compile("attachment;\\s*filename\\s*=\\s*(\"?)([^\"]*)\\1\\s*$", 389 Pattern.CASE_INSENSITIVE); 390 391 /* 392 * Parse the Content-Disposition HTTP Header. The format of the header 393 * is defined here: http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html 394 * This header provides a filename for content that is going to be 395 * downloaded to the file system. We only support the attachment type. 396 * Note that RFC 2616 specifies the filename value must be double-quoted. 397 * Unfortunately some servers do not quote the value so to maintain 398 * consistent behaviour with other browsers, we allow unquoted values too. 399 */ 400 static String parseContentDisposition(String contentDisposition) { 401 try { 402 Matcher m = CONTENT_DISPOSITION_PATTERN.matcher(contentDisposition); 403 if (m.find()) { 404 return m.group(2); 405 } 406 } catch (IllegalStateException ex) { 407 // This function is defined as returning null when it can't parse the header 408 } 409 return null; 410 } 411 } 412