Home | History | Annotate | Download | only in webkit
      1 /*
      2  * Copyright (C) 2006 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package android.webkit;
     18 
     19 import android.annotation.Nullable;
     20 import android.net.ParseException;
     21 import android.net.Uri;
     22 import android.net.WebAddress;
     23 import android.util.Log;
     24 
     25 import java.io.UnsupportedEncodingException;
     26 import java.util.Locale;
     27 import java.util.regex.Matcher;
     28 import java.util.regex.Pattern;
     29 
     30 public final class URLUtil {
     31 
     32     private static final String LOGTAG = "webkit";
     33     private static final boolean TRACE = false;
     34 
     35     // to refer to bar.png under your package's asset/foo/ directory, use
     36     // "file:///android_asset/foo/bar.png".
     37     static final String ASSET_BASE = "file:///android_asset/";
     38     // to refer to bar.png under your package's res/drawable/ directory, use
     39     // "file:///android_res/drawable/bar.png". Use "drawable" to refer to
     40     // "drawable-hdpi" directory as well.
     41     static final String RESOURCE_BASE = "file:///android_res/";
     42     static final String FILE_BASE = "file:";
     43     static final String PROXY_BASE = "file:///cookieless_proxy/";
     44     static final String CONTENT_BASE = "content:";
     45 
     46     /**
     47      * Cleans up (if possible) user-entered web addresses
     48      */
     49     public static String guessUrl(String inUrl) {
     50 
     51         String retVal = inUrl;
     52         WebAddress webAddress;
     53 
     54         if (TRACE) Log.v(LOGTAG, "guessURL before queueRequest: " + inUrl);
     55 
     56         if (inUrl.length() == 0) return inUrl;
     57         if (inUrl.startsWith("about:")) return inUrl;
     58         // Do not try to interpret data scheme URLs
     59         if (inUrl.startsWith("data:")) return inUrl;
     60         // Do not try to interpret file scheme URLs
     61         if (inUrl.startsWith("file:")) return inUrl;
     62         // Do not try to interpret javascript scheme URLs
     63         if (inUrl.startsWith("javascript:")) return inUrl;
     64 
     65         // bug 762454: strip period off end of url
     66         if (inUrl.endsWith(".") == true) {
     67             inUrl = inUrl.substring(0, inUrl.length() - 1);
     68         }
     69 
     70         try {
     71             webAddress = new WebAddress(inUrl);
     72         } catch (ParseException ex) {
     73 
     74             if (TRACE) {
     75                 Log.v(LOGTAG, "smartUrlFilter: failed to parse url = " + inUrl);
     76             }
     77             return retVal;
     78         }
     79 
     80         // Check host
     81         if (webAddress.getHost().indexOf('.') == -1) {
     82             // no dot: user probably entered a bare domain.  try .com
     83             webAddress.setHost("www." + webAddress.getHost() + ".com");
     84         }
     85         return webAddress.toString();
     86     }
     87 
     88     public static String composeSearchUrl(String inQuery, String template,
     89                                           String queryPlaceHolder) {
     90         int placeHolderIndex = template.indexOf(queryPlaceHolder);
     91         if (placeHolderIndex < 0) {
     92             return null;
     93         }
     94 
     95         String query;
     96         StringBuilder buffer = new StringBuilder();
     97         buffer.append(template.substring(0, placeHolderIndex));
     98 
     99         try {
    100             query = java.net.URLEncoder.encode(inQuery, "utf-8");
    101             buffer.append(query);
    102         } catch (UnsupportedEncodingException ex) {
    103             return null;
    104         }
    105 
    106         buffer.append(template.substring(
    107                 placeHolderIndex + queryPlaceHolder.length()));
    108 
    109         return buffer.toString();
    110     }
    111 
    112     public static byte[] decode(byte[] url) throws IllegalArgumentException {
    113         if (url.length == 0) {
    114             return new byte[0];
    115         }
    116 
    117         // Create a new byte array with the same length to ensure capacity
    118         byte[] tempData = new byte[url.length];
    119 
    120         int tempCount = 0;
    121         for (int i = 0; i < url.length; i++) {
    122             byte b = url[i];
    123             if (b == '%') {
    124                 if (url.length - i > 2) {
    125                     b = (byte) (parseHex(url[i + 1]) * 16
    126                             + parseHex(url[i + 2]));
    127                     i += 2;
    128                 } else {
    129                     throw new IllegalArgumentException("Invalid format");
    130                 }
    131             }
    132             tempData[tempCount++] = b;
    133         }
    134         byte[] retData = new byte[tempCount];
    135         System.arraycopy(tempData, 0, retData, 0, tempCount);
    136         return retData;
    137     }
    138 
    139     /**
    140      * @return {@code true} if the url is correctly URL encoded
    141      */
    142     static boolean verifyURLEncoding(String url) {
    143         int count = url.length();
    144         if (count == 0) {
    145             return false;
    146         }
    147 
    148         int index = url.indexOf('%');
    149         while (index >= 0 && index < count) {
    150             if (index < count - 2) {
    151                 try {
    152                     parseHex((byte) url.charAt(++index));
    153                     parseHex((byte) url.charAt(++index));
    154                 } catch (IllegalArgumentException e) {
    155                     return false;
    156                 }
    157             } else {
    158                 return false;
    159             }
    160             index = url.indexOf('%', index + 1);
    161         }
    162         return true;
    163     }
    164 
    165     private static int parseHex(byte b) {
    166         if (b >= '0' && b <= '9') return (b - '0');
    167         if (b >= 'A' && b <= 'F') return (b - 'A' + 10);
    168         if (b >= 'a' && b <= 'f') return (b - 'a' + 10);
    169 
    170         throw new IllegalArgumentException("Invalid hex char '" + b + "'");
    171     }
    172 
    173     /**
    174      * @return {@code true} if the url is an asset file.
    175      */
    176     public static boolean isAssetUrl(String url) {
    177         return (null != url) && url.startsWith(ASSET_BASE);
    178     }
    179 
    180     /**
    181      * @return {@code true} if the url is a resource file.
    182      * @hide
    183      */
    184     public static boolean isResourceUrl(String url) {
    185         return (null != url) && url.startsWith(RESOURCE_BASE);
    186     }
    187 
    188     /**
    189      * @return {@code true} if the url is a proxy url to allow cookieless network
    190      * requests from a file url.
    191      * @deprecated Cookieless proxy is no longer supported.
    192      */
    193     @Deprecated
    194     public static boolean isCookielessProxyUrl(String url) {
    195         return (null != url) && url.startsWith(PROXY_BASE);
    196     }
    197 
    198     /**
    199      * @return {@code true} if the url is a local file.
    200      */
    201     public static boolean isFileUrl(String url) {
    202         return (null != url) && (url.startsWith(FILE_BASE) &&
    203                                  !url.startsWith(ASSET_BASE) &&
    204                                  !url.startsWith(PROXY_BASE));
    205     }
    206 
    207     /**
    208      * @return {@code true} if the url is an about: url.
    209      */
    210     public static boolean isAboutUrl(String url) {
    211         return (null != url) && url.startsWith("about:");
    212     }
    213 
    214     /**
    215      * @return {@code true} if the url is a data: url.
    216      */
    217     public static boolean isDataUrl(String url) {
    218         return (null != url) && url.startsWith("data:");
    219     }
    220 
    221     /**
    222      * @return {@code true} if the url is a javascript: url.
    223      */
    224     public static boolean isJavaScriptUrl(String url) {
    225         return (null != url) && url.startsWith("javascript:");
    226     }
    227 
    228     /**
    229      * @return {@code true} if the url is an http: url.
    230      */
    231     public static boolean isHttpUrl(String url) {
    232         return (null != url) &&
    233                (url.length() > 6) &&
    234                url.substring(0, 7).equalsIgnoreCase("http://");
    235     }
    236 
    237     /**
    238      * @return {@code true} if the url is an https: url.
    239      */
    240     public static boolean isHttpsUrl(String url) {
    241         return (null != url) &&
    242                (url.length() > 7) &&
    243                url.substring(0, 8).equalsIgnoreCase("https://");
    244     }
    245 
    246     /**
    247      * @return {@code true} if the url is a network url.
    248      */
    249     public static boolean isNetworkUrl(String url) {
    250         if (url == null || url.length() == 0) {
    251             return false;
    252         }
    253         return isHttpUrl(url) || isHttpsUrl(url);
    254     }
    255 
    256     /**
    257      * @return {@code true} if the url is a content: url.
    258      */
    259     public static boolean isContentUrl(String url) {
    260         return (null != url) && url.startsWith(CONTENT_BASE);
    261     }
    262 
    263     /**
    264      * @return {@code true} if the url is valid.
    265      */
    266     public static boolean isValidUrl(String url) {
    267         if (url == null || url.length() == 0) {
    268             return false;
    269         }
    270 
    271         return (isAssetUrl(url) ||
    272                 isResourceUrl(url) ||
    273                 isFileUrl(url) ||
    274                 isAboutUrl(url) ||
    275                 isHttpUrl(url) ||
    276                 isHttpsUrl(url) ||
    277                 isJavaScriptUrl(url) ||
    278                 isContentUrl(url));
    279     }
    280 
    281     /**
    282      * Strips the url of the anchor.
    283      */
    284     public static String stripAnchor(String url) {
    285         int anchorIndex = url.indexOf('#');
    286         if (anchorIndex != -1) {
    287             return url.substring(0, anchorIndex);
    288         }
    289         return url;
    290     }
    291 
    292     /**
    293      * Guesses canonical filename that a download would have, using
    294      * the URL and contentDisposition. File extension, if not defined,
    295      * is added based on the mimetype
    296      * @param url Url to the content
    297      * @param contentDisposition Content-Disposition HTTP header or {@code null}
    298      * @param mimeType Mime-type of the content or {@code null}
    299      *
    300      * @return suggested filename
    301      */
    302     public static final String guessFileName(
    303             String url,
    304             @Nullable String contentDisposition,
    305             @Nullable String mimeType) {
    306         String filename = null;
    307         String extension = null;
    308 
    309         // If we couldn't do anything with the hint, move toward the content disposition
    310         if (filename == null && contentDisposition != null) {
    311             filename = parseContentDisposition(contentDisposition);
    312             if (filename != null) {
    313                 int index = filename.lastIndexOf('/') + 1;
    314                 if (index > 0) {
    315                     filename = filename.substring(index);
    316                 }
    317             }
    318         }
    319 
    320         // If all the other http-related approaches failed, use the plain uri
    321         if (filename == null) {
    322             String decodedUrl = Uri.decode(url);
    323             if (decodedUrl != null) {
    324                 int queryIndex = decodedUrl.indexOf('?');
    325                 // If there is a query string strip it, same as desktop browsers
    326                 if (queryIndex > 0) {
    327                     decodedUrl = decodedUrl.substring(0, queryIndex);
    328                 }
    329                 if (!decodedUrl.endsWith("/")) {
    330                     int index = decodedUrl.lastIndexOf('/') + 1;
    331                     if (index > 0) {
    332                         filename = decodedUrl.substring(index);
    333                     }
    334                 }
    335             }
    336         }
    337 
    338         // Finally, if couldn't get filename from URI, get a generic filename
    339         if (filename == null) {
    340             filename = "downloadfile";
    341         }
    342 
    343         // Split filename between base and extension
    344         // Add an extension if filename does not have one
    345         int dotIndex = filename.indexOf('.');
    346         if (dotIndex < 0) {
    347             if (mimeType != null) {
    348                 extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
    349                 if (extension != null) {
    350                     extension = "." + extension;
    351                 }
    352             }
    353             if (extension == null) {
    354                 if (mimeType != null && mimeType.toLowerCase(Locale.ROOT).startsWith("text/")) {
    355                     if (mimeType.equalsIgnoreCase("text/html")) {
    356                         extension = ".html";
    357                     } else {
    358                         extension = ".txt";
    359                     }
    360                 } else {
    361                     extension = ".bin";
    362                 }
    363             }
    364         } else {
    365             if (mimeType != null) {
    366                 // Compare the last segment of the extension against the mime type.
    367                 // If there's a mismatch, discard the entire extension.
    368                 int lastDotIndex = filename.lastIndexOf('.');
    369                 String typeFromExt = MimeTypeMap.getSingleton().getMimeTypeFromExtension(
    370                         filename.substring(lastDotIndex + 1));
    371                 if (typeFromExt != null && !typeFromExt.equalsIgnoreCase(mimeType)) {
    372                     extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
    373                     if (extension != null) {
    374                         extension = "." + extension;
    375                     }
    376                 }
    377             }
    378             if (extension == null) {
    379                 extension = filename.substring(dotIndex);
    380             }
    381             filename = filename.substring(0, dotIndex);
    382         }
    383 
    384         return filename + extension;
    385     }
    386 
    387     /** Regex used to parse content-disposition headers */
    388     private static final Pattern CONTENT_DISPOSITION_PATTERN =
    389             Pattern.compile("attachment;\\s*filename\\s*=\\s*(\"?)([^\"]*)\\1\\s*$",
    390             Pattern.CASE_INSENSITIVE);
    391 
    392     /**
    393      * Parse the Content-Disposition HTTP Header. The format of the header
    394      * is defined here: http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html
    395      * This header provides a filename for content that is going to be
    396      * downloaded to the file system. We only support the attachment type.
    397      * Note that RFC 2616 specifies the filename value must be double-quoted.
    398      * Unfortunately some servers do not quote the value so to maintain
    399      * consistent behaviour with other browsers, we allow unquoted values too.
    400      */
    401     static String parseContentDisposition(String contentDisposition) {
    402         try {
    403             Matcher m = CONTENT_DISPOSITION_PATTERN.matcher(contentDisposition);
    404             if (m.find()) {
    405                 return m.group(2);
    406             }
    407         } catch (IllegalStateException ex) {
    408              // This function is defined as returning null when it can't parse the header
    409         }
    410         return null;
    411     }
    412 }
    413