Home | History | Annotate | Download | only in webkit
      1 /*
      2  * Copyright (C) 2006 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package android.webkit;
     18 
     19 import java.io.UnsupportedEncodingException;
     20 import java.util.Locale;
     21 import java.util.regex.Matcher;
     22 import java.util.regex.Pattern;
     23 
     24 import android.net.Uri;
     25 import android.net.ParseException;
     26 import android.net.WebAddress;
     27 import android.util.Log;
     28 
     29 public final class URLUtil {
     30 
     31     private static final String LOGTAG = "webkit";
     32 
     33     // To refer to bar.png under your package's assets/foo/ directory, use
     34     // "file:///android_asset/foo/bar.png".
     35     static final String ASSET_BASE = "file:///android_asset/";
     36     // to refer to bar.png under your package's res/drawable/ directory, use
     37     // "file:///android_res/drawable/bar.png". Use "drawable" to refer to
     38     // "drawable-hdpi" directory as well.
     39     static final String RESOURCE_BASE = "file:///android_res/";
     40     static final String FILE_BASE = "file://";
     41     static final String PROXY_BASE = "file:///cookieless_proxy/";
     42     static final String CONTENT_BASE = "content:";
     43 
     44     /**
     45      * Cleans up (if possible) user-entered web addresses
     46      */
     47     public static String guessUrl(String inUrl) {
     48 
     49         String retVal = inUrl;
     50         WebAddress webAddress;
     51 
     52         if (DebugFlags.URL_UTIL) Log.v(LOGTAG, "guessURL before queueRequest: " + inUrl);
     53 
     54         if (inUrl.length() == 0) return inUrl;
     55         if (inUrl.startsWith("about:")) return inUrl;
     56         // Do not try to interpret data scheme URLs
     57         if (inUrl.startsWith("data:")) return inUrl;
     58         // Do not try to interpret file scheme URLs
     59         if (inUrl.startsWith("file:")) return inUrl;
     60         // Do not try to interpret javascript scheme URLs
     61         if (inUrl.startsWith("javascript:")) return inUrl;
     62 
     63         // bug 762454: strip period off end of url
     64         if (inUrl.endsWith(".") == true) {
     65             inUrl = inUrl.substring(0, inUrl.length() - 1);
     66         }
     67 
     68         try {
     69             webAddress = new WebAddress(inUrl);
     70         } catch (ParseException ex) {
     71 
     72             if (DebugFlags.URL_UTIL) {
     73                 Log.v(LOGTAG, "smartUrlFilter: failed to parse url = " + inUrl);
     74             }
     75             return retVal;
     76         }
     77 
     78         // Check host
     79         if (webAddress.getHost().indexOf('.') == -1) {
     80             // no dot: user probably entered a bare domain.  try .com
     81             webAddress.setHost("www." + webAddress.getHost() + ".com");
     82         }
     83         return webAddress.toString();
     84     }
     85 
     86     public static String composeSearchUrl(String inQuery, String template,
     87                                           String queryPlaceHolder) {
     88         int placeHolderIndex = template.indexOf(queryPlaceHolder);
     89         if (placeHolderIndex < 0) {
     90             return null;
     91         }
     92 
     93         String query;
     94         StringBuilder buffer = new StringBuilder();
     95         buffer.append(template.substring(0, placeHolderIndex));
     96 
     97         try {
     98             query = java.net.URLEncoder.encode(inQuery, "utf-8");
     99             buffer.append(query);
    100         } catch (UnsupportedEncodingException ex) {
    101             return null;
    102         }
    103 
    104         buffer.append(template.substring(
    105                 placeHolderIndex + queryPlaceHolder.length()));
    106 
    107         return buffer.toString();
    108     }
    109 
    110     public static byte[] decode(byte[] url) throws IllegalArgumentException {
    111         if (url.length == 0) {
    112             return new byte[0];
    113         }
    114 
    115         // Create a new byte array with the same length to ensure capacity
    116         byte[] tempData = new byte[url.length];
    117 
    118         int tempCount = 0;
    119         for (int i = 0; i < url.length; i++) {
    120             byte b = url[i];
    121             if (b == '%') {
    122                 if (url.length - i > 2) {
    123                     b = (byte) (parseHex(url[i + 1]) * 16
    124                             + parseHex(url[i + 2]));
    125                     i += 2;
    126                 } else {
    127                     throw new IllegalArgumentException("Invalid format");
    128                 }
    129             }
    130             tempData[tempCount++] = b;
    131         }
    132         byte[] retData = new byte[tempCount];
    133         System.arraycopy(tempData, 0, retData, 0, tempCount);
    134         return retData;
    135     }
    136 
    137     /**
    138      * @return True iff the url is correctly URL encoded
    139      */
    140     static boolean verifyURLEncoding(String url) {
    141         int count = url.length();
    142         if (count == 0) {
    143             return false;
    144         }
    145 
    146         int index = url.indexOf('%');
    147         while (index >= 0 && index < count) {
    148             if (index < count - 2) {
    149                 try {
    150                     parseHex((byte) url.charAt(++index));
    151                     parseHex((byte) url.charAt(++index));
    152                 } catch (IllegalArgumentException e) {
    153                     return false;
    154                 }
    155             } else {
    156                 return false;
    157             }
    158             index = url.indexOf('%', index + 1);
    159         }
    160         return true;
    161     }
    162 
    163     private static int parseHex(byte b) {
    164         if (b >= '0' && b <= '9') return (b - '0');
    165         if (b >= 'A' && b <= 'F') return (b - 'A' + 10);
    166         if (b >= 'a' && b <= 'f') return (b - 'a' + 10);
    167 
    168         throw new IllegalArgumentException("Invalid hex char '" + b + "'");
    169     }
    170 
    171     /**
    172      * @return True iff the url is an asset file.
    173      */
    174     public static boolean isAssetUrl(String url) {
    175         return (null != url) && url.startsWith(ASSET_BASE);
    176     }
    177 
    178     /**
    179      * @return True iff the url is a resource file.
    180      * @hide
    181      */
    182     public static boolean isResourceUrl(String url) {
    183         return (null != url) && url.startsWith(RESOURCE_BASE);
    184     }
    185 
    186     /**
    187      * @return True iff the url is a proxy url to allow cookieless network
    188      * requests from a file url.
    189      * @deprecated Cookieless proxy is no longer supported.
    190      */
    191     @Deprecated
    192     public static boolean isCookielessProxyUrl(String url) {
    193         return (null != url) && url.startsWith(PROXY_BASE);
    194     }
    195 
    196     /**
    197      * @return True iff the url is a local file.
    198      */
    199     public static boolean isFileUrl(String url) {
    200         return (null != url) && (url.startsWith(FILE_BASE) &&
    201                                  !url.startsWith(ASSET_BASE) &&
    202                                  !url.startsWith(PROXY_BASE));
    203     }
    204 
    205     /**
    206      * @return True iff the url is an about: url.
    207      */
    208     public static boolean isAboutUrl(String url) {
    209         return (null != url) && url.startsWith("about:");
    210     }
    211 
    212     /**
    213      * @return True iff the url is a data: url.
    214      */
    215     public static boolean isDataUrl(String url) {
    216         return (null != url) && url.startsWith("data:");
    217     }
    218 
    219     /**
    220      * @return True iff the url is a javascript: url.
    221      */
    222     public static boolean isJavaScriptUrl(String url) {
    223         return (null != url) && url.startsWith("javascript:");
    224     }
    225 
    226     /**
    227      * @return True iff the url is an http: url.
    228      */
    229     public static boolean isHttpUrl(String url) {
    230         return (null != url) &&
    231                (url.length() > 6) &&
    232                url.substring(0, 7).equalsIgnoreCase("http://");
    233     }
    234 
    235     /**
    236      * @return True iff the url is an https: url.
    237      */
    238     public static boolean isHttpsUrl(String url) {
    239         return (null != url) &&
    240                (url.length() > 7) &&
    241                url.substring(0, 8).equalsIgnoreCase("https://");
    242     }
    243 
    244     /**
    245      * @return True iff the url is a network url.
    246      */
    247     public static boolean isNetworkUrl(String url) {
    248         if (url == null || url.length() == 0) {
    249             return false;
    250         }
    251         return isHttpUrl(url) || isHttpsUrl(url);
    252     }
    253 
    254     /**
    255      * @return True iff the url is a content: url.
    256      */
    257     public static boolean isContentUrl(String url) {
    258         return (null != url) && url.startsWith(CONTENT_BASE);
    259     }
    260 
    261     /**
    262      * @return True iff the url is valid.
    263      */
    264     public static boolean isValidUrl(String url) {
    265         if (url == null || url.length() == 0) {
    266             return false;
    267         }
    268 
    269         return (isAssetUrl(url) ||
    270                 isResourceUrl(url) ||
    271                 isFileUrl(url) ||
    272                 isAboutUrl(url) ||
    273                 isHttpUrl(url) ||
    274                 isHttpsUrl(url) ||
    275                 isJavaScriptUrl(url) ||
    276                 isContentUrl(url));
    277     }
    278 
    279     /**
    280      * Strips the url of the anchor.
    281      */
    282     public static String stripAnchor(String url) {
    283         int anchorIndex = url.indexOf('#');
    284         if (anchorIndex != -1) {
    285             return url.substring(0, anchorIndex);
    286         }
    287         return url;
    288     }
    289 
    290     /**
    291      * Guesses canonical filename that a download would have, using
    292      * the URL and contentDisposition. File extension, if not defined,
    293      * is added based on the mimetype
    294      * @param url Url to the content
    295      * @param contentDisposition Content-Disposition HTTP header or null
    296      * @param mimeType Mime-type of the content or null
    297      *
    298      * @return suggested filename
    299      */
    300     public static final String guessFileName(
    301             String url,
    302             String contentDisposition,
    303             String mimeType) {
    304         String filename = null;
    305         String extension = null;
    306 
    307         // If we couldn't do anything with the hint, move toward the content disposition
    308         if (filename == null && contentDisposition != null) {
    309             filename = parseContentDisposition(contentDisposition);
    310             if (filename != null) {
    311                 int index = filename.lastIndexOf('/') + 1;
    312                 if (index > 0) {
    313                     filename = filename.substring(index);
    314                 }
    315             }
    316         }
    317 
    318         // If all the other http-related approaches failed, use the plain uri
    319         if (filename == null) {
    320             String decodedUrl = Uri.decode(url);
    321             if (decodedUrl != null) {
    322                 int queryIndex = decodedUrl.indexOf('?');
    323                 // If there is a query string strip it, same as desktop browsers
    324                 if (queryIndex > 0) {
    325                     decodedUrl = decodedUrl.substring(0, queryIndex);
    326                 }
    327                 if (!decodedUrl.endsWith("/")) {
    328                     int index = decodedUrl.lastIndexOf('/') + 1;
    329                     if (index > 0) {
    330                         filename = decodedUrl.substring(index);
    331                     }
    332                 }
    333             }
    334         }
    335 
    336         // Finally, if couldn't get filename from URI, get a generic filename
    337         if (filename == null) {
    338             filename = "downloadfile";
    339         }
    340 
    341         // Split filename between base and extension
    342         // Add an extension if filename does not have one
    343         int dotIndex = filename.indexOf('.');
    344         if (dotIndex < 0) {
    345             if (mimeType != null) {
    346                 extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
    347                 if (extension != null) {
    348                     extension = "." + extension;
    349                 }
    350             }
    351             if (extension == null) {
    352                 if (mimeType != null && mimeType.toLowerCase(Locale.ROOT).startsWith("text/")) {
    353                     if (mimeType.equalsIgnoreCase("text/html")) {
    354                         extension = ".html";
    355                     } else {
    356                         extension = ".txt";
    357                     }
    358                 } else {
    359                     extension = ".bin";
    360                 }
    361             }
    362         } else {
    363             if (mimeType != null) {
    364                 // Compare the last segment of the extension against the mime type.
    365                 // If there's a mismatch, discard the entire extension.
    366                 int lastDotIndex = filename.lastIndexOf('.');
    367                 String typeFromExt = MimeTypeMap.getSingleton().getMimeTypeFromExtension(
    368                         filename.substring(lastDotIndex + 1));
    369                 if (typeFromExt != null && !typeFromExt.equalsIgnoreCase(mimeType)) {
    370                     extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
    371                     if (extension != null) {
    372                         extension = "." + extension;
    373                     }
    374                 }
    375             }
    376             if (extension == null) {
    377                 extension = filename.substring(dotIndex);
    378             }
    379             filename = filename.substring(0, dotIndex);
    380         }
    381 
    382         return filename + extension;
    383     }
    384 
    385     /** Regex used to parse content-disposition headers */
    386     private static final Pattern CONTENT_DISPOSITION_PATTERN =
    387             Pattern.compile("attachment;\\s*filename\\s*=\\s*(\"?)([^\"]*)\\1\\s*$",
    388             Pattern.CASE_INSENSITIVE);
    389 
    390     /*
    391      * Parse the Content-Disposition HTTP Header. The format of the header
    392      * is defined here: http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html
    393      * This header provides a filename for content that is going to be
    394      * downloaded to the file system. We only support the attachment type.
    395      * Note that RFC 2616 specifies the filename value must be double-quoted.
    396      * Unfortunately some servers do not quote the value so to maintain
    397      * consistent behaviour with other browsers, we allow unquoted values too.
    398      */
    399     static String parseContentDisposition(String contentDisposition) {
    400         try {
    401             Matcher m = CONTENT_DISPOSITION_PATTERN.matcher(contentDisposition);
    402             if (m.find()) {
    403                 return m.group(2);
    404             }
    405         } catch (IllegalStateException ex) {
    406              // This function is defined as returning null when it can't parse the header
    407         }
    408         return null;
    409     }
    410 }
    411