Home | History | Annotate | Download | only in webkit
      1 /*
      2  * Copyright (C) 2006 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package android.webkit;
     18 
     19 import java.io.UnsupportedEncodingException;
     20 import java.util.Locale;
     21 import java.util.regex.Matcher;
     22 import java.util.regex.Pattern;
     23 
     24 import android.net.Uri;
     25 import android.net.ParseException;
     26 import android.net.WebAddress;
     27 import android.util.Log;
     28 
     29 public final class URLUtil {
     30 
     31     private static final String LOGTAG = "webkit";
     32     private static final boolean TRACE = false;
     33 
     34     // to refer to bar.png under your package's asset/foo/ directory, use
     35     // "file:///android_asset/foo/bar.png".
     36     static final String ASSET_BASE = "file:///android_asset/";
     37     // to refer to bar.png under your package's res/drawable/ directory, use
     38     // "file:///android_res/drawable/bar.png". Use "drawable" to refer to
     39     // "drawable-hdpi" directory as well.
     40     static final String RESOURCE_BASE = "file:///android_res/";
     41     static final String FILE_BASE = "file://";
     42     static final String PROXY_BASE = "file:///cookieless_proxy/";
     43     static final String CONTENT_BASE = "content:";
     44 
     45     /**
     46      * Cleans up (if possible) user-entered web addresses
     47      */
     48     public static String guessUrl(String inUrl) {
     49 
     50         String retVal = inUrl;
     51         WebAddress webAddress;
     52 
     53         if (TRACE) Log.v(LOGTAG, "guessURL before queueRequest: " + inUrl);
     54 
     55         if (inUrl.length() == 0) return inUrl;
     56         if (inUrl.startsWith("about:")) return inUrl;
     57         // Do not try to interpret data scheme URLs
     58         if (inUrl.startsWith("data:")) return inUrl;
     59         // Do not try to interpret file scheme URLs
     60         if (inUrl.startsWith("file:")) return inUrl;
     61         // Do not try to interpret javascript scheme URLs
     62         if (inUrl.startsWith("javascript:")) return inUrl;
     63 
     64         // bug 762454: strip period off end of url
     65         if (inUrl.endsWith(".") == true) {
     66             inUrl = inUrl.substring(0, inUrl.length() - 1);
     67         }
     68 
     69         try {
     70             webAddress = new WebAddress(inUrl);
     71         } catch (ParseException ex) {
     72 
     73             if (TRACE) {
     74                 Log.v(LOGTAG, "smartUrlFilter: failed to parse url = " + inUrl);
     75             }
     76             return retVal;
     77         }
     78 
     79         // Check host
     80         if (webAddress.getHost().indexOf('.') == -1) {
     81             // no dot: user probably entered a bare domain.  try .com
     82             webAddress.setHost("www." + webAddress.getHost() + ".com");
     83         }
     84         return webAddress.toString();
     85     }
     86 
     87     public static String composeSearchUrl(String inQuery, String template,
     88                                           String queryPlaceHolder) {
     89         int placeHolderIndex = template.indexOf(queryPlaceHolder);
     90         if (placeHolderIndex < 0) {
     91             return null;
     92         }
     93 
     94         String query;
     95         StringBuilder buffer = new StringBuilder();
     96         buffer.append(template.substring(0, placeHolderIndex));
     97 
     98         try {
     99             query = java.net.URLEncoder.encode(inQuery, "utf-8");
    100             buffer.append(query);
    101         } catch (UnsupportedEncodingException ex) {
    102             return null;
    103         }
    104 
    105         buffer.append(template.substring(
    106                 placeHolderIndex + queryPlaceHolder.length()));
    107 
    108         return buffer.toString();
    109     }
    110 
    111     public static byte[] decode(byte[] url) throws IllegalArgumentException {
    112         if (url.length == 0) {
    113             return new byte[0];
    114         }
    115 
    116         // Create a new byte array with the same length to ensure capacity
    117         byte[] tempData = new byte[url.length];
    118 
    119         int tempCount = 0;
    120         for (int i = 0; i < url.length; i++) {
    121             byte b = url[i];
    122             if (b == '%') {
    123                 if (url.length - i > 2) {
    124                     b = (byte) (parseHex(url[i + 1]) * 16
    125                             + parseHex(url[i + 2]));
    126                     i += 2;
    127                 } else {
    128                     throw new IllegalArgumentException("Invalid format");
    129                 }
    130             }
    131             tempData[tempCount++] = b;
    132         }
    133         byte[] retData = new byte[tempCount];
    134         System.arraycopy(tempData, 0, retData, 0, tempCount);
    135         return retData;
    136     }
    137 
    138     /**
    139      * @return True iff the url is correctly URL encoded
    140      */
    141     static boolean verifyURLEncoding(String url) {
    142         int count = url.length();
    143         if (count == 0) {
    144             return false;
    145         }
    146 
    147         int index = url.indexOf('%');
    148         while (index >= 0 && index < count) {
    149             if (index < count - 2) {
    150                 try {
    151                     parseHex((byte) url.charAt(++index));
    152                     parseHex((byte) url.charAt(++index));
    153                 } catch (IllegalArgumentException e) {
    154                     return false;
    155                 }
    156             } else {
    157                 return false;
    158             }
    159             index = url.indexOf('%', index + 1);
    160         }
    161         return true;
    162     }
    163 
    164     private static int parseHex(byte b) {
    165         if (b >= '0' && b <= '9') return (b - '0');
    166         if (b >= 'A' && b <= 'F') return (b - 'A' + 10);
    167         if (b >= 'a' && b <= 'f') return (b - 'a' + 10);
    168 
    169         throw new IllegalArgumentException("Invalid hex char '" + b + "'");
    170     }
    171 
    172     /**
    173      * @return True iff the url is an asset file.
    174      */
    175     public static boolean isAssetUrl(String url) {
    176         return (null != url) && url.startsWith(ASSET_BASE);
    177     }
    178 
    179     /**
    180      * @return True iff the url is a resource file.
    181      * @hide
    182      */
    183     public static boolean isResourceUrl(String url) {
    184         return (null != url) && url.startsWith(RESOURCE_BASE);
    185     }
    186 
    187     /**
    188      * @return True iff the url is a proxy url to allow cookieless network
    189      * requests from a file url.
    190      * @deprecated Cookieless proxy is no longer supported.
    191      */
    192     @Deprecated
    193     public static boolean isCookielessProxyUrl(String url) {
    194         return (null != url) && url.startsWith(PROXY_BASE);
    195     }
    196 
    197     /**
    198      * @return True iff the url is a local file.
    199      */
    200     public static boolean isFileUrl(String url) {
    201         return (null != url) && (url.startsWith(FILE_BASE) &&
    202                                  !url.startsWith(ASSET_BASE) &&
    203                                  !url.startsWith(PROXY_BASE));
    204     }
    205 
    206     /**
    207      * @return True iff the url is an about: url.
    208      */
    209     public static boolean isAboutUrl(String url) {
    210         return (null != url) && url.startsWith("about:");
    211     }
    212 
    213     /**
    214      * @return True iff the url is a data: url.
    215      */
    216     public static boolean isDataUrl(String url) {
    217         return (null != url) && url.startsWith("data:");
    218     }
    219 
    220     /**
    221      * @return True iff the url is a javascript: url.
    222      */
    223     public static boolean isJavaScriptUrl(String url) {
    224         return (null != url) && url.startsWith("javascript:");
    225     }
    226 
    227     /**
    228      * @return True iff the url is an http: url.
    229      */
    230     public static boolean isHttpUrl(String url) {
    231         return (null != url) &&
    232                (url.length() > 6) &&
    233                url.substring(0, 7).equalsIgnoreCase("http://");
    234     }
    235 
    236     /**
    237      * @return True iff the url is an https: url.
    238      */
    239     public static boolean isHttpsUrl(String url) {
    240         return (null != url) &&
    241                (url.length() > 7) &&
    242                url.substring(0, 8).equalsIgnoreCase("https://");
    243     }
    244 
    245     /**
    246      * @return True iff the url is a network url.
    247      */
    248     public static boolean isNetworkUrl(String url) {
    249         if (url == null || url.length() == 0) {
    250             return false;
    251         }
    252         return isHttpUrl(url) || isHttpsUrl(url);
    253     }
    254 
    255     /**
    256      * @return True iff the url is a content: url.
    257      */
    258     public static boolean isContentUrl(String url) {
    259         return (null != url) && url.startsWith(CONTENT_BASE);
    260     }
    261 
    262     /**
    263      * @return True iff the url is valid.
    264      */
    265     public static boolean isValidUrl(String url) {
    266         if (url == null || url.length() == 0) {
    267             return false;
    268         }
    269 
    270         return (isAssetUrl(url) ||
    271                 isResourceUrl(url) ||
    272                 isFileUrl(url) ||
    273                 isAboutUrl(url) ||
    274                 isHttpUrl(url) ||
    275                 isHttpsUrl(url) ||
    276                 isJavaScriptUrl(url) ||
    277                 isContentUrl(url));
    278     }
    279 
    280     /**
    281      * Strips the url of the anchor.
    282      */
    283     public static String stripAnchor(String url) {
    284         int anchorIndex = url.indexOf('#');
    285         if (anchorIndex != -1) {
    286             return url.substring(0, anchorIndex);
    287         }
    288         return url;
    289     }
    290 
    291     /**
    292      * Guesses canonical filename that a download would have, using
    293      * the URL and contentDisposition. File extension, if not defined,
    294      * is added based on the mimetype
    295      * @param url Url to the content
    296      * @param contentDisposition Content-Disposition HTTP header or null
    297      * @param mimeType Mime-type of the content or null
    298      *
    299      * @return suggested filename
    300      */
    301     public static final String guessFileName(
    302             String url,
    303             String contentDisposition,
    304             String mimeType) {
    305         String filename = null;
    306         String extension = null;
    307 
    308         // If we couldn't do anything with the hint, move toward the content disposition
    309         if (filename == null && contentDisposition != null) {
    310             filename = parseContentDisposition(contentDisposition);
    311             if (filename != null) {
    312                 int index = filename.lastIndexOf('/') + 1;
    313                 if (index > 0) {
    314                     filename = filename.substring(index);
    315                 }
    316             }
    317         }
    318 
    319         // If all the other http-related approaches failed, use the plain uri
    320         if (filename == null) {
    321             String decodedUrl = Uri.decode(url);
    322             if (decodedUrl != null) {
    323                 int queryIndex = decodedUrl.indexOf('?');
    324                 // If there is a query string strip it, same as desktop browsers
    325                 if (queryIndex > 0) {
    326                     decodedUrl = decodedUrl.substring(0, queryIndex);
    327                 }
    328                 if (!decodedUrl.endsWith("/")) {
    329                     int index = decodedUrl.lastIndexOf('/') + 1;
    330                     if (index > 0) {
    331                         filename = decodedUrl.substring(index);
    332                     }
    333                 }
    334             }
    335         }
    336 
    337         // Finally, if couldn't get filename from URI, get a generic filename
    338         if (filename == null) {
    339             filename = "downloadfile";
    340         }
    341 
    342         // Split filename between base and extension
    343         // Add an extension if filename does not have one
    344         int dotIndex = filename.indexOf('.');
    345         if (dotIndex < 0) {
    346             if (mimeType != null) {
    347                 extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
    348                 if (extension != null) {
    349                     extension = "." + extension;
    350                 }
    351             }
    352             if (extension == null) {
    353                 if (mimeType != null && mimeType.toLowerCase(Locale.ROOT).startsWith("text/")) {
    354                     if (mimeType.equalsIgnoreCase("text/html")) {
    355                         extension = ".html";
    356                     } else {
    357                         extension = ".txt";
    358                     }
    359                 } else {
    360                     extension = ".bin";
    361                 }
    362             }
    363         } else {
    364             if (mimeType != null) {
    365                 // Compare the last segment of the extension against the mime type.
    366                 // If there's a mismatch, discard the entire extension.
    367                 int lastDotIndex = filename.lastIndexOf('.');
    368                 String typeFromExt = MimeTypeMap.getSingleton().getMimeTypeFromExtension(
    369                         filename.substring(lastDotIndex + 1));
    370                 if (typeFromExt != null && !typeFromExt.equalsIgnoreCase(mimeType)) {
    371                     extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
    372                     if (extension != null) {
    373                         extension = "." + extension;
    374                     }
    375                 }
    376             }
    377             if (extension == null) {
    378                 extension = filename.substring(dotIndex);
    379             }
    380             filename = filename.substring(0, dotIndex);
    381         }
    382 
    383         return filename + extension;
    384     }
    385 
    386     /** Regex used to parse content-disposition headers */
    387     private static final Pattern CONTENT_DISPOSITION_PATTERN =
    388             Pattern.compile("attachment;\\s*filename\\s*=\\s*(\"?)([^\"]*)\\1\\s*$",
    389             Pattern.CASE_INSENSITIVE);
    390 
    391     /*
    392      * Parse the Content-Disposition HTTP Header. The format of the header
    393      * is defined here: http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html
    394      * This header provides a filename for content that is going to be
    395      * downloaded to the file system. We only support the attachment type.
    396      * Note that RFC 2616 specifies the filename value must be double-quoted.
    397      * Unfortunately some servers do not quote the value so to maintain
    398      * consistent behaviour with other browsers, we allow unquoted values too.
    399      */
    400     static String parseContentDisposition(String contentDisposition) {
    401         try {
    402             Matcher m = CONTENT_DISPOSITION_PATTERN.matcher(contentDisposition);
    403             if (m.find()) {
    404                 return m.group(2);
    405             }
    406         } catch (IllegalStateException ex) {
    407              // This function is defined as returning null when it can't parse the header
    408         }
    409         return null;
    410     }
    411 }
    412