Home | History | Annotate | Download | only in webkit
      1 /*
      2  * Copyright (C) 2006 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package android.webkit;
     18 
     19 import java.io.UnsupportedEncodingException;
     20 import java.util.regex.Matcher;
     21 import java.util.regex.Pattern;
     22 
     23 import android.net.Uri;
     24 import android.net.ParseException;
     25 import android.net.WebAddress;
     26 import android.util.Log;
     27 
     28 public final class URLUtil {
     29 
     30     private static final String LOGTAG = "webkit";
     31 
     32     // to refer to bar.png under your package's asset/foo/ directory, use
     33     // "file:///android_asset/foo/bar.png".
     34     static final String ASSET_BASE = "file:///android_asset/";
     35     // to refer to bar.png under your package's res/drawable/ directory, use
     36     // "file:///android_res/drawable/bar.png". Use "drawable" to refer to
     37     // "drawable-hdpi" directory as well.
     38     static final String RESOURCE_BASE = "file:///android_res/";
     39     static final String FILE_BASE = "file://";
     40     static final String PROXY_BASE = "file:///cookieless_proxy/";
     41     static final String CONTENT_BASE = "content:";
     42 
     43     /**
     44      * Cleans up (if possible) user-entered web addresses
     45      */
     46     public static String guessUrl(String inUrl) {
     47 
     48         String retVal = inUrl;
     49         WebAddress webAddress;
     50 
     51         if (DebugFlags.URL_UTIL) Log.v(LOGTAG, "guessURL before queueRequest: " + inUrl);
     52 
     53         if (inUrl.length() == 0) return inUrl;
     54         if (inUrl.startsWith("about:")) return inUrl;
     55         // Do not try to interpret data scheme URLs
     56         if (inUrl.startsWith("data:")) return inUrl;
     57         // Do not try to interpret file scheme URLs
     58         if (inUrl.startsWith("file:")) return inUrl;
     59         // Do not try to interpret javascript scheme URLs
     60         if (inUrl.startsWith("javascript:")) return inUrl;
     61 
     62         // bug 762454: strip period off end of url
     63         if (inUrl.endsWith(".") == true) {
     64             inUrl = inUrl.substring(0, inUrl.length() - 1);
     65         }
     66 
     67         try {
     68             webAddress = new WebAddress(inUrl);
     69         } catch (ParseException ex) {
     70 
     71             if (DebugFlags.URL_UTIL) {
     72                 Log.v(LOGTAG, "smartUrlFilter: failed to parse url = " + inUrl);
     73             }
     74             return retVal;
     75         }
     76 
     77         // Check host
     78         if (webAddress.getHost().indexOf('.') == -1) {
     79             // no dot: user probably entered a bare domain.  try .com
     80             webAddress.setHost("www." + webAddress.getHost() + ".com");
     81         }
     82         return webAddress.toString();
     83     }
     84 
     85     public static String composeSearchUrl(String inQuery, String template,
     86                                           String queryPlaceHolder) {
     87         int placeHolderIndex = template.indexOf(queryPlaceHolder);
     88         if (placeHolderIndex < 0) {
     89             return null;
     90         }
     91 
     92         String query;
     93         StringBuilder buffer = new StringBuilder();
     94         buffer.append(template.substring(0, placeHolderIndex));
     95 
     96         try {
     97             query = java.net.URLEncoder.encode(inQuery, "utf-8");
     98             buffer.append(query);
     99         } catch (UnsupportedEncodingException ex) {
    100             return null;
    101         }
    102 
    103         buffer.append(template.substring(
    104                 placeHolderIndex + queryPlaceHolder.length()));
    105 
    106         return buffer.toString();
    107     }
    108 
    109     public static byte[] decode(byte[] url) throws IllegalArgumentException {
    110         if (url.length == 0) {
    111             return new byte[0];
    112         }
    113 
    114         // Create a new byte array with the same length to ensure capacity
    115         byte[] tempData = new byte[url.length];
    116 
    117         int tempCount = 0;
    118         for (int i = 0; i < url.length; i++) {
    119             byte b = url[i];
    120             if (b == '%') {
    121                 if (url.length - i > 2) {
    122                     b = (byte) (parseHex(url[i + 1]) * 16
    123                             + parseHex(url[i + 2]));
    124                     i += 2;
    125                 } else {
    126                     throw new IllegalArgumentException("Invalid format");
    127                 }
    128             }
    129             tempData[tempCount++] = b;
    130         }
    131         byte[] retData = new byte[tempCount];
    132         System.arraycopy(tempData, 0, retData, 0, tempCount);
    133         return retData;
    134     }
    135 
    136     /**
    137      * @return True iff the url is correctly URL encoded
    138      */
    139     static boolean verifyURLEncoding(String url) {
    140         int count = url.length();
    141         if (count == 0) {
    142             return false;
    143         }
    144 
    145         int index = url.indexOf('%');
    146         while (index >= 0 && index < count) {
    147             if (index < count - 2) {
    148                 try {
    149                     parseHex((byte) url.charAt(++index));
    150                     parseHex((byte) url.charAt(++index));
    151                 } catch (IllegalArgumentException e) {
    152                     return false;
    153                 }
    154             } else {
    155                 return false;
    156             }
    157             index = url.indexOf('%', index + 1);
    158         }
    159         return true;
    160     }
    161 
    162     private static int parseHex(byte b) {
    163         if (b >= '0' && b <= '9') return (b - '0');
    164         if (b >= 'A' && b <= 'F') return (b - 'A' + 10);
    165         if (b >= 'a' && b <= 'f') return (b - 'a' + 10);
    166 
    167         throw new IllegalArgumentException("Invalid hex char '" + b + "'");
    168     }
    169 
    170     /**
    171      * @return True iff the url is an asset file.
    172      */
    173     public static boolean isAssetUrl(String url) {
    174         return (null != url) && url.startsWith(ASSET_BASE);
    175     }
    176 
    177     /**
    178      * @return True iff the url is a resource file.
    179      * @hide
    180      */
    181     public static boolean isResourceUrl(String url) {
    182         return (null != url) && url.startsWith(RESOURCE_BASE);
    183     }
    184 
    185     /**
    186      * @return True iff the url is a proxy url to allow cookieless network
    187      * requests from a file url.
    188      * @deprecated Cookieless proxy is no longer supported.
    189      */
    190     @Deprecated
    191     public static boolean isCookielessProxyUrl(String url) {
    192         return (null != url) && url.startsWith(PROXY_BASE);
    193     }
    194 
    195     /**
    196      * @return True iff the url is a local file.
    197      */
    198     public static boolean isFileUrl(String url) {
    199         return (null != url) && (url.startsWith(FILE_BASE) &&
    200                                  !url.startsWith(ASSET_BASE) &&
    201                                  !url.startsWith(PROXY_BASE));
    202     }
    203 
    204     /**
    205      * @return True iff the url is an about: url.
    206      */
    207     public static boolean isAboutUrl(String url) {
    208         return (null != url) && url.startsWith("about:");
    209     }
    210 
    211     /**
    212      * @return True iff the url is a data: url.
    213      */
    214     public static boolean isDataUrl(String url) {
    215         return (null != url) && url.startsWith("data:");
    216     }
    217 
    218     /**
    219      * @return True iff the url is a javascript: url.
    220      */
    221     public static boolean isJavaScriptUrl(String url) {
    222         return (null != url) && url.startsWith("javascript:");
    223     }
    224 
    225     /**
    226      * @return True iff the url is an http: url.
    227      */
    228     public static boolean isHttpUrl(String url) {
    229         return (null != url) &&
    230                (url.length() > 6) &&
    231                url.substring(0, 7).equalsIgnoreCase("http://");
    232     }
    233 
    234     /**
    235      * @return True iff the url is an https: url.
    236      */
    237     public static boolean isHttpsUrl(String url) {
    238         return (null != url) &&
    239                (url.length() > 7) &&
    240                url.substring(0, 8).equalsIgnoreCase("https://");
    241     }
    242 
    243     /**
    244      * @return True iff the url is a network url.
    245      */
    246     public static boolean isNetworkUrl(String url) {
    247         if (url == null || url.length() == 0) {
    248             return false;
    249         }
    250         return isHttpUrl(url) || isHttpsUrl(url);
    251     }
    252 
    253     /**
    254      * @return True iff the url is a content: url.
    255      */
    256     public static boolean isContentUrl(String url) {
    257         return (null != url) && url.startsWith(CONTENT_BASE);
    258     }
    259 
    260     /**
    261      * @return True iff the url is valid.
    262      */
    263     public static boolean isValidUrl(String url) {
    264         if (url == null || url.length() == 0) {
    265             return false;
    266         }
    267 
    268         return (isAssetUrl(url) ||
    269                 isResourceUrl(url) ||
    270                 isFileUrl(url) ||
    271                 isAboutUrl(url) ||
    272                 isHttpUrl(url) ||
    273                 isHttpsUrl(url) ||
    274                 isJavaScriptUrl(url) ||
    275                 isContentUrl(url));
    276     }
    277 
    278     /**
    279      * Strips the url of the anchor.
    280      */
    281     public static String stripAnchor(String url) {
    282         int anchorIndex = url.indexOf('#');
    283         if (anchorIndex != -1) {
    284             return url.substring(0, anchorIndex);
    285         }
    286         return url;
    287     }
    288 
    289     /**
    290      * Guesses canonical filename that a download would have, using
    291      * the URL and contentDisposition. File extension, if not defined,
    292      * is added based on the mimetype
    293      * @param url Url to the content
    294      * @param contentDisposition Content-Disposition HTTP header or null
    295      * @param mimeType Mime-type of the content or null
    296      *
    297      * @return suggested filename
    298      */
    299     public static final String guessFileName(
    300             String url,
    301             String contentDisposition,
    302             String mimeType) {
    303         String filename = null;
    304         String extension = null;
    305 
    306         // If we couldn't do anything with the hint, move toward the content disposition
    307         if (filename == null && contentDisposition != null) {
    308             filename = parseContentDisposition(contentDisposition);
    309             if (filename != null) {
    310                 int index = filename.lastIndexOf('/') + 1;
    311                 if (index > 0) {
    312                     filename = filename.substring(index);
    313                 }
    314             }
    315         }
    316 
    317         // If all the other http-related approaches failed, use the plain uri
    318         if (filename == null) {
    319             String decodedUrl = Uri.decode(url);
    320             if (decodedUrl != null) {
    321                 int queryIndex = decodedUrl.indexOf('?');
    322                 // If there is a query string strip it, same as desktop browsers
    323                 if (queryIndex > 0) {
    324                     decodedUrl = decodedUrl.substring(0, queryIndex);
    325                 }
    326                 if (!decodedUrl.endsWith("/")) {
    327                     int index = decodedUrl.lastIndexOf('/') + 1;
    328                     if (index > 0) {
    329                         filename = decodedUrl.substring(index);
    330                     }
    331                 }
    332             }
    333         }
    334 
    335         // Finally, if couldn't get filename from URI, get a generic filename
    336         if (filename == null) {
    337             filename = "downloadfile";
    338         }
    339 
    340         // Split filename between base and extension
    341         // Add an extension if filename does not have one
    342         int dotIndex = filename.indexOf('.');
    343         if (dotIndex < 0) {
    344             if (mimeType != null) {
    345                 extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
    346                 if (extension != null) {
    347                     extension = "." + extension;
    348                 }
    349             }
    350             if (extension == null) {
    351                 if (mimeType != null && mimeType.toLowerCase().startsWith("text/")) {
    352                     if (mimeType.equalsIgnoreCase("text/html")) {
    353                         extension = ".html";
    354                     } else {
    355                         extension = ".txt";
    356                     }
    357                 } else {
    358                     extension = ".bin";
    359                 }
    360             }
    361         } else {
    362             if (mimeType != null) {
    363                 // Compare the last segment of the extension against the mime type.
    364                 // If there's a mismatch, discard the entire extension.
    365                 int lastDotIndex = filename.lastIndexOf('.');
    366                 String typeFromExt = MimeTypeMap.getSingleton().getMimeTypeFromExtension(
    367                         filename.substring(lastDotIndex + 1));
    368                 if (typeFromExt != null && !typeFromExt.equalsIgnoreCase(mimeType)) {
    369                     extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
    370                     if (extension != null) {
    371                         extension = "." + extension;
    372                     }
    373                 }
    374             }
    375             if (extension == null) {
    376                 extension = filename.substring(dotIndex);
    377             }
    378             filename = filename.substring(0, dotIndex);
    379         }
    380 
    381         return filename + extension;
    382     }
    383 
    384     /** Regex used to parse content-disposition headers */
    385     private static final Pattern CONTENT_DISPOSITION_PATTERN =
    386             Pattern.compile("attachment;\\s*filename\\s*=\\s*(\"?)([^\"]*)\\1\\s*$",
    387             Pattern.CASE_INSENSITIVE);
    388 
    389     /*
    390      * Parse the Content-Disposition HTTP Header. The format of the header
    391      * is defined here: http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html
    392      * This header provides a filename for content that is going to be
    393      * downloaded to the file system. We only support the attachment type.
    394      * Note that RFC 2616 specifies the filename value must be double-quoted.
    395      * Unfortunately some servers do not quote the value so to maintain
    396      * consistent behaviour with other browsers, we allow unquoted values too.
    397      */
    398     static String parseContentDisposition(String contentDisposition) {
    399         try {
    400             Matcher m = CONTENT_DISPOSITION_PATTERN.matcher(contentDisposition);
    401             if (m.find()) {
    402                 return m.group(2);
    403             }
    404         } catch (IllegalStateException ex) {
    405              // This function is defined as returning null when it can't parse the header
    406         }
    407         return null;
    408     }
    409 }
    410