Home | History | Annotate | Download | only in webkit
      1 /*
      2  * Copyright (C) 2006 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package android.webkit;
     18 
     19 import java.io.UnsupportedEncodingException;
     20 import java.util.regex.Matcher;
     21 import java.util.regex.Pattern;
     22 
     23 import android.net.Uri;
     24 import android.net.ParseException;
     25 import android.net.WebAddress;
     26 import android.util.Log;
     27 
     28 public final class URLUtil {
     29 
     30     private static final String LOGTAG = "webkit";
     31 
     32     // to refer to bar.png under your package's asset/foo/ directory, use
     33     // "file:///android_asset/foo/bar.png".
     34     static final String ASSET_BASE = "file:///android_asset/";
     35     // to refer to bar.png under your package's res/drawable/ directory, use
     36     // "file:///android_res/drawable/bar.png". Use "drawable" to refer to
     37     // "drawable-hdpi" directory as well.
     38     static final String RESOURCE_BASE = "file:///android_res/";
     39     static final String FILE_BASE = "file://";
     40     static final String PROXY_BASE = "file:///cookieless_proxy/";
     41 
     42     /**
     43      * Cleans up (if possible) user-entered web addresses
     44      */
     45     public static String guessUrl(String inUrl) {
     46 
     47         String retVal = inUrl;
     48         WebAddress webAddress;
     49 
     50         Log.v(LOGTAG, "guessURL before queueRequest: " + inUrl);
     51 
     52         if (inUrl.length() == 0) return inUrl;
     53         if (inUrl.startsWith("about:")) return inUrl;
     54         // Do not try to interpret data scheme URLs
     55         if (inUrl.startsWith("data:")) return inUrl;
     56         // Do not try to interpret file scheme URLs
     57         if (inUrl.startsWith("file:")) return inUrl;
     58         // Do not try to interpret javascript scheme URLs
     59         if (inUrl.startsWith("javascript:")) return inUrl;
     60 
     61         // bug 762454: strip period off end of url
     62         if (inUrl.endsWith(".") == true) {
     63             inUrl = inUrl.substring(0, inUrl.length() - 1);
     64         }
     65 
     66         try {
     67             webAddress = new WebAddress(inUrl);
     68         } catch (ParseException ex) {
     69 
     70             if (DebugFlags.URL_UTIL) {
     71                 Log.v(LOGTAG, "smartUrlFilter: failed to parse url = " + inUrl);
     72             }
     73             return retVal;
     74         }
     75 
     76         // Check host
     77         if (webAddress.mHost.indexOf('.') == -1) {
     78             // no dot: user probably entered a bare domain.  try .com
     79             webAddress.mHost = "www." + webAddress.mHost + ".com";
     80         }
     81         return webAddress.toString();
     82     }
     83 
     84     public static String composeSearchUrl(String inQuery, String template,
     85                                           String queryPlaceHolder) {
     86         int placeHolderIndex = template.indexOf(queryPlaceHolder);
     87         if (placeHolderIndex < 0) {
     88             return null;
     89         }
     90 
     91         String query;
     92         StringBuilder buffer = new StringBuilder();
     93         buffer.append(template.substring(0, placeHolderIndex));
     94 
     95         try {
     96             query = java.net.URLEncoder.encode(inQuery, "utf-8");
     97             buffer.append(query);
     98         } catch (UnsupportedEncodingException ex) {
     99             return null;
    100         }
    101 
    102         buffer.append(template.substring(
    103                 placeHolderIndex + queryPlaceHolder.length()));
    104 
    105         return buffer.toString();
    106     }
    107 
    108     public static byte[] decode(byte[] url) throws IllegalArgumentException {
    109         if (url.length == 0) {
    110             return new byte[0];
    111         }
    112 
    113         // Create a new byte array with the same length to ensure capacity
    114         byte[] tempData = new byte[url.length];
    115 
    116         int tempCount = 0;
    117         for (int i = 0; i < url.length; i++) {
    118             byte b = url[i];
    119             if (b == '%') {
    120                 if (url.length - i > 2) {
    121                     b = (byte) (parseHex(url[i + 1]) * 16
    122                             + parseHex(url[i + 2]));
    123                     i += 2;
    124                 } else {
    125                     throw new IllegalArgumentException("Invalid format");
    126                 }
    127             }
    128             tempData[tempCount++] = b;
    129         }
    130         byte[] retData = new byte[tempCount];
    131         System.arraycopy(tempData, 0, retData, 0, tempCount);
    132         return retData;
    133     }
    134 
    135     /**
    136      * @return True iff the url is correctly URL encoded
    137      */
    138     static boolean verifyURLEncoding(String url) {
    139         int count = url.length();
    140         if (count == 0) {
    141             return false;
    142         }
    143 
    144         int index = url.indexOf('%');
    145         while (index >= 0 && index < count) {
    146             if (index < count - 2) {
    147                 try {
    148                     parseHex((byte) url.charAt(++index));
    149                     parseHex((byte) url.charAt(++index));
    150                 } catch (IllegalArgumentException e) {
    151                     return false;
    152                 }
    153             } else {
    154                 return false;
    155             }
    156             index = url.indexOf('%', index + 1);
    157         }
    158         return true;
    159     }
    160 
    161     private static int parseHex(byte b) {
    162         if (b >= '0' && b <= '9') return (b - '0');
    163         if (b >= 'A' && b <= 'F') return (b - 'A' + 10);
    164         if (b >= 'a' && b <= 'f') return (b - 'a' + 10);
    165 
    166         throw new IllegalArgumentException("Invalid hex char '" + b + "'");
    167     }
    168 
    169     /**
    170      * @return True iff the url is an asset file.
    171      */
    172     public static boolean isAssetUrl(String url) {
    173         return (null != url) && url.startsWith(ASSET_BASE);
    174     }
    175 
    176     /**
    177      * @return True iff the url is a resource file.
    178      * @hide
    179      */
    180     public static boolean isResourceUrl(String url) {
    181         return (null != url) && url.startsWith(RESOURCE_BASE);
    182     }
    183 
    184     /**
    185      * @return True iff the url is an proxy url to allow cookieless network
    186      * requests from a file url.
    187      * @deprecated Cookieless proxy is no longer supported.
    188      */
    189     @Deprecated
    190     public static boolean isCookielessProxyUrl(String url) {
    191         return (null != url) && url.startsWith(PROXY_BASE);
    192     }
    193 
    194     /**
    195      * @return True iff the url is a local file.
    196      */
    197     public static boolean isFileUrl(String url) {
    198         return (null != url) && (url.startsWith(FILE_BASE) &&
    199                                  !url.startsWith(ASSET_BASE) &&
    200                                  !url.startsWith(PROXY_BASE));
    201     }
    202 
    203     /**
    204      * @return True iff the url is an about: url.
    205      */
    206     public static boolean isAboutUrl(String url) {
    207         return (null != url) && url.startsWith("about:");
    208     }
    209 
    210     /**
    211      * @return True iff the url is a data: url.
    212      */
    213     public static boolean isDataUrl(String url) {
    214         return (null != url) && url.startsWith("data:");
    215     }
    216 
    217     /**
    218      * @return True iff the url is a javascript: url.
    219      */
    220     public static boolean isJavaScriptUrl(String url) {
    221         return (null != url) && url.startsWith("javascript:");
    222     }
    223 
    224     /**
    225      * @return True iff the url is an http: url.
    226      */
    227     public static boolean isHttpUrl(String url) {
    228         return (null != url) &&
    229                (url.length() > 6) &&
    230                url.substring(0, 7).equalsIgnoreCase("http://");
    231     }
    232 
    233     /**
    234      * @return True iff the url is an https: url.
    235      */
    236     public static boolean isHttpsUrl(String url) {
    237         return (null != url) &&
    238                (url.length() > 7) &&
    239                url.substring(0, 8).equalsIgnoreCase("https://");
    240     }
    241 
    242     /**
    243      * @return True iff the url is a network url.
    244      */
    245     public static boolean isNetworkUrl(String url) {
    246         if (url == null || url.length() == 0) {
    247             return false;
    248         }
    249         return isHttpUrl(url) || isHttpsUrl(url);
    250     }
    251 
    252     /**
    253      * @return True iff the url is a content: url.
    254      */
    255     public static boolean isContentUrl(String url) {
    256         return (null != url) && url.startsWith("content:");
    257     }
    258 
    259     /**
    260      * @return True iff the url is valid.
    261      */
    262     public static boolean isValidUrl(String url) {
    263         if (url == null || url.length() == 0) {
    264             return false;
    265         }
    266 
    267         return (isAssetUrl(url) ||
    268                 isResourceUrl(url) ||
    269                 isFileUrl(url) ||
    270                 isAboutUrl(url) ||
    271                 isHttpUrl(url) ||
    272                 isHttpsUrl(url) ||
    273                 isJavaScriptUrl(url) ||
    274                 isContentUrl(url));
    275     }
    276 
    277     /**
    278      * Strips the url of the anchor.
    279      */
    280     public static String stripAnchor(String url) {
    281         int anchorIndex = url.indexOf('#');
    282         if (anchorIndex != -1) {
    283             return url.substring(0, anchorIndex);
    284         }
    285         return url;
    286     }
    287 
    288     /**
    289      * Guesses canonical filename that a download would have, using
    290      * the URL and contentDisposition. File extension, if not defined,
    291      * is added based on the mimetype
    292      * @param url Url to the content
    293      * @param contentDisposition Content-Disposition HTTP header or null
    294      * @param mimeType Mime-type of the content or null
    295      *
    296      * @return suggested filename
    297      */
    298     public static final String guessFileName(
    299             String url,
    300             String contentDisposition,
    301             String mimeType) {
    302         String filename = null;
    303         String extension = null;
    304 
    305         // If we couldn't do anything with the hint, move toward the content disposition
    306         if (filename == null && contentDisposition != null) {
    307             filename = parseContentDisposition(contentDisposition);
    308             if (filename != null) {
    309                 int index = filename.lastIndexOf('/') + 1;
    310                 if (index > 0) {
    311                     filename = filename.substring(index);
    312                 }
    313             }
    314         }
    315 
    316         // If all the other http-related approaches failed, use the plain uri
    317         if (filename == null) {
    318             String decodedUrl = Uri.decode(url);
    319             if (decodedUrl != null) {
    320                 int queryIndex = decodedUrl.indexOf('?');
    321                 // If there is a query string strip it, same as desktop browsers
    322                 if (queryIndex > 0) {
    323                     decodedUrl = decodedUrl.substring(0, queryIndex);
    324                 }
    325                 if (!decodedUrl.endsWith("/")) {
    326                     int index = decodedUrl.lastIndexOf('/') + 1;
    327                     if (index > 0) {
    328                         filename = decodedUrl.substring(index);
    329                     }
    330                 }
    331             }
    332         }
    333 
    334         // Finally, if couldn't get filename from URI, get a generic filename
    335         if (filename == null) {
    336             filename = "downloadfile";
    337         }
    338 
    339         // Split filename between base and extension
    340         // Add an extension if filename does not have one
    341         int dotIndex = filename.indexOf('.');
    342         if (dotIndex < 0) {
    343             if (mimeType != null) {
    344                 extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
    345                 if (extension != null) {
    346                     extension = "." + extension;
    347                 }
    348             }
    349             if (extension == null) {
    350                 if (mimeType != null && mimeType.toLowerCase().startsWith("text/")) {
    351                     if (mimeType.equalsIgnoreCase("text/html")) {
    352                         extension = ".html";
    353                     } else {
    354                         extension = ".txt";
    355                     }
    356                 } else {
    357                     extension = ".bin";
    358                 }
    359             }
    360         } else {
    361             if (mimeType != null) {
    362                 // Compare the last segment of the extension against the mime type.
    363                 // If there's a mismatch, discard the entire extension.
    364                 int lastDotIndex = filename.lastIndexOf('.');
    365                 String typeFromExt = MimeTypeMap.getSingleton().getMimeTypeFromExtension(
    366                         filename.substring(lastDotIndex + 1));
    367                 if (typeFromExt != null && !typeFromExt.equalsIgnoreCase(mimeType)) {
    368                     extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
    369                     if (extension != null) {
    370                         extension = "." + extension;
    371                     }
    372                 }
    373             }
    374             if (extension == null) {
    375                 extension = filename.substring(dotIndex);
    376             }
    377             filename = filename.substring(0, dotIndex);
    378         }
    379 
    380         return filename + extension;
    381     }
    382 
    383     /** Regex used to parse content-disposition headers */
    384     private static final Pattern CONTENT_DISPOSITION_PATTERN =
    385             Pattern.compile("attachment;\\s*filename\\s*=\\s*(\"?)([^\"]*)\\1\\s*$",
    386             Pattern.CASE_INSENSITIVE);
    387 
    388     /*
    389      * Parse the Content-Disposition HTTP Header. The format of the header
    390      * is defined here: http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html
    391      * This header provides a filename for content that is going to be
    392      * downloaded to the file system. We only support the attachment type.
    393      * Note that RFC 2616 specifies the filename value must be double-quoted.
    394      * Unfortunately some servers do not quote the value so to maintain
    395      * consistent behaviour with other browsers, we allow unquoted values too.
    396      */
    397     static String parseContentDisposition(String contentDisposition) {
    398         try {
    399             Matcher m = CONTENT_DISPOSITION_PATTERN.matcher(contentDisposition);
    400             if (m.find()) {
    401                 return m.group(2);
    402             }
    403         } catch (IllegalStateException ex) {
    404              // This function is defined as returning null when it can't parse the header
    405         }
    406         return null;
    407     }
    408 }
    409