Home | History | Annotate | Download | only in browser
      1 /*
      2  * Copyright (C) 2010 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.browser;
     18 
import android.net.Uri;
import android.util.Patterns;
import android.webkit.URLUtil;

import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
     25 
     26 /**
     27  * Utility methods for Url manipulation
     28  */
     29 public class UrlUtils {
     30 
     31     static final Pattern ACCEPTED_URI_SCHEMA = Pattern.compile(
     32             "(?i)" + // switch on case insensitive matching
     33             "(" +    // begin group for schema
     34             "(?:http|https|file):\\/\\/" +
     35             "|(?:inline|data|about|javascript):" +
     36             "|(?:.*:.*@)" +
     37             ")" +
     38             "(.*)" );
     39 
     40     // Google search
     41     private final static String QUICKSEARCH_G = "http://www.google.com/m?q=%s";
     42     private final static String QUERY_PLACE_HOLDER = "%s";
     43 
     44     // Regular expression to strip http:// and optionally
     45     // the trailing slash
     46     private static final Pattern STRIP_URL_PATTERN =
     47             Pattern.compile("^http://(.*?)/?$");
     48 
     49     private UrlUtils() { /* cannot be instantiated */ }
     50 
     51     /**
     52      * Strips the provided url of preceding "http://" and any trailing "/". Does not
     53      * strip "https://". If the provided string cannot be stripped, the original string
     54      * is returned.
     55      *
     56      * TODO: Put this in TextUtils to be used by other packages doing something similar.
     57      *
     58      * @param url a url to strip, like "http://www.google.com/"
     59      * @return a stripped url like "www.google.com", or the original string if it could
     60      *         not be stripped
     61      */
     62     public static String stripUrl(String url) {
     63         if (url == null) return null;
     64         Matcher m = STRIP_URL_PATTERN.matcher(url);
     65         if (m.matches()) {
     66             return m.group(1);
     67         } else {
     68             return url;
     69         }
     70     }
     71 
     72     protected static String smartUrlFilter(Uri inUri) {
     73         if (inUri != null) {
     74             return smartUrlFilter(inUri.toString());
     75         }
     76         return null;
     77     }
     78 
     79     /**
     80      * Attempts to determine whether user input is a URL or search
     81      * terms.  Anything with a space is passed to search.
     82      *
     83      * Converts to lowercase any mistakenly uppercased schema (i.e.,
     84      * "Http://" converts to "http://"
     85      *
     86      * @return Original or modified URL
     87      *
     88      */
     89     public static String smartUrlFilter(String url) {
     90         return smartUrlFilter(url, true);
     91     }
     92 
     93     /**
     94      * Attempts to determine whether user input is a URL or search
     95      * terms.  Anything with a space is passed to search if canBeSearch is true.
     96      *
     97      * Converts to lowercase any mistakenly uppercased schema (i.e.,
     98      * "Http://" converts to "http://"
     99      *
    100      * @param canBeSearch If true, will return a search url if it isn't a valid
    101      *                    URL. If false, invalid URLs will return null
    102      * @return Original or modified URL
    103      *
    104      */
    105     public static String smartUrlFilter(String url, boolean canBeSearch) {
    106         String inUrl = url.trim();
    107         boolean hasSpace = inUrl.indexOf(' ') != -1;
    108 
    109         Matcher matcher = ACCEPTED_URI_SCHEMA.matcher(inUrl);
    110         if (matcher.matches()) {
    111             // force scheme to lowercase
    112             String scheme = matcher.group(1);
    113             String lcScheme = scheme.toLowerCase();
    114             if (!lcScheme.equals(scheme)) {
    115                 inUrl = lcScheme + matcher.group(2);
    116             }
    117             if (hasSpace && Patterns.WEB_URL.matcher(inUrl).matches()) {
    118                 inUrl = inUrl.replace(" ", "%20");
    119             }
    120             return inUrl;
    121         }
    122         if (!hasSpace) {
    123             if (Patterns.WEB_URL.matcher(inUrl).matches()) {
    124                 return URLUtil.guessUrl(inUrl);
    125             }
    126         }
    127         if (canBeSearch) {
    128             return URLUtil.composeSearchUrl(inUrl,
    129                     QUICKSEARCH_G, QUERY_PLACE_HOLDER);
    130         }
    131         return null;
    132     }
    133 
    134     /* package */ static String fixUrl(String inUrl) {
    135         // FIXME: Converting the url to lower case
    136         // duplicates functionality in smartUrlFilter().
    137         // However, changing all current callers of fixUrl to
    138         // call smartUrlFilter in addition may have unwanted
    139         // consequences, and is deferred for now.
    140         int colon = inUrl.indexOf(':');
    141         boolean allLower = true;
    142         for (int index = 0; index < colon; index++) {
    143             char ch = inUrl.charAt(index);
    144             if (!Character.isLetter(ch)) {
    145                 break;
    146             }
    147             allLower &= Character.isLowerCase(ch);
    148             if (index == colon - 1 && !allLower) {
    149                 inUrl = inUrl.substring(0, colon).toLowerCase()
    150                         + inUrl.substring(colon);
    151             }
    152         }
    153         if (inUrl.startsWith("http://") || inUrl.startsWith("https://"))
    154             return inUrl;
    155         if (inUrl.startsWith("http:") ||
    156                 inUrl.startsWith("https:")) {
    157             if (inUrl.startsWith("http:/") || inUrl.startsWith("https:/")) {
    158                 inUrl = inUrl.replaceFirst("/", "//");
    159             } else inUrl = inUrl.replaceFirst(":", "://");
    160         }
    161         return inUrl;
    162     }
    163 
    164     // Returns the filtered URL. Cannot return null, but can return an empty string
    165     /* package */ static String filteredUrl(String inUrl) {
    166         if (inUrl == null) {
    167             return "";
    168         }
    169         if (inUrl.startsWith("content:")
    170                 || inUrl.startsWith("browser:")) {
    171             return "";
    172         }
    173         return inUrl;
    174     }
    175 
    176 }
    177