Home | History | Annotate | Download | only in browser
      1 /*
      2  * Copyright (C) 2010 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.browser;
     18 
import android.net.Uri;
import android.util.Patterns;
import android.webkit.URLUtil;

import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
     25 
     26 /**
     27  * Utility methods for Url manipulation
     28  */
     29 public class UrlUtils {
     30 
     31     static final Pattern ACCEPTED_URI_SCHEMA = Pattern.compile(
     32             "(?i)" + // switch on case insensitive matching
     33             "(" +    // begin group for schema
     34             "(?:http|https|file):\\/\\/" +
     35             "|(?:inline|data|about|javascript):" +
     36             ")" +
     37             "(.*)" );
     38 
     39     // Google search
     40     private final static String QUICKSEARCH_G = "http://www.google.com/m?q=%s";
     41     private final static String QUERY_PLACE_HOLDER = "%s";
     42 
     43     // Regular expression to strip http:// and optionally
     44     // the trailing slash
     45     private static final Pattern STRIP_URL_PATTERN =
     46             Pattern.compile("^http://(.*?)/?$");
     47 
     48     private UrlUtils() { /* cannot be instantiated */ }
     49 
     50     /**
     51      * Strips the provided url of preceding "http://" and any trailing "/". Does not
     52      * strip "https://". If the provided string cannot be stripped, the original string
     53      * is returned.
     54      *
     55      * TODO: Put this in TextUtils to be used by other packages doing something similar.
     56      *
     57      * @param url a url to strip, like "http://www.google.com/"
     58      * @return a stripped url like "www.google.com", or the original string if it could
     59      *         not be stripped
     60      */
     61     public static String stripUrl(String url) {
     62         if (url == null) return null;
     63         Matcher m = STRIP_URL_PATTERN.matcher(url);
     64         if (m.matches()) {
     65             return m.group(1);
     66         } else {
     67             return url;
     68         }
     69     }
     70 
     71     protected static String smartUrlFilter(Uri inUri) {
     72         if (inUri != null) {
     73             return smartUrlFilter(inUri.toString());
     74         }
     75         return null;
     76     }
     77 
     78     /**
     79      * Attempts to determine whether user input is a URL or search
     80      * terms.  Anything with a space is passed to search.
     81      *
     82      * Converts to lowercase any mistakenly uppercased schema (i.e.,
     83      * "Http://" converts to "http://"
     84      *
     85      * @return Original or modified URL
     86      *
     87      */
     88     public static String smartUrlFilter(String url) {
     89         return smartUrlFilter(url, true);
     90     }
     91 
     92     /**
     93      * Attempts to determine whether user input is a URL or search
     94      * terms.  Anything with a space is passed to search if canBeSearch is true.
     95      *
     96      * Converts to lowercase any mistakenly uppercased schema (i.e.,
     97      * "Http://" converts to "http://"
     98      *
     99      * @param canBeSearch If true, will return a search url if it isn't a valid
    100      *                    URL. If false, invalid URLs will return null
    101      * @return Original or modified URL
    102      *
    103      */
    104     public static String smartUrlFilter(String url, boolean canBeSearch) {
    105         String inUrl = url.trim();
    106         boolean hasSpace = inUrl.indexOf(' ') != -1;
    107 
    108         Matcher matcher = ACCEPTED_URI_SCHEMA.matcher(inUrl);
    109         if (matcher.matches()) {
    110             // force scheme to lowercase
    111             String scheme = matcher.group(1);
    112             String lcScheme = scheme.toLowerCase();
    113             if (!lcScheme.equals(scheme)) {
    114                 inUrl = lcScheme + matcher.group(2);
    115             }
    116             if (hasSpace && Patterns.WEB_URL.matcher(inUrl).matches()) {
    117                 inUrl = inUrl.replace(" ", "%20");
    118             }
    119             return inUrl;
    120         }
    121         if (!hasSpace) {
    122             if (Patterns.WEB_URL.matcher(inUrl).matches()) {
    123                 return URLUtil.guessUrl(inUrl);
    124             }
    125         }
    126         if (canBeSearch) {
    127             return URLUtil.composeSearchUrl(inUrl,
    128                     QUICKSEARCH_G, QUERY_PLACE_HOLDER);
    129         }
    130         return null;
    131     }
    132 
    133     /* package */ static String fixUrl(String inUrl) {
    134         // FIXME: Converting the url to lower case
    135         // duplicates functionality in smartUrlFilter().
    136         // However, changing all current callers of fixUrl to
    137         // call smartUrlFilter in addition may have unwanted
    138         // consequences, and is deferred for now.
    139         int colon = inUrl.indexOf(':');
    140         boolean allLower = true;
    141         for (int index = 0; index < colon; index++) {
    142             char ch = inUrl.charAt(index);
    143             if (!Character.isLetter(ch)) {
    144                 break;
    145             }
    146             allLower &= Character.isLowerCase(ch);
    147             if (index == colon - 1 && !allLower) {
    148                 inUrl = inUrl.substring(0, colon).toLowerCase()
    149                         + inUrl.substring(colon);
    150             }
    151         }
    152         if (inUrl.startsWith("http://") || inUrl.startsWith("https://"))
    153             return inUrl;
    154         if (inUrl.startsWith("http:") ||
    155                 inUrl.startsWith("https:")) {
    156             if (inUrl.startsWith("http:/") || inUrl.startsWith("https:/")) {
    157                 inUrl = inUrl.replaceFirst("/", "//");
    158             } else inUrl = inUrl.replaceFirst(":", "://");
    159         }
    160         return inUrl;
    161     }
    162 
    163     // Returns the filtered URL. Cannot return null, but can return an empty string
    164     /* package */ static String filteredUrl(String inUrl) {
    165         if (inUrl == null) {
    166             return "";
    167         }
    168         if (inUrl.startsWith("content:")
    169                 || inUrl.startsWith("browser:")) {
    170             return "";
    171         }
    172         return inUrl;
    173     }
    174 
    175 }
    176