Home | History | Annotate | Download | only in net
      1 /*
      2  * Copyright (C) 2007 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package android.net;
     18 
     19 import java.util.ArrayList;
     20 import java.util.HashMap;
     21 import java.util.List;
     22 import java.util.Locale;
     23 import java.util.Set;
     24 import java.util.StringTokenizer;
     25 
     26 /**
     27  *
     28  * Sanitizes the Query portion of a URL. Simple example:
     29  * <code>
     30  * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer();
     31  * sanitizer.setAllowUnregisteredParamaters(true);
     32  * sanitizer.parseUrl("http://example.com/?name=Joe+User");
      33  * String name = sanitizer.getValue("name");
     34  * // name now contains "Joe_User"
     35  * </code>
     36  *
     37  * Register ValueSanitizers to customize the way individual
     38  * parameters are sanitized:
     39  * <code>
     40  * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer();
      41  * sanitizer.registerParameter("name", UrlQuerySanitizer.getSpaceLegal());
      42  * sanitizer.parseUrl("http://example.com/?name=Joe+User");
      43  * String name = sanitizer.getValue("name");
      44  * // name now contains "Joe User". (The string is first decoded, which
      45  * // converts the '+' to a ' '. Then the string is sanitized; the ' ' is
      46  * // kept because the sanitizer registered for "name" allows spaces. With
      47  * // the default unregistered parameter sanitizer, which does not allow any
      48  * // special characters, the ' ' would have been converted to an '_'.)
     49  * </code>
     50  *
     51  * There are several ways to create ValueSanitizers. In order of increasing
     52  * sophistication:
     53  * <ol>
     54  * <li>Call one of the UrlQuerySanitizer.createXXX() methods.
     55  * <li>Construct your own instance of
     56  * UrlQuerySanitizer.IllegalCharacterValueSanitizer.
     57  * <li>Subclass UrlQuerySanitizer.ValueSanitizer to define your own value
     58  * sanitizer.
     59  * </ol>
     60  *
     61  */
     62 public class UrlQuerySanitizer {
     63 
     64     /**
     65      * A simple tuple that holds parameter-value pairs.
     66      *
     67      */
     68     public class ParameterValuePair {
     69         /**
     70          * Construct a parameter-value tuple.
     71          * @param parameter an unencoded parameter
     72          * @param value an unencoded value
     73          */
     74         public ParameterValuePair(String parameter,
     75                 String value) {
     76             mParameter = parameter;
     77             mValue = value;
     78         }
     79         /**
     80          * The unencoded parameter
     81          */
     82         public String mParameter;
     83         /**
     84          * The unencoded value
     85          */
     86         public String mValue;
     87     }
     88 
     89     final private HashMap<String, ValueSanitizer> mSanitizers =
     90         new HashMap<String, ValueSanitizer>();
     91     final private HashMap<String, String> mEntries =
     92         new HashMap<String, String>();
     93     final private ArrayList<ParameterValuePair> mEntriesList =
     94         new ArrayList<ParameterValuePair>();
     95     private boolean mAllowUnregisteredParamaters;
     96     private boolean mPreferFirstRepeatedParameter;
     97     private ValueSanitizer mUnregisteredParameterValueSanitizer =
     98         getAllIllegal();
     99 
    100     /**
    101      * A functor used to sanitize a single query value.
    102      *
    103      */
    104     public static interface ValueSanitizer {
    105         /**
    106          * Sanitize an unencoded value.
    107          * @param value
    108          * @return the sanitized unencoded value
    109          */
    110         public String sanitize(String value);
    111     }
    112 
    113     /**
    114      * Sanitize values based on which characters they contain. Illegal
    115      * characters are replaced with either space or '_', depending upon
    116      * whether space is a legal character or not.
    117      */
    118     public static class IllegalCharacterValueSanitizer implements
    119         ValueSanitizer {
    120         private int mFlags;
    121 
    122         /**
    123          * Allow space (' ') characters.
    124          */
    125         public final static int SPACE_OK =              1 << 0;
    126         /**
    127          * Allow whitespace characters other than space. The
    128          * other whitespace characters are
    129          * '\t' '\f' '\n' '\r' and '\0x000b' (vertical tab)
    130          */
    131         public final static int OTHER_WHITESPACE_OK =  1 << 1;
    132         /**
    133          * Allow characters with character codes 128 to 255.
    134          */
    135         public final static int NON_7_BIT_ASCII_OK =    1 << 2;
    136         /**
    137          * Allow double quote characters. ('"')
    138          */
    139         public final static int DQUOTE_OK =             1 << 3;
    140         /**
    141          * Allow single quote characters. ('\'')
    142          */
    143         public final static int SQUOTE_OK =             1 << 4;
    144         /**
    145          * Allow less-than characters. ('<')
    146          */
    147         public final static int LT_OK =                 1 << 5;
    148         /**
    149          * Allow greater-than characters. ('>')
    150          */
    151         public final static int GT_OK =                 1 << 6;
    152         /**
    153          * Allow ampersand characters ('&')
    154          */
    155         public final static int AMP_OK =                1 << 7;
    156         /**
    157          * Allow percent-sign characters ('%')
    158          */
    159         public final static int PCT_OK =                1 << 8;
    160         /**
    161          * Allow nul characters ('\0')
    162          */
    163         public final static int NUL_OK =                1 << 9;
    164         /**
    165          * Allow text to start with a script URL
    166          * such as "javascript:" or "vbscript:"
    167          */
    168         public final static int SCRIPT_URL_OK =         1 << 10;
    169 
    170         /**
    171          * Mask with all fields set to OK
    172          */
    173         public final static int ALL_OK =                0x7ff;
    174 
    175         /**
    176          * Mask with both regular space and other whitespace OK
    177          */
    178         public final static int ALL_WHITESPACE_OK =
    179             SPACE_OK | OTHER_WHITESPACE_OK;
    180 
    181 
    182         // Common flag combinations:
    183 
    184         /**
    185          * <ul>
    186          * <li>Deny all special characters.
    187          * <li>Deny script URLs.
    188          * </ul>
    189          */
    190         public final static int ALL_ILLEGAL =
    191             0;
    192         /**
    193          * <ul>
    194          * <li>Allow all special characters except Nul. ('\0').
    195          * <li>Allow script URLs.
    196          * </ul>
    197          */
    198         public final static int ALL_BUT_NUL_LEGAL =
    199             ALL_OK & ~NUL_OK;
    200         /**
    201          * <ul>
    202          * <li>Allow all special characters except for:
    203          * <ul>
    204          *  <li>whitespace characters
    205          *  <li>Nul ('\0')
    206          * </ul>
    207          * <li>Allow script URLs.
    208          * </ul>
    209          */
    210         public final static int ALL_BUT_WHITESPACE_LEGAL =
    211             ALL_OK & ~(ALL_WHITESPACE_OK | NUL_OK);
    212         /**
    213          * <ul>
    214          * <li>Allow characters used by encoded URLs.
    215          * <li>Deny script URLs.
    216          * </ul>
    217          */
    218         public final static int URL_LEGAL =
    219             NON_7_BIT_ASCII_OK | SQUOTE_OK | AMP_OK | PCT_OK;
    220         /**
    221          * <ul>
    222          * <li>Allow characters used by encoded URLs.
    223          * <li>Allow spaces.
    224          * <li>Deny script URLs.
    225          * </ul>
    226          */
    227         public final static int URL_AND_SPACE_LEGAL =
    228             URL_LEGAL | SPACE_OK;
    229         /**
    230          * <ul>
    231          * <li>Allow ampersand.
    232          * <li>Deny script URLs.
    233          * </ul>
    234          */
    235         public final static int AMP_LEGAL =
    236             AMP_OK;
    237         /**
    238          * <ul>
    239          * <li>Allow ampersand.
    240          * <li>Allow space.
    241          * <li>Deny script URLs.
    242          * </ul>
    243          */
    244         public final static int AMP_AND_SPACE_LEGAL =
    245             AMP_OK | SPACE_OK;
    246         /**
    247          * <ul>
    248          * <li>Allow space.
    249          * <li>Deny script URLs.
    250          * </ul>
    251          */
    252         public final static int SPACE_LEGAL =
    253             SPACE_OK;
    254         /**
    255          * <ul>
    256          * <li>Allow all but.
    257          * <ul>
    258          *  <li>Nul ('\0')
    259          *  <li>Angle brackets ('<', '>')
    260          * </ul>
    261          * <li>Deny script URLs.
    262          * </ul>
    263          */
    264         public final static int ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL =
    265             ALL_OK & ~(NUL_OK | LT_OK | GT_OK);
    266 
    267         /**
    268          *  Script URL definitions
    269          */
    270 
    271         private final static String JAVASCRIPT_PREFIX = "javascript:";
    272 
    273         private final static String VBSCRIPT_PREFIX = "vbscript:";
    274 
    275         private final static int MIN_SCRIPT_PREFIX_LENGTH = Math.min(
    276                 JAVASCRIPT_PREFIX.length(), VBSCRIPT_PREFIX.length());
    277 
    278         /**
    279          * Construct a sanitizer. The parameters set the behavior of the
    280          * sanitizer.
    281          * @param flags some combination of the XXX_OK flags.
    282          */
    283         public IllegalCharacterValueSanitizer(
    284             int flags) {
    285             mFlags = flags;
    286         }
    287         /**
    288          * Sanitize a value.
    289          * <ol>
    290          * <li>If script URLs are not OK, the will be removed.
    291          * <li>If neither spaces nor other white space is OK, then
    292          * white space will be trimmed from the beginning and end of
    293          * the URL. (Just the actual white space characters are trimmed, not
    294          * other control codes.)
    295          * <li> Illegal characters will be replaced with
    296          * either ' ' or '_', depending on whether a space is itself a
    297          * legal character.
    298          * </ol>
    299          * @param value
    300          * @return the sanitized value
    301          */
    302         public String sanitize(String value) {
    303             if (value == null) {
    304                 return null;
    305             }
    306             int length = value.length();
    307             if ((mFlags & SCRIPT_URL_OK) != 0) {
    308                 if (length >= MIN_SCRIPT_PREFIX_LENGTH) {
    309                     String asLower = value.toLowerCase(Locale.ROOT);
    310                     if (asLower.startsWith(JAVASCRIPT_PREFIX)  ||
    311                         asLower.startsWith(VBSCRIPT_PREFIX)) {
    312                         return "";
    313                     }
    314                 }
    315             }
    316 
    317             // If whitespace isn't OK, get rid of whitespace at beginning
    318             // and end of value.
    319             if ( (mFlags & ALL_WHITESPACE_OK) == 0) {
    320                 value = trimWhitespace(value);
    321                 // The length could have changed, so we need to correct
    322                 // the length variable.
    323                 length = value.length();
    324             }
    325 
    326             StringBuilder stringBuilder = new StringBuilder(length);
    327             for(int i = 0; i < length; i++) {
    328                 char c = value.charAt(i);
    329                 if (!characterIsLegal(c)) {
    330                     if ((mFlags & SPACE_OK) != 0) {
    331                         c = ' ';
    332                     }
    333                     else {
    334                         c = '_';
    335                     }
    336                 }
    337                 stringBuilder.append(c);
    338             }
    339             return stringBuilder.toString();
    340         }
    341 
    342         /**
    343          * Trim whitespace from the beginning and end of a string.
    344          * <p>
    345          * Note: can't use {@link String#trim} because {@link String#trim} has a
    346          * different definition of whitespace than we want.
    347          * @param value the string to trim
    348          * @return the trimmed string
    349          */
    350         private String trimWhitespace(String value) {
    351             int start = 0;
    352             int last = value.length() - 1;
    353             int end = last;
    354             while (start <= end && isWhitespace(value.charAt(start))) {
    355                 start++;
    356             }
    357             while (end >= start && isWhitespace(value.charAt(end))) {
    358                 end--;
    359             }
    360             if (start == 0 && end == last) {
    361                 return value;
    362             }
    363             return value.substring(start, end + 1);
    364         }
    365 
    366         /**
    367          * Check if c is whitespace.
    368          * @param c character to test
    369          * @return true if c is a whitespace character
    370          */
    371         private boolean isWhitespace(char c) {
    372             switch(c) {
    373             case ' ':
    374             case '\t':
    375             case '\f':
    376             case '\n':
    377             case '\r':
    378             case 11: /* VT */
    379                 return true;
    380             default:
    381                 return false;
    382             }
    383         }
    384 
    385         /**
    386          * Check whether an individual character is legal. Uses the
    387          * flag bit-set passed into the constructor.
    388          * @param c
    389          * @return true if c is a legal character
    390          */
    391         private boolean characterIsLegal(char c) {
    392             switch(c) {
    393             case ' ' : return (mFlags & SPACE_OK) != 0;
    394             case '\t': case '\f': case '\n': case '\r': case 11: /* VT */
    395               return (mFlags & OTHER_WHITESPACE_OK) != 0;
    396             case '\"': return (mFlags & DQUOTE_OK) != 0;
    397             case '\'': return (mFlags & SQUOTE_OK) != 0;
    398             case '<' : return (mFlags & LT_OK) != 0;
    399             case '>' : return (mFlags & GT_OK) != 0;
    400             case '&' : return (mFlags & AMP_OK) != 0;
    401             case '%' : return (mFlags & PCT_OK) != 0;
    402             case '\0': return (mFlags & NUL_OK) != 0;
    403             default  : return (c >= 32 && c < 127) ||
    404                 ((c >= 128) && ((mFlags & NON_7_BIT_ASCII_OK) != 0));
    405             }
    406         }
    407     }
    408 
    409     /**
    410      * Get the current value sanitizer used when processing
    411      * unregistered parameter values.
    412      * <p>
    413      * <b>Note:</b> The default unregistered parameter value sanitizer is
    414      * one that doesn't allow any special characters, similar to what
    415      * is returned by calling createAllIllegal.
    416      *
    417      * @return the current ValueSanitizer used to sanitize unregistered
    418      * parameter values.
    419      */
    420     public ValueSanitizer getUnregisteredParameterValueSanitizer() {
    421         return mUnregisteredParameterValueSanitizer;
    422     }
    423 
    424     /**
    425      * Set the value sanitizer used when processing unregistered
    426      * parameter values.
    427      * @param sanitizer set the ValueSanitizer used to sanitize unregistered
    428      * parameter values.
    429      */
    430     public void setUnregisteredParameterValueSanitizer(
    431             ValueSanitizer sanitizer) {
    432         mUnregisteredParameterValueSanitizer = sanitizer;
    433     }
    434 
    435 
    436     // Private fields for singleton sanitizers:
    437 
    438     private static final ValueSanitizer sAllIllegal =
    439         new IllegalCharacterValueSanitizer(
    440                 IllegalCharacterValueSanitizer.ALL_ILLEGAL);
    441 
    442     private static final ValueSanitizer sAllButNulLegal =
    443         new IllegalCharacterValueSanitizer(
    444                 IllegalCharacterValueSanitizer.ALL_BUT_NUL_LEGAL);
    445 
    446     private static final ValueSanitizer sAllButWhitespaceLegal =
    447         new IllegalCharacterValueSanitizer(
    448                 IllegalCharacterValueSanitizer.ALL_BUT_WHITESPACE_LEGAL);
    449 
    450     private static final ValueSanitizer sURLLegal =
    451         new IllegalCharacterValueSanitizer(
    452                 IllegalCharacterValueSanitizer.URL_LEGAL);
    453 
    454     private static final ValueSanitizer sUrlAndSpaceLegal =
    455         new IllegalCharacterValueSanitizer(
    456                 IllegalCharacterValueSanitizer.URL_AND_SPACE_LEGAL);
    457 
    458     private static final ValueSanitizer sAmpLegal =
    459         new IllegalCharacterValueSanitizer(
    460                 IllegalCharacterValueSanitizer.AMP_LEGAL);
    461 
    462     private static final ValueSanitizer sAmpAndSpaceLegal =
    463         new IllegalCharacterValueSanitizer(
    464                 IllegalCharacterValueSanitizer.AMP_AND_SPACE_LEGAL);
    465 
    466     private static final ValueSanitizer sSpaceLegal =
    467         new IllegalCharacterValueSanitizer(
    468                 IllegalCharacterValueSanitizer.SPACE_LEGAL);
    469 
    470     private static final ValueSanitizer sAllButNulAndAngleBracketsLegal =
    471         new IllegalCharacterValueSanitizer(
    472                 IllegalCharacterValueSanitizer.ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL);
    473 
    474     /**
    475      * Return a value sanitizer that does not allow any special characters,
    476      * and also does not allow script URLs.
    477      * @return a value sanitizer
    478      */
    479     public static final ValueSanitizer getAllIllegal() {
    480         return sAllIllegal;
    481     }
    482 
    483     /**
    484      * Return a value sanitizer that allows everything except Nul ('\0')
    485      * characters. Script URLs are allowed.
    486      * @return a value sanitizer
    487      */
    488     public static final ValueSanitizer getAllButNulLegal() {
    489         return sAllButNulLegal;
    490     }
    491     /**
    492      * Return a value sanitizer that allows everything except Nul ('\0')
    493      * characters, space (' '), and other whitespace characters.
    494      * Script URLs are allowed.
    495      * @return a value sanitizer
    496      */
    497     public static final ValueSanitizer getAllButWhitespaceLegal() {
    498         return sAllButWhitespaceLegal;
    499     }
    500     /**
    501      * Return a value sanitizer that allows all the characters used by
    502      * encoded URLs. Does not allow script URLs.
    503      * @return a value sanitizer
    504      */
    505     public static final ValueSanitizer getUrlLegal() {
    506         return sURLLegal;
    507     }
    508     /**
    509      * Return a value sanitizer that allows all the characters used by
    510      * encoded URLs and allows spaces, which are not technically legal
    511      * in encoded URLs, but commonly appear anyway.
    512      * Does not allow script URLs.
    513      * @return a value sanitizer
    514      */
    515     public static final ValueSanitizer getUrlAndSpaceLegal() {
    516         return sUrlAndSpaceLegal;
    517     }
    518     /**
    519      * Return a value sanitizer that does not allow any special characters
    520      * except ampersand ('&'). Does not allow script URLs.
    521      * @return a value sanitizer
    522      */
    523     public static final ValueSanitizer getAmpLegal() {
    524         return sAmpLegal;
    525     }
    526     /**
    527      * Return a value sanitizer that does not allow any special characters
    528      * except ampersand ('&') and space (' '). Does not allow script URLs.
    529      * @return a value sanitizer
    530      */
    531     public static final ValueSanitizer getAmpAndSpaceLegal() {
    532         return sAmpAndSpaceLegal;
    533     }
    534     /**
    535      * Return a value sanitizer that does not allow any special characters
    536      * except space (' '). Does not allow script URLs.
    537      * @return a value sanitizer
    538      */
    539     public static final ValueSanitizer getSpaceLegal() {
    540         return sSpaceLegal;
    541     }
    542     /**
    543      * Return a value sanitizer that allows any special characters
    544      * except angle brackets ('<' and '>') and Nul ('\0').
    545      * Allows script URLs.
    546      * @return a value sanitizer
    547      */
    548     public static final ValueSanitizer getAllButNulAndAngleBracketsLegal() {
    549         return sAllButNulAndAngleBracketsLegal;
    550     }
    551 
    552     /**
    553      * Constructs a UrlQuerySanitizer.
    554      * <p>
    555      * Defaults:
    556      * <ul>
    557      * <li>unregistered parameters are not allowed.
    558      * <li>the last instance of a repeated parameter is preferred.
    559      * <li>The default value sanitizer is an AllIllegal value sanitizer.
    560      * <ul>
    561      */
    562     public UrlQuerySanitizer() {
    563     }
    564 
    /**
     * Creates a sanitizer and immediately parses the given URL.
     * Provided for convenience when the default parsing behavior is
     * acceptable.
     * <p>
     * The URL is parsed before this constructor returns, so there is no
     * opportunity to configure the sanitizer beforehand.
     * <p>
     * <code>
     * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer(myUrl);
     * String name = sanitizer.getValue("name");
     * </code>
     * <p>
     * Defaults:
     * <ul>
     * <li>unregistered parameters <em>are</em> allowed.
     * <li>the last instance of a repeated parameter is preferred.
     * <li>The default value sanitizer is an AllIllegal value sanitizer.
     * </ul>
     * @param url the encoded URL to parse.
     */
    public UrlQuerySanitizer(String url) {
        // Unregistered parameters must be enabled before parsing, since no
        // parameter has been registered yet.
        this.setAllowUnregisteredParamaters(true);
        this.parseUrl(url);
    }
    589 
    590     /**
    591      * Parse the query parameters out of an encoded URL.
    592      * Works by extracting the query portion from the URL and then
    593      * calling parseQuery(). If there is no query portion it is
    594      * treated as if the query portion is an empty string.
    595      * @param url the encoded URL to parse.
    596      */
    597     public void parseUrl(String url) {
    598         int queryIndex = url.indexOf('?');
    599         String query;
    600         if (queryIndex >= 0) {
    601             query = url.substring(queryIndex + 1);
    602         }
    603         else {
    604             query = "";
    605         }
    606         parseQuery(query);
    607     }
    608 
    609     /**
    610      * Parse a query. A query string is any number of parameter-value clauses
    611      * separated by any non-zero number of ampersands. A parameter-value clause
    612      * is a parameter followed by an equal sign, followed by a value. If the
    613      * equal sign is missing, the value is assumed to be the empty string.
    614      * @param query the query to parse.
    615      */
    616     public void parseQuery(String query) {
    617         clear();
    618         // Split by '&'
    619         StringTokenizer tokenizer = new StringTokenizer(query, "&");
    620         while(tokenizer.hasMoreElements()) {
    621             String attributeValuePair = tokenizer.nextToken();
    622             if (attributeValuePair.length() > 0) {
    623                 int assignmentIndex = attributeValuePair.indexOf('=');
    624                 if (assignmentIndex < 0) {
    625                     // No assignment found, treat as if empty value
    626                     parseEntry(attributeValuePair, "");
    627                 }
    628                 else {
    629                     parseEntry(attributeValuePair.substring(0, assignmentIndex),
    630                             attributeValuePair.substring(assignmentIndex + 1));
    631                 }
    632             }
    633         }
    634     }
    635 
    636     /**
    637      * Get a set of all of the parameters found in the sanitized query.
    638      * <p>
    639      * Note: Do not modify this set. Treat it as a read-only set.
    640      * @return all the parameters found in the current query.
    641      */
    642     public Set<String> getParameterSet() {
    643         return mEntries.keySet();
    644     }
    645 
    646     /**
    647      * An array list of all of the parameter value pairs in the sanitized
    648      * query, in the order they appeared in the query. May contain duplicate
    649      * parameters.
    650      * <p class="note"><b>Note:</b> Do not modify this list. Treat it as a read-only list.</p>
    651      */
    652     public List<ParameterValuePair> getParameterList() {
    653         return mEntriesList;
    654     }
    655 
    656     /**
    657      * Check if a parameter exists in the current sanitized query.
    658      * @param parameter the unencoded name of a parameter.
    659      * @return true if the paramater exists in the current sanitized queary.
    660      */
    661     public boolean hasParameter(String parameter) {
    662         return mEntries.containsKey(parameter);
    663     }
    664 
    665     /**
    666      * Get the value for a parameter in the current sanitized query.
    667      * Returns null if the parameter does not
    668      * exit.
    669      * @param parameter the unencoded name of a parameter.
    670      * @return the sanitized unencoded value of the parameter,
    671      * or null if the parameter does not exist.
    672      */
    673     public String getValue(String parameter) {
    674         return mEntries.get(parameter);
    675     }
    676 
    677     /**
    678      * Register a value sanitizer for a particular parameter. Can also be used
    679      * to replace or remove an already-set value sanitizer.
    680      * <p>
    681      * Registering a non-null value sanitizer for a particular parameter
    682      * makes that parameter a registered parameter.
    683      * @param parameter an unencoded parameter name
    684      * @param valueSanitizer the value sanitizer to use for a particular
    685      * parameter. May be null in order to unregister that parameter.
    686      * @see #getAllowUnregisteredParamaters()
    687      */
    688     public void registerParameter(String parameter,
    689             ValueSanitizer valueSanitizer) {
    690         if (valueSanitizer == null) {
    691             mSanitizers.remove(parameter);
    692         }
    693         mSanitizers.put(parameter, valueSanitizer);
    694     }
    695 
    696     /**
    697      * Register a value sanitizer for an array of parameters.
    698      * @param parameters An array of unencoded parameter names.
    699      * @param valueSanitizer
    700      * @see #registerParameter
    701      */
    702     public void registerParameters(String[] parameters,
    703             ValueSanitizer valueSanitizer) {
    704         int length = parameters.length;
    705         for(int i = 0; i < length; i++) {
    706             mSanitizers.put(parameters[i], valueSanitizer);
    707         }
    708     }
    709 
    710     /**
    711      * Set whether or not unregistered parameters are allowed. If they
    712      * are not allowed, then they will be dropped when a query is sanitized.
    713      * <p>
    714      * Defaults to false.
    715      * @param allowUnregisteredParamaters true to allow unregistered parameters.
    716      * @see #getAllowUnregisteredParamaters()
    717      */
    718     public void setAllowUnregisteredParamaters(
    719             boolean allowUnregisteredParamaters) {
    720         mAllowUnregisteredParamaters = allowUnregisteredParamaters;
    721     }
    722 
    723     /**
    724      * Get whether or not unregistered parameters are allowed. If not
    725      * allowed, they will be dropped when a query is parsed.
    726      * @return true if unregistered parameters are allowed.
    727      * @see #setAllowUnregisteredParamaters(boolean)
    728      */
    729     public boolean getAllowUnregisteredParamaters() {
    730         return mAllowUnregisteredParamaters;
    731     }
    732 
    733     /**
    734      * Set whether or not the first occurrence of a repeated parameter is
    735      * preferred. True means the first repeated parameter is preferred.
    736      * False means that the last repeated parameter is preferred.
    737      * <p>
    738      * The preferred parameter is the one that is returned when getParameter
    739      * is called.
    740      * <p>
    741      * defaults to false.
    742      * @param preferFirstRepeatedParameter True if the first repeated
    743      * parameter is preferred.
    744      * @see #getPreferFirstRepeatedParameter()
    745      */
    746     public void setPreferFirstRepeatedParameter(
    747             boolean preferFirstRepeatedParameter) {
    748         mPreferFirstRepeatedParameter = preferFirstRepeatedParameter;
    749     }
    750 
    751     /**
    752      * Get whether or not the first occurrence of a repeated parameter is
    753      * preferred.
    754      * @return true if the first occurrence of a repeated parameter is
    755      * preferred.
    756      * @see #setPreferFirstRepeatedParameter(boolean)
    757      */
    758     public boolean getPreferFirstRepeatedParameter() {
    759         return mPreferFirstRepeatedParameter;
    760     }
    761 
    762     /**
    763      * Parse an escaped parameter-value pair. The default implementation
    764      * unescapes both the parameter and the value, then looks up the
    765      * effective value sanitizer for the parameter and uses it to sanitize
    766      * the value. If all goes well then addSanitizedValue is called with
    767      * the unescaped parameter and the sanitized unescaped value.
    768      * @param parameter an escaped parameter
    769      * @param value an unsanitzied escaped value
    770      */
    771     protected void parseEntry(String parameter, String value) {
    772         String unescapedParameter = unescape(parameter);
    773          ValueSanitizer valueSanitizer =
    774             getEffectiveValueSanitizer(unescapedParameter);
    775 
    776         if (valueSanitizer == null) {
    777             return;
    778         }
    779         String unescapedValue = unescape(value);
    780         String sanitizedValue = valueSanitizer.sanitize(unescapedValue);
    781         addSanitizedEntry(unescapedParameter, sanitizedValue);
    782     }
    783 
    784     /**
    785      * Record a sanitized parameter-value pair. Override if you want to
    786      * do additional filtering or validation.
    787      * @param parameter an unescaped parameter
    788      * @param value a sanitized unescaped value
    789      */
    790     protected void addSanitizedEntry(String parameter, String value) {
    791         mEntriesList.add(
    792                 new ParameterValuePair(parameter, value));
    793         if (mPreferFirstRepeatedParameter) {
    794             if (mEntries.containsKey(parameter)) {
    795                 return;
    796             }
    797         }
    798         mEntries.put(parameter, value);
    799     }
    800 
    801     /**
    802      * Get the value sanitizer for a parameter. Returns null if there
    803      * is no value sanitizer registered for the parameter.
    804      * @param parameter the unescaped parameter
    805      * @return the currently registered value sanitizer for this parameter.
    806      * @see #registerParameter(String, android.net.UrlQuerySanitizer.ValueSanitizer)
    807      */
    808     public ValueSanitizer getValueSanitizer(String parameter) {
    809         return mSanitizers.get(parameter);
    810     }
    811 
    812     /**
    813      * Get the effective value sanitizer for a parameter. Like getValueSanitizer,
    814      * except if there is no value sanitizer registered for a parameter, and
    815      * unregistered paramaters are allowed, then the default value sanitizer is
    816      * returned.
    817      * @param parameter an unescaped parameter
    818      * @return the effective value sanitizer for a parameter.
    819      */
    820     public ValueSanitizer getEffectiveValueSanitizer(String parameter) {
    821         ValueSanitizer sanitizer = getValueSanitizer(parameter);
    822         if (sanitizer == null && mAllowUnregisteredParamaters) {
    823             sanitizer = getUnregisteredParameterValueSanitizer();
    824         }
    825         return sanitizer;
    826     }
    827 
    828     /**
    829      * Unescape an escaped string.
    830      * <ul>
    831      * <li>'+' characters are replaced by
    832      * ' ' characters.
    833      * <li>Valid "%xx" escape sequences are replaced by the
    834      * corresponding unescaped character.
    835      * <li>Invalid escape sequences such as %1z", are passed through unchanged.
    836      * <ol>
    837      * @param string the escaped string
    838      * @return the unescaped string.
    839      */
    840     public String unescape(String string) {
    841         // Early exit if no escaped characters.
    842         int firstEscape = string.indexOf('%');
    843         if ( firstEscape < 0) {
    844             firstEscape = string.indexOf('+');
    845             if (firstEscape < 0) {
    846                 return string;
    847             }
    848         }
    849 
    850         int length = string.length();
    851 
    852         StringBuilder stringBuilder = new StringBuilder(length);
    853         stringBuilder.append(string.substring(0, firstEscape));
    854         for (int i = firstEscape; i < length; i++) {
    855             char c = string.charAt(i);
    856             if (c == '+') {
    857                 c = ' ';
    858             }
    859             else if ( c == '%' && i + 2 < length) {
    860                 char c1 = string.charAt(i + 1);
    861                 char c2 = string.charAt(i + 2);
    862                 if (isHexDigit(c1) && isHexDigit(c2)) {
    863                     c = (char) (decodeHexDigit(c1) * 16 + decodeHexDigit(c2));
    864                     i += 2;
    865                 }
    866             }
    867             stringBuilder.append(c);
    868         }
    869         return stringBuilder.toString();
    870     }
    871 
    872     /**
    873      * Test if a character is a hexidecimal digit. Both upper case and lower
    874      * case hex digits are allowed.
    875      * @param c the character to test
    876      * @return true if c is a hex digit.
    877      */
    878     protected boolean isHexDigit(char c) {
    879         return decodeHexDigit(c) >= 0;
    880     }
    881 
    882     /**
    883      * Convert a character that represents a hexidecimal digit into an integer.
    884      * If the character is not a hexidecimal digit, then -1 is returned.
    885      * Both upper case and lower case hex digits are allowed.
    886      * @param c the hexidecimal digit.
    887      * @return the integer value of the hexidecimal digit.
    888      */
    889 
    890     protected int decodeHexDigit(char c) {
    891         if (c >= '0' && c <= '9') {
    892             return c - '0';
    893         }
    894         else if (c >= 'A' && c <= 'F') {
    895             return c - 'A' + 10;
    896         }
    897         else if (c >= 'a' && c <= 'f') {
    898             return c - 'a' + 10;
    899         }
    900         else {
    901             return -1;
    902         }
    903     }
    904 
    905     /**
    906      * Clear the existing entries. Called to get ready to parse a new
    907      * query string.
    908      */
    909     protected void clear() {
    910         mEntries.clear();
    911         mEntriesList.clear();
    912     }
    913 }
    914 
    915