Home | History | Annotate | Download | only in net
      1 /*
      2  * Copyright (C) 2007 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package android.net;
     18 
     19 import java.util.ArrayList;
     20 import java.util.HashMap;
     21 import java.util.List;
     22 import java.util.Set;
     23 import java.util.StringTokenizer;
     24 
     25 /**
     26  *
     27  * Sanitizes the Query portion of a URL. Simple example:
 * <code>
 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer();
 * sanitizer.setAllowUnregisteredParamaters(true);
 * sanitizer.parseUrl("http://example.com/?name=Joe+User");
 * String name = sanitizer.getValue("name");
 * // name now contains "Joe_User"
 * </code>
     35  *
     36  * Register ValueSanitizers to customize the way individual
     37  * parameters are sanitized:
 * <code>
 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer();
 * sanitizer.registerParameter("name", UrlQuerySanitizer.getSpaceLegal());
 * sanitizer.parseUrl("http://example.com/?name=Joe+User");
 * String name = sanitizer.getValue("name");
 * // name now contains "Joe User". (The string is first decoded, which
 * // converts the '+' to a ' '. Then the string is sanitized; the ' ' is
 * // kept because the sanitizer registered for "name" allows spaces.
 * // In the first example the ' ' is converted to an '_' because the default
 * // unregistered parameter sanitizer does not allow any special characters,
 * // and ' ' is a special character.)
 * </code>
     49  *
     50  * There are several ways to create ValueSanitizers. In order of increasing
     51  * sophistication:
     52  * <ol>
 * <li>Call one of the UrlQuerySanitizer.getXXX() methods.
     54  * <li>Construct your own instance of
     55  * UrlQuerySanitizer.IllegalCharacterValueSanitizer.
     56  * <li>Subclass UrlQuerySanitizer.ValueSanitizer to define your own value
     57  * sanitizer.
     58  * </ol>
     59  *
     60  */
     61 public class UrlQuerySanitizer {
     62 
     63     /**
     64      * A simple tuple that holds parameter-value pairs.
     65      *
     66      */
     67     public class ParameterValuePair {
     68         /**
     69          * Construct a parameter-value tuple.
     70          * @param parameter an unencoded parameter
     71          * @param value an unencoded value
     72          */
     73         public ParameterValuePair(String parameter,
     74                 String value) {
     75             mParameter = parameter;
     76             mValue = value;
     77         }
     78         /**
     79          * The unencoded parameter
     80          */
     81         public String mParameter;
     82         /**
     83          * The unencoded value
     84          */
     85         public String mValue;
     86     }
     87 
    // Value sanitizers registered per unencoded parameter name
    // (see registerParameter / registerParameters).
    final private HashMap<String, ValueSanitizer> mSanitizers =
        new HashMap<String, ValueSanitizer>();
    // Sanitized value stored per parameter name; this is what getValue,
    // hasParameter and getParameterSet read.
    final private HashMap<String, String> mEntries =
        new HashMap<String, String>();
    // Every sanitized parameter-value pair in the order it was added,
    // including repeated parameters.
    final private ArrayList<ParameterValuePair> mEntriesList =
        new ArrayList<ParameterValuePair>();
    // Set through setAllowUnregisteredParamaters; false until changed.
    private boolean mAllowUnregisteredParamaters;
    // Set through setPreferFirstRepeatedParameter; false until changed,
    // meaning a later occurrence of a parameter overwrites an earlier one.
    private boolean mPreferFirstRepeatedParameter;
    // Sanitizer for unregistered parameter values; starts out as the shared
    // all-illegal sanitizer.
    private ValueSanitizer mUnregisteredParameterValueSanitizer =
        getAllIllegal();
     98 
    /**
     * A functor used to sanitize a single query value. Implementations are
     * registered per parameter, or installed as the sanitizer for
     * unregistered parameters.
     */
    public static interface ValueSanitizer {
        /**
         * Sanitize an unencoded value.
         * @param value the unencoded value to sanitize
         * @return the sanitized unencoded value
         */
        public String sanitize(String value);
    }
    111 
    112     /**
    113      * Sanitize values based on which characters they contain. Illegal
    114      * characters are replaced with either space or '_', depending upon
    115      * whether space is a legal character or not.
    116      */
    117     public static class IllegalCharacterValueSanitizer implements
    118         ValueSanitizer {
    119         private int mFlags;
    120 
    121         /**
    122          * Allow space (' ') characters.
    123          */
    124         public final static int SPACE_OK =              1 << 0;
    125         /**
    126          * Allow whitespace characters other than space. The
    127          * other whitespace characters are
    128          * '\t' '\f' '\n' '\r' and '\0x000b' (vertical tab)
    129          */
    130         public final static int OTHER_WHITESPACE_OK =  1 << 1;
    131         /**
    132          * Allow characters with character codes 128 to 255.
    133          */
    134         public final static int NON_7_BIT_ASCII_OK =    1 << 2;
    135         /**
    136          * Allow double quote characters. ('"')
    137          */
    138         public final static int DQUOTE_OK =             1 << 3;
    139         /**
    140          * Allow single quote characters. ('\'')
    141          */
    142         public final static int SQUOTE_OK =             1 << 4;
    143         /**
    144          * Allow less-than characters. ('<')
    145          */
    146         public final static int LT_OK =                 1 << 5;
    147         /**
    148          * Allow greater-than characters. ('>')
    149          */
    150         public final static int GT_OK =                 1 << 6;
    151         /**
    152          * Allow ampersand characters ('&')
    153          */
    154         public final static int AMP_OK =                1 << 7;
    155         /**
    156          * Allow percent-sign characters ('%')
    157          */
    158         public final static int PCT_OK =                1 << 8;
    159         /**
    160          * Allow nul characters ('\0')
    161          */
    162         public final static int NUL_OK =                1 << 9;
    163         /**
    164          * Allow text to start with a script URL
    165          * such as "javascript:" or "vbscript:"
    166          */
    167         public final static int SCRIPT_URL_OK =         1 << 10;
    168 
    169         /**
    170          * Mask with all fields set to OK
    171          */
    172         public final static int ALL_OK =                0x7ff;
    173 
    174         /**
    175          * Mask with both regular space and other whitespace OK
    176          */
    177         public final static int ALL_WHITESPACE_OK =
    178             SPACE_OK | OTHER_WHITESPACE_OK;
    179 
    180 
    181         // Common flag combinations:
    182 
    183         /**
    184          * <ul>
    185          * <li>Deny all special characters.
    186          * <li>Deny script URLs.
    187          * </ul>
    188          */
    189         public final static int ALL_ILLEGAL =
    190             0;
    191         /**
    192          * <ul>
    193          * <li>Allow all special characters except Nul. ('\0').
    194          * <li>Allow script URLs.
    195          * </ul>
    196          */
    197         public final static int ALL_BUT_NUL_LEGAL =
    198             ALL_OK & ~NUL_OK;
    199         /**
    200          * <ul>
    201          * <li>Allow all special characters except for:
    202          * <ul>
    203          *  <li>whitespace characters
    204          *  <li>Nul ('\0')
    205          * </ul>
    206          * <li>Allow script URLs.
    207          * </ul>
    208          */
    209         public final static int ALL_BUT_WHITESPACE_LEGAL =
    210             ALL_OK & ~(ALL_WHITESPACE_OK | NUL_OK);
    211         /**
    212          * <ul>
    213          * <li>Allow characters used by encoded URLs.
    214          * <li>Deny script URLs.
    215          * </ul>
    216          */
    217         public final static int URL_LEGAL =
    218             NON_7_BIT_ASCII_OK | SQUOTE_OK | AMP_OK | PCT_OK;
    219         /**
    220          * <ul>
    221          * <li>Allow characters used by encoded URLs.
    222          * <li>Allow spaces.
    223          * <li>Deny script URLs.
    224          * </ul>
    225          */
    226         public final static int URL_AND_SPACE_LEGAL =
    227             URL_LEGAL | SPACE_OK;
    228         /**
    229          * <ul>
    230          * <li>Allow ampersand.
    231          * <li>Deny script URLs.
    232          * </ul>
    233          */
    234         public final static int AMP_LEGAL =
    235             AMP_OK;
    236         /**
    237          * <ul>
    238          * <li>Allow ampersand.
    239          * <li>Allow space.
    240          * <li>Deny script URLs.
    241          * </ul>
    242          */
    243         public final static int AMP_AND_SPACE_LEGAL =
    244             AMP_OK | SPACE_OK;
    245         /**
    246          * <ul>
    247          * <li>Allow space.
    248          * <li>Deny script URLs.
    249          * </ul>
    250          */
    251         public final static int SPACE_LEGAL =
    252             SPACE_OK;
    253         /**
    254          * <ul>
    255          * <li>Allow all but.
    256          * <ul>
    257          *  <li>Nul ('\0')
    258          *  <li>Angle brackets ('<', '>')
    259          * </ul>
    260          * <li>Deny script URLs.
    261          * </ul>
    262          */
    263         public final static int ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL =
    264             ALL_OK & ~(NUL_OK | LT_OK | GT_OK);
    265 
    266         /**
    267          *  Script URL definitions
    268          */
    269 
    270         private final static String JAVASCRIPT_PREFIX = "javascript:";
    271 
    272         private final static String VBSCRIPT_PREFIX = "vbscript:";
    273 
    274         private final static int MIN_SCRIPT_PREFIX_LENGTH = Math.min(
    275                 JAVASCRIPT_PREFIX.length(), VBSCRIPT_PREFIX.length());
    276 
    277         /**
    278          * Construct a sanitizer. The parameters set the behavior of the
    279          * sanitizer.
    280          * @param flags some combination of the XXX_OK flags.
    281          */
    282         public IllegalCharacterValueSanitizer(
    283             int flags) {
    284             mFlags = flags;
    285         }
    286         /**
    287          * Sanitize a value.
    288          * <ol>
    289          * <li>If script URLs are not OK, the will be removed.
    290          * <li>If neither spaces nor other white space is OK, then
    291          * white space will be trimmed from the beginning and end of
    292          * the URL. (Just the actual white space characters are trimmed, not
    293          * other control codes.)
    294          * <li> Illegal characters will be replaced with
    295          * either ' ' or '_', depending on whether a space is itself a
    296          * legal character.
    297          * </ol>
    298          * @param value
    299          * @return the sanitized value
    300          */
    301         public String sanitize(String value) {
    302             if (value == null) {
    303                 return null;
    304             }
    305             int length = value.length();
    306             if ((mFlags & SCRIPT_URL_OK) != 0) {
    307                 if (length >= MIN_SCRIPT_PREFIX_LENGTH) {
    308                     String asLower = value.toLowerCase();
    309                     if (asLower.startsWith(JAVASCRIPT_PREFIX)  ||
    310                         asLower.startsWith(VBSCRIPT_PREFIX)) {
    311                         return "";
    312                     }
    313                 }
    314             }
    315 
    316             // If whitespace isn't OK, get rid of whitespace at beginning
    317             // and end of value.
    318             if ( (mFlags & ALL_WHITESPACE_OK) == 0) {
    319                 value = trimWhitespace(value);
    320                 // The length could have changed, so we need to correct
    321                 // the length variable.
    322                 length = value.length();
    323             }
    324 
    325             StringBuilder stringBuilder = new StringBuilder(length);
    326             for(int i = 0; i < length; i++) {
    327                 char c = value.charAt(i);
    328                 if (!characterIsLegal(c)) {
    329                     if ((mFlags & SPACE_OK) != 0) {
    330                         c = ' ';
    331                     }
    332                     else {
    333                         c = '_';
    334                     }
    335                 }
    336                 stringBuilder.append(c);
    337             }
    338             return stringBuilder.toString();
    339         }
    340 
    341         /**
    342          * Trim whitespace from the beginning and end of a string.
    343          * <p>
    344          * Note: can't use {@link String#trim} because {@link String#trim} has a
    345          * different definition of whitespace than we want.
    346          * @param value the string to trim
    347          * @return the trimmed string
    348          */
    349         private String trimWhitespace(String value) {
    350             int start = 0;
    351             int last = value.length() - 1;
    352             int end = last;
    353             while (start <= end && isWhitespace(value.charAt(start))) {
    354                 start++;
    355             }
    356             while (end >= start && isWhitespace(value.charAt(end))) {
    357                 end--;
    358             }
    359             if (start == 0 && end == last) {
    360                 return value;
    361             }
    362             return value.substring(start, end + 1);
    363         }
    364 
    365         /**
    366          * Check if c is whitespace.
    367          * @param c character to test
    368          * @return true if c is a whitespace character
    369          */
    370         private boolean isWhitespace(char c) {
    371             switch(c) {
    372             case ' ':
    373             case '\t':
    374             case '\f':
    375             case '\n':
    376             case '\r':
    377             case 11: /* VT */
    378                 return true;
    379             default:
    380                 return false;
    381             }
    382         }
    383 
    384         /**
    385          * Check whether an individual character is legal. Uses the
    386          * flag bit-set passed into the constructor.
    387          * @param c
    388          * @return true if c is a legal character
    389          */
    390         private boolean characterIsLegal(char c) {
    391             switch(c) {
    392             case ' ' : return (mFlags & SPACE_OK) != 0;
    393             case '\t': case '\f': case '\n': case '\r': case 11: /* VT */
    394               return (mFlags & OTHER_WHITESPACE_OK) != 0;
    395             case '\"': return (mFlags & DQUOTE_OK) != 0;
    396             case '\'': return (mFlags & SQUOTE_OK) != 0;
    397             case '<' : return (mFlags & LT_OK) != 0;
    398             case '>' : return (mFlags & GT_OK) != 0;
    399             case '&' : return (mFlags & AMP_OK) != 0;
    400             case '%' : return (mFlags & PCT_OK) != 0;
    401             case '\0': return (mFlags & NUL_OK) != 0;
    402             default  : return (c >= 32 && c < 127) ||
    403                 ((c >= 128) && ((mFlags & NON_7_BIT_ASCII_OK) != 0));
    404             }
    405         }
    406     }
    407 
    /**
     * Get the current value sanitizer used when processing
     * unregistered parameter values.
     * <p>
     * <b>Note:</b> The default unregistered parameter value sanitizer is
     * the one returned by {@link #getAllIllegal()}, which doesn't allow any
     * special characters.
     *
     * @return the current ValueSanitizer used to sanitize unregistered
     * parameter values.
     */
    public ValueSanitizer getUnregisteredParameterValueSanitizer() {
        return mUnregisteredParameterValueSanitizer;
    }
    422 
    /**
     * Set the value sanitizer used when processing unregistered
     * parameter values. The argument is stored as given; no null check is
     * performed here.
     * @param sanitizer the ValueSanitizer used to sanitize unregistered
     * parameter values.
     */
    public void setUnregisteredParameterValueSanitizer(
            ValueSanitizer sanitizer) {
        mUnregisteredParameterValueSanitizer = sanitizer;
    }
    433 
    434 
    // Private fields for singleton sanitizers:
    // One shared IllegalCharacterValueSanitizer per predefined flag
    // combination; the static getXXX() methods return these instances.

    private static final ValueSanitizer sAllIllegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.ALL_ILLEGAL);

    private static final ValueSanitizer sAllButNulLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.ALL_BUT_NUL_LEGAL);

    private static final ValueSanitizer sAllButWhitespaceLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.ALL_BUT_WHITESPACE_LEGAL);

    private static final ValueSanitizer sURLLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.URL_LEGAL);

    private static final ValueSanitizer sUrlAndSpaceLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.URL_AND_SPACE_LEGAL);

    private static final ValueSanitizer sAmpLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.AMP_LEGAL);

    private static final ValueSanitizer sAmpAndSpaceLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.AMP_AND_SPACE_LEGAL);

    private static final ValueSanitizer sSpaceLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.SPACE_LEGAL);

    private static final ValueSanitizer sAllButNulAndAngleBracketsLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL);
    472 
    /**
     * Return the shared value sanitizer built with
     * {@link IllegalCharacterValueSanitizer#ALL_ILLEGAL}: no special
     * characters are allowed and the script-URL flag is not set.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllIllegal() {
        return sAllIllegal;
    }
    481 
    /**
     * Return the shared value sanitizer built with
     * {@link IllegalCharacterValueSanitizer#ALL_BUT_NUL_LEGAL}: every flag
     * is set except the one for Nul ('\0') characters, so the script-URL
     * flag is set.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllButNulLegal() {
        return sAllButNulLegal;
    }
    /**
     * Return the shared value sanitizer built with
     * {@link IllegalCharacterValueSanitizer#ALL_BUT_WHITESPACE_LEGAL}: every
     * flag is set except those for Nul ('\0') characters, space (' '), and
     * other whitespace characters, so the script-URL flag is set.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllButWhitespaceLegal() {
        return sAllButWhitespaceLegal;
    }
    /**
     * Return the shared value sanitizer built with
     * {@link IllegalCharacterValueSanitizer#URL_LEGAL}, which allows the
     * characters used by encoded URLs (non-7-bit characters, single quote,
     * ampersand and percent sign). The script-URL flag is not set.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getUrlLegal() {
        return sURLLegal;
    }
    /**
     * Return the shared value sanitizer built with
     * {@link IllegalCharacterValueSanitizer#URL_AND_SPACE_LEGAL}, which
     * allows all the characters used by encoded URLs and also allows
     * spaces, which are not technically legal in encoded URLs, but commonly
     * appear anyway. The script-URL flag is not set.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getUrlAndSpaceLegal() {
        return sUrlAndSpaceLegal;
    }
    /**
     * Return the shared value sanitizer built with
     * {@link IllegalCharacterValueSanitizer#AMP_LEGAL}: no special
     * characters are allowed except ampersand ('&'). The script-URL flag is
     * not set.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAmpLegal() {
        return sAmpLegal;
    }
    /**
     * Return the shared value sanitizer built with
     * {@link IllegalCharacterValueSanitizer#AMP_AND_SPACE_LEGAL}: no special
     * characters are allowed except ampersand ('&') and space (' '). The
     * script-URL flag is not set.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAmpAndSpaceLegal() {
        return sAmpAndSpaceLegal;
    }
    /**
     * Return the shared value sanitizer built with
     * {@link IllegalCharacterValueSanitizer#SPACE_LEGAL}: no special
     * characters are allowed except space (' '). The script-URL flag is not
     * set.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getSpaceLegal() {
        return sSpaceLegal;
    }
    /**
     * Return the shared value sanitizer built with
     * {@link IllegalCharacterValueSanitizer#ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL}:
     * every flag is set except those for angle brackets ('<' and '>') and
     * Nul ('\0'), so the script-URL flag is set.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllButNulAndAngleBracketsLegal() {
        return sAllButNulAndAngleBracketsLegal;
    }
    550 
    /**
     * Constructs a UrlQuerySanitizer without parsing anything.
     * <p>
     * Defaults:
     * <ul>
     * <li>unregistered parameters are not allowed.
     * <li>the last instance of a repeated parameter is preferred.
     * <li>The default value sanitizer is an AllIllegal value sanitizer.
     * </ul>
     */
    public UrlQuerySanitizer() {
    }
    563 
    /**
     * Constructs a UrlQuerySanitizer and parses a URL.
     * This constructor is provided for convenience when the
     * default parsing behavior is acceptable.
     * <p>
     * Because the URL is parsed before the constructor returns, there isn't
     * a chance to configure the sanitizer to change the parsing behavior.
     * <p>
     * <code>
     * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer(myUrl);
     * String name = sanitizer.getValue("name");
     * </code>
     * <p>
     * Defaults:
     * <ul>
     * <li>unregistered parameters <em>are</em> allowed.
     * <li>the last instance of a repeated parameter is preferred.
     * <li>The default value sanitizer is an AllIllegal value sanitizer.
     * </ul>
     * @param url the encoded URL to parse.
     */
    public UrlQuerySanitizer(String url) {
        // Unregistered parameters must be enabled before parsing.
        setAllowUnregisteredParamaters(true);
        parseUrl(url);
    }
    588 
    589     /**
    590      * Parse the query parameters out of an encoded URL.
    591      * Works by extracting the query portion from the URL and then
    592      * calling parseQuery(). If there is no query portion it is
    593      * treated as if the query portion is an empty string.
    594      * @param url the encoded URL to parse.
    595      */
    596     public void parseUrl(String url) {
    597         int queryIndex = url.indexOf('?');
    598         String query;
    599         if (queryIndex >= 0) {
    600             query = url.substring(queryIndex + 1);
    601         }
    602         else {
    603             query = "";
    604         }
    605         parseQuery(query);
    606     }
    607 
    608     /**
    609      * Parse a query. A query string is any number of parameter-value clauses
    610      * separated by any non-zero number of ampersands. A parameter-value clause
    611      * is a parameter followed by an equal sign, followed by a value. If the
    612      * equal sign is missing, the value is assumed to be the empty string.
    613      * @param query the query to parse.
    614      */
    615     public void parseQuery(String query) {
    616         clear();
    617         // Split by '&'
    618         StringTokenizer tokenizer = new StringTokenizer(query, "&");
    619         while(tokenizer.hasMoreElements()) {
    620             String attributeValuePair = tokenizer.nextToken();
    621             if (attributeValuePair.length() > 0) {
    622                 int assignmentIndex = attributeValuePair.indexOf('=');
    623                 if (assignmentIndex < 0) {
    624                     // No assignment found, treat as if empty value
    625                     parseEntry(attributeValuePair, "");
    626                 }
    627                 else {
    628                     parseEntry(attributeValuePair.substring(0, assignmentIndex),
    629                             attributeValuePair.substring(assignmentIndex + 1));
    630                 }
    631             }
    632         }
    633     }
    634 
    /**
     * Get a set of all of the parameters found in the sanitized query.
     * <p>
     * Note: Do not modify this set. Treat it as a read-only set. It is the
     * key set of the internal entry map, not a copy.
     * @return all the parameters found in the current query.
     */
    public Set<String> getParameterSet() {
        return mEntries.keySet();
    }
    644 
    /**
     * An array list of all of the parameter value pairs in the sanitized
     * query, in the order they appeared in the query. May contain duplicate
     * parameters.
     * <p class="note"><b>Note:</b> Do not modify this list. Treat it as a read-only list.
     * The internal list itself is returned, not a copy.</p>
     * @return the parameter-value pairs recorded for the current query.
     */
    public List<ParameterValuePair> getParameterList() {
        return mEntriesList;
    }
    654 
    /**
     * Check if a parameter exists in the current sanitized query.
     * @param parameter the unencoded name of a parameter.
     * @return true if the parameter exists in the current sanitized query.
     */
    public boolean hasParameter(String parameter) {
        return mEntries.containsKey(parameter);
    }
    663 
    /**
     * Get the value for a parameter in the current sanitized query.
     * Returns null if the parameter does not exist.
     * @param parameter the unencoded name of a parameter.
     * @return the sanitized unencoded value of the parameter,
     * or null if the parameter does not exist.
     */
    public String getValue(String parameter) {
        return mEntries.get(parameter);
    }
    675 
    676     /**
    677      * Register a value sanitizer for a particular parameter. Can also be used
    678      * to replace or remove an already-set value sanitizer.
    679      * <p>
    680      * Registering a non-null value sanitizer for a particular parameter
    681      * makes that parameter a registered parameter.
    682      * @param parameter an unencoded parameter name
    683      * @param valueSanitizer the value sanitizer to use for a particular
    684      * parameter. May be null in order to unregister that parameter.
    685      * @see #getAllowUnregisteredParamaters()
    686      */
    687     public void registerParameter(String parameter,
    688             ValueSanitizer valueSanitizer) {
    689         if (valueSanitizer == null) {
    690             mSanitizers.remove(parameter);
    691         }
    692         mSanitizers.put(parameter, valueSanitizer);
    693     }
    694 
    695     /**
    696      * Register a value sanitizer for an array of parameters.
    697      * @param parameters An array of unencoded parameter names.
    698      * @param valueSanitizer
    699      * @see #registerParameter
    700      */
    701     public void registerParameters(String[] parameters,
    702             ValueSanitizer valueSanitizer) {
    703         int length = parameters.length;
    704         for(int i = 0; i < length; i++) {
    705             mSanitizers.put(parameters[i], valueSanitizer);
    706         }
    707     }
    708 
    /**
     * Set whether or not unregistered parameters are allowed. If they
     * are not allowed, then they will be dropped when a query is sanitized.
     * <p>
     * Defaults to false. (The misspelling "Paramaters" is part of the
     * method name.)
     * @param allowUnregisteredParamaters true to allow unregistered parameters.
     * @see #getAllowUnregisteredParamaters()
     */
    public void setAllowUnregisteredParamaters(
            boolean allowUnregisteredParamaters) {
        mAllowUnregisteredParamaters = allowUnregisteredParamaters;
    }
    721 
    /**
     * Get whether or not unregistered parameters are allowed. If not
     * allowed, they will be dropped when a query is parsed.
     * @return true if unregistered parameters are allowed.
     * @see #setAllowUnregisteredParamaters(boolean)
     */
    public boolean getAllowUnregisteredParamaters() {
        return mAllowUnregisteredParamaters;
    }
    731 
    /**
     * Set whether or not the first occurrence of a repeated parameter is
     * preferred. True means the first repeated parameter is preferred.
     * False means that the last repeated parameter is preferred.
     * <p>
     * The preferred parameter is the one whose value is returned when
     * getValue is called.
     * <p>
     * Defaults to false.
     * @param preferFirstRepeatedParameter True if the first repeated
     * parameter is preferred.
     * @see #getPreferFirstRepeatedParameter()
     */
    public void setPreferFirstRepeatedParameter(
            boolean preferFirstRepeatedParameter) {
        mPreferFirstRepeatedParameter = preferFirstRepeatedParameter;
    }
    749 
    /**
     * Get whether or not the first occurrence of a repeated parameter is
     * preferred.
     * @return true if the first occurrence of a repeated parameter is
     * preferred; false if the last occurrence is preferred.
     * @see #setPreferFirstRepeatedParameter(boolean)
     */
    public boolean getPreferFirstRepeatedParameter() {
        return mPreferFirstRepeatedParameter;
    }
    760 
    761     /**
    762      * Parse an escaped parameter-value pair. The default implementation
    763      * unescapes both the parameter and the value, then looks up the
    764      * effective value sanitizer for the parameter and uses it to sanitize
    765      * the value. If all goes well then addSanitizedValue is called with
    766      * the unescaped parameter and the sanitized unescaped value.
    767      * @param parameter an escaped parameter
    768      * @param value an unsanitzied escaped value
    769      */
    770     protected void parseEntry(String parameter, String value) {
    771         String unescapedParameter = unescape(parameter);
    772          ValueSanitizer valueSanitizer =
    773             getEffectiveValueSanitizer(unescapedParameter);
    774 
    775         if (valueSanitizer == null) {
    776             return;
    777         }
    778         String unescapedValue = unescape(value);
    779         String sanitizedValue = valueSanitizer.sanitize(unescapedValue);
    780         addSanitizedEntry(unescapedParameter, sanitizedValue);
    781     }
    782 
    783     /**
    784      * Record a sanitized parameter-value pair. Override if you want to
    785      * do additional filtering or validation.
    786      * @param parameter an unescaped parameter
    787      * @param value a sanitized unescaped value
    788      */
    789     protected void addSanitizedEntry(String parameter, String value) {
    790         mEntriesList.add(
    791                 new ParameterValuePair(parameter, value));
    792         if (mPreferFirstRepeatedParameter) {
    793             if (mEntries.containsKey(parameter)) {
    794                 return;
    795             }
    796         }
    797         mEntries.put(parameter, value);
    798     }
    799 
    800     /**
    801      * Get the value sanitizer for a parameter. Returns null if there
    802      * is no value sanitizer registered for the parameter.
    803      * @param parameter the unescaped parameter
    804      * @return the currently registered value sanitizer for this parameter.
    805      * @see #registerParameter(String, android.net.UrlQuerySanitizer.ValueSanitizer)
    806      */
    807     public ValueSanitizer getValueSanitizer(String parameter) {
    808         return mSanitizers.get(parameter);
    809     }
    810 
    811     /**
    812      * Get the effective value sanitizer for a parameter. Like getValueSanitizer,
    813      * except if there is no value sanitizer registered for a parameter, and
    814      * unregistered paramaters are allowed, then the default value sanitizer is
    815      * returned.
    816      * @param parameter an unescaped parameter
    817      * @return the effective value sanitizer for a parameter.
    818      */
    819     public ValueSanitizer getEffectiveValueSanitizer(String parameter) {
    820         ValueSanitizer sanitizer = getValueSanitizer(parameter);
    821         if (sanitizer == null && mAllowUnregisteredParamaters) {
    822             sanitizer = getUnregisteredParameterValueSanitizer();
    823         }
    824         return sanitizer;
    825     }
    826 
    827     /**
    828      * Unescape an escaped string.
    829      * <ul>
    830      * <li>'+' characters are replaced by
    831      * ' ' characters.
    832      * <li>Valid "%xx" escape sequences are replaced by the
    833      * corresponding unescaped character.
    834      * <li>Invalid escape sequences such as %1z", are passed through unchanged.
    835      * <ol>
    836      * @param string the escaped string
    837      * @return the unescaped string.
    838      */
    839     public String unescape(String string) {
    840         // Early exit if no escaped characters.
    841         int firstEscape = string.indexOf('%');
    842         if ( firstEscape < 0) {
    843             firstEscape = string.indexOf('+');
    844             if (firstEscape < 0) {
    845                 return string;
    846             }
    847         }
    848 
    849         int length = string.length();
    850 
    851         StringBuilder stringBuilder = new StringBuilder(length);
    852         stringBuilder.append(string.substring(0, firstEscape));
    853         for (int i = firstEscape; i < length; i++) {
    854             char c = string.charAt(i);
    855             if (c == '+') {
    856                 c = ' ';
    857             }
    858             else if ( c == '%' && i + 2 < length) {
    859                 char c1 = string.charAt(i + 1);
    860                 char c2 = string.charAt(i + 2);
    861                 if (isHexDigit(c1) && isHexDigit(c2)) {
    862                     c = (char) (decodeHexDigit(c1) * 16 + decodeHexDigit(c2));
    863                     i += 2;
    864                 }
    865             }
    866             stringBuilder.append(c);
    867         }
    868         return stringBuilder.toString();
    869     }
    870 
    871     /**
    872      * Test if a character is a hexidecimal digit. Both upper case and lower
    873      * case hex digits are allowed.
    874      * @param c the character to test
    875      * @return true if c is a hex digit.
    876      */
    877     protected boolean isHexDigit(char c) {
    878         return decodeHexDigit(c) >= 0;
    879     }
    880 
    881     /**
    882      * Convert a character that represents a hexidecimal digit into an integer.
    883      * If the character is not a hexidecimal digit, then -1 is returned.
    884      * Both upper case and lower case hex digits are allowed.
    885      * @param c the hexidecimal digit.
    886      * @return the integer value of the hexidecimal digit.
    887      */
    888 
    889     protected int decodeHexDigit(char c) {
    890         if (c >= '0' && c <= '9') {
    891             return c - '0';
    892         }
    893         else if (c >= 'A' && c <= 'F') {
    894             return c - 'A' + 10;
    895         }
    896         else if (c >= 'a' && c <= 'f') {
    897             return c - 'a' + 10;
    898         }
    899         else {
    900             return -1;
    901         }
    902     }
    903 
    904     /**
    905      * Clear the existing entries. Called to get ready to parse a new
    906      * query string.
    907      */
    908     protected void clear() {
    909         mEntries.clear();
    910         mEntriesList.clear();
    911     }
    912 }
    913 
    914