1 /* 2 * Copyright (C) 2007 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package android.net; 18 19 import java.util.ArrayList; 20 import java.util.HashMap; 21 import java.util.List; 22 import java.util.Locale; 23 import java.util.Set; 24 import java.util.StringTokenizer; 25 26 /** 27 * 28 * Sanitizes the Query portion of a URL. Simple example: 29 * <code> 30 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer(); 31 * sanitizer.setAllowUnregisteredParamaters(true); 32 * sanitizer.parseUrl("http://example.com/?name=Joe+User"); 33 * String name = sanitizer.getValue("name")); 34 * // name now contains "Joe_User" 35 * </code> 36 * 37 * Register ValueSanitizers to customize the way individual 38 * parameters are sanitized: 39 * <code> 40 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer(); 41 * sanitizer.registerParamater("name", UrlQuerySanitizer.createSpaceLegal()); 42 * sanitizer.parseUrl("http://example.com/?name=Joe+User"); 43 * String name = sanitizer.getValue("name")); 44 * // name now contains "Joe User". (The string is first decoded, which 45 * // converts the '+' to a ' '. Then the string is sanitized, which 46 * // converts the ' ' to an '_'. (The ' ' is converted because the default 47 * unregistered parameter sanitizer does not allow any special characters, 48 * and ' ' is a special character.) 49 * </code> 50 * 51 * There are several ways to create ValueSanitizers. In order of increasing 52 * sophistication: 53 * <ol> 54 * <li>Call one of the UrlQuerySanitizer.createXXX() methods. 55 * <li>Construct your own instance of 56 * UrlQuerySanitizer.IllegalCharacterValueSanitizer. 57 * <li>Subclass UrlQuerySanitizer.ValueSanitizer to define your own value 58 * sanitizer. 59 * </ol> 60 * 61 */ 62 public class UrlQuerySanitizer { 63 64 /** 65 * A simple tuple that holds parameter-value pairs. 66 * 67 */ 68 public class ParameterValuePair { 69 /** 70 * Construct a parameter-value tuple. 71 * @param parameter an unencoded parameter 72 * @param value an unencoded value 73 */ 74 public ParameterValuePair(String parameter, 75 String value) { 76 mParameter = parameter; 77 mValue = value; 78 } 79 /** 80 * The unencoded parameter 81 */ 82 public String mParameter; 83 /** 84 * The unencoded value 85 */ 86 public String mValue; 87 } 88 89 final private HashMap<String, ValueSanitizer> mSanitizers = 90 new HashMap<String, ValueSanitizer>(); 91 final private HashMap<String, String> mEntries = 92 new HashMap<String, String>(); 93 final private ArrayList<ParameterValuePair> mEntriesList = 94 new ArrayList<ParameterValuePair>(); 95 private boolean mAllowUnregisteredParamaters; 96 private boolean mPreferFirstRepeatedParameter; 97 private ValueSanitizer mUnregisteredParameterValueSanitizer = 98 getAllIllegal(); 99 100 /** 101 * A functor used to sanitize a single query value. 102 * 103 */ 104 public static interface ValueSanitizer { 105 /** 106 * Sanitize an unencoded value. 107 * @param value 108 * @return the sanitized unencoded value 109 */ 110 public String sanitize(String value); 111 } 112 113 /** 114 * Sanitize values based on which characters they contain. Illegal 115 * characters are replaced with either space or '_', depending upon 116 * whether space is a legal character or not. 117 */ 118 public static class IllegalCharacterValueSanitizer implements 119 ValueSanitizer { 120 private int mFlags; 121 122 /** 123 * Allow space (' ') characters. 124 */ 125 public final static int SPACE_OK = 1 << 0; 126 /** 127 * Allow whitespace characters other than space. The 128 * other whitespace characters are 129 * '\t' '\f' '\n' '\r' and '\0x000b' (vertical tab) 130 */ 131 public final static int OTHER_WHITESPACE_OK = 1 << 1; 132 /** 133 * Allow characters with character codes 128 to 255. 134 */ 135 public final static int NON_7_BIT_ASCII_OK = 1 << 2; 136 /** 137 * Allow double quote characters. ('"') 138 */ 139 public final static int DQUOTE_OK = 1 << 3; 140 /** 141 * Allow single quote characters. ('\'') 142 */ 143 public final static int SQUOTE_OK = 1 << 4; 144 /** 145 * Allow less-than characters. ('<') 146 */ 147 public final static int LT_OK = 1 << 5; 148 /** 149 * Allow greater-than characters. ('>') 150 */ 151 public final static int GT_OK = 1 << 6; 152 /** 153 * Allow ampersand characters ('&') 154 */ 155 public final static int AMP_OK = 1 << 7; 156 /** 157 * Allow percent-sign characters ('%') 158 */ 159 public final static int PCT_OK = 1 << 8; 160 /** 161 * Allow nul characters ('\0') 162 */ 163 public final static int NUL_OK = 1 << 9; 164 /** 165 * Allow text to start with a script URL 166 * such as "javascript:" or "vbscript:" 167 */ 168 public final static int SCRIPT_URL_OK = 1 << 10; 169 170 /** 171 * Mask with all fields set to OK 172 */ 173 public final static int ALL_OK = 0x7ff; 174 175 /** 176 * Mask with both regular space and other whitespace OK 177 */ 178 public final static int ALL_WHITESPACE_OK = 179 SPACE_OK | OTHER_WHITESPACE_OK; 180 181 182 // Common flag combinations: 183 184 /** 185 * <ul> 186 * <li>Deny all special characters. 187 * <li>Deny script URLs. 188 * </ul> 189 */ 190 public final static int ALL_ILLEGAL = 191 0; 192 /** 193 * <ul> 194 * <li>Allow all special characters except Nul. ('\0'). 195 * <li>Allow script URLs. 196 * </ul> 197 */ 198 public final static int ALL_BUT_NUL_LEGAL = 199 ALL_OK & ~NUL_OK; 200 /** 201 * <ul> 202 * <li>Allow all special characters except for: 203 * <ul> 204 * <li>whitespace characters 205 * <li>Nul ('\0') 206 * </ul> 207 * <li>Allow script URLs. 208 * </ul> 209 */ 210 public final static int ALL_BUT_WHITESPACE_LEGAL = 211 ALL_OK & ~(ALL_WHITESPACE_OK | NUL_OK); 212 /** 213 * <ul> 214 * <li>Allow characters used by encoded URLs. 215 * <li>Deny script URLs. 216 * </ul> 217 */ 218 public final static int URL_LEGAL = 219 NON_7_BIT_ASCII_OK | SQUOTE_OK | AMP_OK | PCT_OK; 220 /** 221 * <ul> 222 * <li>Allow characters used by encoded URLs. 223 * <li>Allow spaces. 224 * <li>Deny script URLs. 225 * </ul> 226 */ 227 public final static int URL_AND_SPACE_LEGAL = 228 URL_LEGAL | SPACE_OK; 229 /** 230 * <ul> 231 * <li>Allow ampersand. 232 * <li>Deny script URLs. 233 * </ul> 234 */ 235 public final static int AMP_LEGAL = 236 AMP_OK; 237 /** 238 * <ul> 239 * <li>Allow ampersand. 240 * <li>Allow space. 241 * <li>Deny script URLs. 242 * </ul> 243 */ 244 public final static int AMP_AND_SPACE_LEGAL = 245 AMP_OK | SPACE_OK; 246 /** 247 * <ul> 248 * <li>Allow space. 249 * <li>Deny script URLs. 250 * </ul> 251 */ 252 public final static int SPACE_LEGAL = 253 SPACE_OK; 254 /** 255 * <ul> 256 * <li>Allow all but. 257 * <ul> 258 * <li>Nul ('\0') 259 * <li>Angle brackets ('<', '>') 260 * </ul> 261 * <li>Deny script URLs. 262 * </ul> 263 */ 264 public final static int ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL = 265 ALL_OK & ~(NUL_OK | LT_OK | GT_OK); 266 267 /** 268 * Script URL definitions 269 */ 270 271 private final static String JAVASCRIPT_PREFIX = "javascript:"; 272 273 private final static String VBSCRIPT_PREFIX = "vbscript:"; 274 275 private final static int MIN_SCRIPT_PREFIX_LENGTH = Math.min( 276 JAVASCRIPT_PREFIX.length(), VBSCRIPT_PREFIX.length()); 277 278 /** 279 * Construct a sanitizer. The parameters set the behavior of the 280 * sanitizer. 281 * @param flags some combination of the XXX_OK flags. 282 */ 283 public IllegalCharacterValueSanitizer( 284 int flags) { 285 mFlags = flags; 286 } 287 /** 288 * Sanitize a value. 289 * <ol> 290 * <li>If script URLs are not OK, the will be removed. 291 * <li>If neither spaces nor other white space is OK, then 292 * white space will be trimmed from the beginning and end of 293 * the URL. (Just the actual white space characters are trimmed, not 294 * other control codes.) 295 * <li> Illegal characters will be replaced with 296 * either ' ' or '_', depending on whether a space is itself a 297 * legal character. 298 * </ol> 299 * @param value 300 * @return the sanitized value 301 */ 302 public String sanitize(String value) { 303 if (value == null) { 304 return null; 305 } 306 int length = value.length(); 307 if ((mFlags & SCRIPT_URL_OK) != 0) { 308 if (length >= MIN_SCRIPT_PREFIX_LENGTH) { 309 String asLower = value.toLowerCase(Locale.ROOT); 310 if (asLower.startsWith(JAVASCRIPT_PREFIX) || 311 asLower.startsWith(VBSCRIPT_PREFIX)) { 312 return ""; 313 } 314 } 315 } 316 317 // If whitespace isn't OK, get rid of whitespace at beginning 318 // and end of value. 319 if ( (mFlags & ALL_WHITESPACE_OK) == 0) { 320 value = trimWhitespace(value); 321 // The length could have changed, so we need to correct 322 // the length variable. 323 length = value.length(); 324 } 325 326 StringBuilder stringBuilder = new StringBuilder(length); 327 for(int i = 0; i < length; i++) { 328 char c = value.charAt(i); 329 if (!characterIsLegal(c)) { 330 if ((mFlags & SPACE_OK) != 0) { 331 c = ' '; 332 } 333 else { 334 c = '_'; 335 } 336 } 337 stringBuilder.append(c); 338 } 339 return stringBuilder.toString(); 340 } 341 342 /** 343 * Trim whitespace from the beginning and end of a string. 344 * <p> 345 * Note: can't use {@link String#trim} because {@link String#trim} has a 346 * different definition of whitespace than we want. 347 * @param value the string to trim 348 * @return the trimmed string 349 */ 350 private String trimWhitespace(String value) { 351 int start = 0; 352 int last = value.length() - 1; 353 int end = last; 354 while (start <= end && isWhitespace(value.charAt(start))) { 355 start++; 356 } 357 while (end >= start && isWhitespace(value.charAt(end))) { 358 end--; 359 } 360 if (start == 0 && end == last) { 361 return value; 362 } 363 return value.substring(start, end + 1); 364 } 365 366 /** 367 * Check if c is whitespace. 368 * @param c character to test 369 * @return true if c is a whitespace character 370 */ 371 private boolean isWhitespace(char c) { 372 switch(c) { 373 case ' ': 374 case '\t': 375 case '\f': 376 case '\n': 377 case '\r': 378 case 11: /* VT */ 379 return true; 380 default: 381 return false; 382 } 383 } 384 385 /** 386 * Check whether an individual character is legal. Uses the 387 * flag bit-set passed into the constructor. 388 * @param c 389 * @return true if c is a legal character 390 */ 391 private boolean characterIsLegal(char c) { 392 switch(c) { 393 case ' ' : return (mFlags & SPACE_OK) != 0; 394 case '\t': case '\f': case '\n': case '\r': case 11: /* VT */ 395 return (mFlags & OTHER_WHITESPACE_OK) != 0; 396 case '\"': return (mFlags & DQUOTE_OK) != 0; 397 case '\'': return (mFlags & SQUOTE_OK) != 0; 398 case '<' : return (mFlags & LT_OK) != 0; 399 case '>' : return (mFlags & GT_OK) != 0; 400 case '&' : return (mFlags & AMP_OK) != 0; 401 case '%' : return (mFlags & PCT_OK) != 0; 402 case '\0': return (mFlags & NUL_OK) != 0; 403 default : return (c >= 32 && c < 127) || 404 ((c >= 128) && ((mFlags & NON_7_BIT_ASCII_OK) != 0)); 405 } 406 } 407 } 408 409 /** 410 * Get the current value sanitizer used when processing 411 * unregistered parameter values. 412 * <p> 413 * <b>Note:</b> The default unregistered parameter value sanitizer is 414 * one that doesn't allow any special characters, similar to what 415 * is returned by calling createAllIllegal. 416 * 417 * @return the current ValueSanitizer used to sanitize unregistered 418 * parameter values. 419 */ 420 public ValueSanitizer getUnregisteredParameterValueSanitizer() { 421 return mUnregisteredParameterValueSanitizer; 422 } 423 424 /** 425 * Set the value sanitizer used when processing unregistered 426 * parameter values. 427 * @param sanitizer set the ValueSanitizer used to sanitize unregistered 428 * parameter values. 429 */ 430 public void setUnregisteredParameterValueSanitizer( 431 ValueSanitizer sanitizer) { 432 mUnregisteredParameterValueSanitizer = sanitizer; 433 } 434 435 436 // Private fields for singleton sanitizers: 437 438 private static final ValueSanitizer sAllIllegal = 439 new IllegalCharacterValueSanitizer( 440 IllegalCharacterValueSanitizer.ALL_ILLEGAL); 441 442 private static final ValueSanitizer sAllButNulLegal = 443 new IllegalCharacterValueSanitizer( 444 IllegalCharacterValueSanitizer.ALL_BUT_NUL_LEGAL); 445 446 private static final ValueSanitizer sAllButWhitespaceLegal = 447 new IllegalCharacterValueSanitizer( 448 IllegalCharacterValueSanitizer.ALL_BUT_WHITESPACE_LEGAL); 449 450 private static final ValueSanitizer sURLLegal = 451 new IllegalCharacterValueSanitizer( 452 IllegalCharacterValueSanitizer.URL_LEGAL); 453 454 private static final ValueSanitizer sUrlAndSpaceLegal = 455 new IllegalCharacterValueSanitizer( 456 IllegalCharacterValueSanitizer.URL_AND_SPACE_LEGAL); 457 458 private static final ValueSanitizer sAmpLegal = 459 new IllegalCharacterValueSanitizer( 460 IllegalCharacterValueSanitizer.AMP_LEGAL); 461 462 private static final ValueSanitizer sAmpAndSpaceLegal = 463 new IllegalCharacterValueSanitizer( 464 IllegalCharacterValueSanitizer.AMP_AND_SPACE_LEGAL); 465 466 private static final ValueSanitizer sSpaceLegal = 467 new IllegalCharacterValueSanitizer( 468 IllegalCharacterValueSanitizer.SPACE_LEGAL); 469 470 private static final ValueSanitizer sAllButNulAndAngleBracketsLegal = 471 new IllegalCharacterValueSanitizer( 472 IllegalCharacterValueSanitizer.ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL); 473 474 /** 475 * Return a value sanitizer that does not allow any special characters, 476 * and also does not allow script URLs. 477 * @return a value sanitizer 478 */ 479 public static final ValueSanitizer getAllIllegal() { 480 return sAllIllegal; 481 } 482 483 /** 484 * Return a value sanitizer that allows everything except Nul ('\0') 485 * characters. Script URLs are allowed. 486 * @return a value sanitizer 487 */ 488 public static final ValueSanitizer getAllButNulLegal() { 489 return sAllButNulLegal; 490 } 491 /** 492 * Return a value sanitizer that allows everything except Nul ('\0') 493 * characters, space (' '), and other whitespace characters. 494 * Script URLs are allowed. 495 * @return a value sanitizer 496 */ 497 public static final ValueSanitizer getAllButWhitespaceLegal() { 498 return sAllButWhitespaceLegal; 499 } 500 /** 501 * Return a value sanitizer that allows all the characters used by 502 * encoded URLs. Does not allow script URLs. 503 * @return a value sanitizer 504 */ 505 public static final ValueSanitizer getUrlLegal() { 506 return sURLLegal; 507 } 508 /** 509 * Return a value sanitizer that allows all the characters used by 510 * encoded URLs and allows spaces, which are not technically legal 511 * in encoded URLs, but commonly appear anyway. 512 * Does not allow script URLs. 513 * @return a value sanitizer 514 */ 515 public static final ValueSanitizer getUrlAndSpaceLegal() { 516 return sUrlAndSpaceLegal; 517 } 518 /** 519 * Return a value sanitizer that does not allow any special characters 520 * except ampersand ('&'). Does not allow script URLs. 521 * @return a value sanitizer 522 */ 523 public static final ValueSanitizer getAmpLegal() { 524 return sAmpLegal; 525 } 526 /** 527 * Return a value sanitizer that does not allow any special characters 528 * except ampersand ('&') and space (' '). Does not allow script URLs. 529 * @return a value sanitizer 530 */ 531 public static final ValueSanitizer getAmpAndSpaceLegal() { 532 return sAmpAndSpaceLegal; 533 } 534 /** 535 * Return a value sanitizer that does not allow any special characters 536 * except space (' '). Does not allow script URLs. 537 * @return a value sanitizer 538 */ 539 public static final ValueSanitizer getSpaceLegal() { 540 return sSpaceLegal; 541 } 542 /** 543 * Return a value sanitizer that allows any special characters 544 * except angle brackets ('<' and '>') and Nul ('\0'). 545 * Allows script URLs. 546 * @return a value sanitizer 547 */ 548 public static final ValueSanitizer getAllButNulAndAngleBracketsLegal() { 549 return sAllButNulAndAngleBracketsLegal; 550 } 551 552 /** 553 * Constructs a UrlQuerySanitizer. 554 * <p> 555 * Defaults: 556 * <ul> 557 * <li>unregistered parameters are not allowed. 558 * <li>the last instance of a repeated parameter is preferred. 559 * <li>The default value sanitizer is an AllIllegal value sanitizer. 560 * <ul> 561 */ 562 public UrlQuerySanitizer() { 563 } 564 565 /** 566 * Constructs a UrlQuerySanitizer and parse a URL. 567 * This constructor is provided for convenience when the 568 * default parsing behavior is acceptable. 569 * <p> 570 * Because the URL is parsed before the constructor returns, there isn't 571 * a chance to configure the sanitizer to change the parsing behavior. 572 * <p> 573 * <code> 574 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer(myUrl); 575 * String name = sanitizer.getValue("name"); 576 * </code> 577 * <p> 578 * Defaults: 579 * <ul> 580 * <li>unregistered parameters <em>are</em> allowed. 581 * <li>the last instance of a repeated parameter is preferred. 582 * <li>The default value sanitizer is an AllIllegal value sanitizer. 583 * <ul> 584 */ 585 public UrlQuerySanitizer(String url) { 586 setAllowUnregisteredParamaters(true); 587 parseUrl(url); 588 } 589 590 /** 591 * Parse the query parameters out of an encoded URL. 592 * Works by extracting the query portion from the URL and then 593 * calling parseQuery(). If there is no query portion it is 594 * treated as if the query portion is an empty string. 595 * @param url the encoded URL to parse. 596 */ 597 public void parseUrl(String url) { 598 int queryIndex = url.indexOf('?'); 599 String query; 600 if (queryIndex >= 0) { 601 query = url.substring(queryIndex + 1); 602 } 603 else { 604 query = ""; 605 } 606 parseQuery(query); 607 } 608 609 /** 610 * Parse a query. A query string is any number of parameter-value clauses 611 * separated by any non-zero number of ampersands. A parameter-value clause 612 * is a parameter followed by an equal sign, followed by a value. If the 613 * equal sign is missing, the value is assumed to be the empty string. 614 * @param query the query to parse. 615 */ 616 public void parseQuery(String query) { 617 clear(); 618 // Split by '&' 619 StringTokenizer tokenizer = new StringTokenizer(query, "&"); 620 while(tokenizer.hasMoreElements()) { 621 String attributeValuePair = tokenizer.nextToken(); 622 if (attributeValuePair.length() > 0) { 623 int assignmentIndex = attributeValuePair.indexOf('='); 624 if (assignmentIndex < 0) { 625 // No assignment found, treat as if empty value 626 parseEntry(attributeValuePair, ""); 627 } 628 else { 629 parseEntry(attributeValuePair.substring(0, assignmentIndex), 630 attributeValuePair.substring(assignmentIndex + 1)); 631 } 632 } 633 } 634 } 635 636 /** 637 * Get a set of all of the parameters found in the sanitized query. 638 * <p> 639 * Note: Do not modify this set. Treat it as a read-only set. 640 * @return all the parameters found in the current query. 641 */ 642 public Set<String> getParameterSet() { 643 return mEntries.keySet(); 644 } 645 646 /** 647 * An array list of all of the parameter value pairs in the sanitized 648 * query, in the order they appeared in the query. May contain duplicate 649 * parameters. 650 * <p class="note"><b>Note:</b> Do not modify this list. Treat it as a read-only list.</p> 651 */ 652 public List<ParameterValuePair> getParameterList() { 653 return mEntriesList; 654 } 655 656 /** 657 * Check if a parameter exists in the current sanitized query. 658 * @param parameter the unencoded name of a parameter. 659 * @return true if the paramater exists in the current sanitized queary. 660 */ 661 public boolean hasParameter(String parameter) { 662 return mEntries.containsKey(parameter); 663 } 664 665 /** 666 * Get the value for a parameter in the current sanitized query. 667 * Returns null if the parameter does not 668 * exit. 669 * @param parameter the unencoded name of a parameter. 670 * @return the sanitized unencoded value of the parameter, 671 * or null if the parameter does not exist. 672 */ 673 public String getValue(String parameter) { 674 return mEntries.get(parameter); 675 } 676 677 /** 678 * Register a value sanitizer for a particular parameter. Can also be used 679 * to replace or remove an already-set value sanitizer. 680 * <p> 681 * Registering a non-null value sanitizer for a particular parameter 682 * makes that parameter a registered parameter. 683 * @param parameter an unencoded parameter name 684 * @param valueSanitizer the value sanitizer to use for a particular 685 * parameter. May be null in order to unregister that parameter. 686 * @see #getAllowUnregisteredParamaters() 687 */ 688 public void registerParameter(String parameter, 689 ValueSanitizer valueSanitizer) { 690 if (valueSanitizer == null) { 691 mSanitizers.remove(parameter); 692 } 693 mSanitizers.put(parameter, valueSanitizer); 694 } 695 696 /** 697 * Register a value sanitizer for an array of parameters. 698 * @param parameters An array of unencoded parameter names. 699 * @param valueSanitizer 700 * @see #registerParameter 701 */ 702 public void registerParameters(String[] parameters, 703 ValueSanitizer valueSanitizer) { 704 int length = parameters.length; 705 for(int i = 0; i < length; i++) { 706 mSanitizers.put(parameters[i], valueSanitizer); 707 } 708 } 709 710 /** 711 * Set whether or not unregistered parameters are allowed. If they 712 * are not allowed, then they will be dropped when a query is sanitized. 713 * <p> 714 * Defaults to false. 715 * @param allowUnregisteredParamaters true to allow unregistered parameters. 716 * @see #getAllowUnregisteredParamaters() 717 */ 718 public void setAllowUnregisteredParamaters( 719 boolean allowUnregisteredParamaters) { 720 mAllowUnregisteredParamaters = allowUnregisteredParamaters; 721 } 722 723 /** 724 * Get whether or not unregistered parameters are allowed. If not 725 * allowed, they will be dropped when a query is parsed. 726 * @return true if unregistered parameters are allowed. 727 * @see #setAllowUnregisteredParamaters(boolean) 728 */ 729 public boolean getAllowUnregisteredParamaters() { 730 return mAllowUnregisteredParamaters; 731 } 732 733 /** 734 * Set whether or not the first occurrence of a repeated parameter is 735 * preferred. True means the first repeated parameter is preferred. 736 * False means that the last repeated parameter is preferred. 737 * <p> 738 * The preferred parameter is the one that is returned when getParameter 739 * is called. 740 * <p> 741 * defaults to false. 742 * @param preferFirstRepeatedParameter True if the first repeated 743 * parameter is preferred. 744 * @see #getPreferFirstRepeatedParameter() 745 */ 746 public void setPreferFirstRepeatedParameter( 747 boolean preferFirstRepeatedParameter) { 748 mPreferFirstRepeatedParameter = preferFirstRepeatedParameter; 749 } 750 751 /** 752 * Get whether or not the first occurrence of a repeated parameter is 753 * preferred. 754 * @return true if the first occurrence of a repeated parameter is 755 * preferred. 756 * @see #setPreferFirstRepeatedParameter(boolean) 757 */ 758 public boolean getPreferFirstRepeatedParameter() { 759 return mPreferFirstRepeatedParameter; 760 } 761 762 /** 763 * Parse an escaped parameter-value pair. The default implementation 764 * unescapes both the parameter and the value, then looks up the 765 * effective value sanitizer for the parameter and uses it to sanitize 766 * the value. If all goes well then addSanitizedValue is called with 767 * the unescaped parameter and the sanitized unescaped value. 768 * @param parameter an escaped parameter 769 * @param value an unsanitzied escaped value 770 */ 771 protected void parseEntry(String parameter, String value) { 772 String unescapedParameter = unescape(parameter); 773 ValueSanitizer valueSanitizer = 774 getEffectiveValueSanitizer(unescapedParameter); 775 776 if (valueSanitizer == null) { 777 return; 778 } 779 String unescapedValue = unescape(value); 780 String sanitizedValue = valueSanitizer.sanitize(unescapedValue); 781 addSanitizedEntry(unescapedParameter, sanitizedValue); 782 } 783 784 /** 785 * Record a sanitized parameter-value pair. Override if you want to 786 * do additional filtering or validation. 787 * @param parameter an unescaped parameter 788 * @param value a sanitized unescaped value 789 */ 790 protected void addSanitizedEntry(String parameter, String value) { 791 mEntriesList.add( 792 new ParameterValuePair(parameter, value)); 793 if (mPreferFirstRepeatedParameter) { 794 if (mEntries.containsKey(parameter)) { 795 return; 796 } 797 } 798 mEntries.put(parameter, value); 799 } 800 801 /** 802 * Get the value sanitizer for a parameter. Returns null if there 803 * is no value sanitizer registered for the parameter. 804 * @param parameter the unescaped parameter 805 * @return the currently registered value sanitizer for this parameter. 806 * @see #registerParameter(String, android.net.UrlQuerySanitizer.ValueSanitizer) 807 */ 808 public ValueSanitizer getValueSanitizer(String parameter) { 809 return mSanitizers.get(parameter); 810 } 811 812 /** 813 * Get the effective value sanitizer for a parameter. Like getValueSanitizer, 814 * except if there is no value sanitizer registered for a parameter, and 815 * unregistered paramaters are allowed, then the default value sanitizer is 816 * returned. 817 * @param parameter an unescaped parameter 818 * @return the effective value sanitizer for a parameter. 819 */ 820 public ValueSanitizer getEffectiveValueSanitizer(String parameter) { 821 ValueSanitizer sanitizer = getValueSanitizer(parameter); 822 if (sanitizer == null && mAllowUnregisteredParamaters) { 823 sanitizer = getUnregisteredParameterValueSanitizer(); 824 } 825 return sanitizer; 826 } 827 828 /** 829 * Unescape an escaped string. 830 * <ul> 831 * <li>'+' characters are replaced by 832 * ' ' characters. 833 * <li>Valid "%xx" escape sequences are replaced by the 834 * corresponding unescaped character. 835 * <li>Invalid escape sequences such as %1z", are passed through unchanged. 836 * <ol> 837 * @param string the escaped string 838 * @return the unescaped string. 839 */ 840 public String unescape(String string) { 841 // Early exit if no escaped characters. 842 int firstEscape = string.indexOf('%'); 843 if ( firstEscape < 0) { 844 firstEscape = string.indexOf('+'); 845 if (firstEscape < 0) { 846 return string; 847 } 848 } 849 850 int length = string.length(); 851 852 StringBuilder stringBuilder = new StringBuilder(length); 853 stringBuilder.append(string.substring(0, firstEscape)); 854 for (int i = firstEscape; i < length; i++) { 855 char c = string.charAt(i); 856 if (c == '+') { 857 c = ' '; 858 } 859 else if ( c == '%' && i + 2 < length) { 860 char c1 = string.charAt(i + 1); 861 char c2 = string.charAt(i + 2); 862 if (isHexDigit(c1) && isHexDigit(c2)) { 863 c = (char) (decodeHexDigit(c1) * 16 + decodeHexDigit(c2)); 864 i += 2; 865 } 866 } 867 stringBuilder.append(c); 868 } 869 return stringBuilder.toString(); 870 } 871 872 /** 873 * Test if a character is a hexidecimal digit. Both upper case and lower 874 * case hex digits are allowed. 875 * @param c the character to test 876 * @return true if c is a hex digit. 877 */ 878 protected boolean isHexDigit(char c) { 879 return decodeHexDigit(c) >= 0; 880 } 881 882 /** 883 * Convert a character that represents a hexidecimal digit into an integer. 884 * If the character is not a hexidecimal digit, then -1 is returned. 885 * Both upper case and lower case hex digits are allowed. 886 * @param c the hexidecimal digit. 887 * @return the integer value of the hexidecimal digit. 888 */ 889 890 protected int decodeHexDigit(char c) { 891 if (c >= '0' && c <= '9') { 892 return c - '0'; 893 } 894 else if (c >= 'A' && c <= 'F') { 895 return c - 'A' + 10; 896 } 897 else if (c >= 'a' && c <= 'f') { 898 return c - 'a' + 10; 899 } 900 else { 901 return -1; 902 } 903 } 904 905 /** 906 * Clear the existing entries. Called to get ready to parse a new 907 * query string. 908 */ 909 protected void clear() { 910 mEntries.clear(); 911 mEntriesList.clear(); 912 } 913 } 914 915