1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "content/common/android/address_parser_internal.h" 6 7 #include <bitset> 8 9 #include "base/logging.h" 10 #include "base/strings/string_util.h" 11 12 namespace { 13 14 // Number of digits for a valid zip code. 15 const size_t kZipDigits = 5; 16 17 // Number of digits for a valid zip code in the Zip Plus 4 format. 18 const size_t kZipPlus4Digits = 9; 19 20 // Maximum number of digits of a house number, including possible hyphens. 21 const size_t kMaxHouseDigits = 5; 22 23 base::char16 SafePreviousChar(const base::string16::const_iterator& it, 24 const base::string16::const_iterator& begin) { 25 if (it == begin) 26 return ' '; 27 return *(it - 1); 28 } 29 30 base::char16 SafeNextChar(const base::string16::const_iterator& it, 31 const base::string16::const_iterator& end) { 32 if (it == end) 33 return ' '; 34 return *(it + 1); 35 } 36 37 bool WordLowerCaseEqualsASCII(base::string16::const_iterator word_begin, 38 base::string16::const_iterator word_end, const char* ascii_to_match) { 39 for (base::string16::const_iterator it = word_begin; it != word_end; 40 ++it, ++ascii_to_match) { 41 if (!*ascii_to_match || base::ToLowerASCII(*it) != *ascii_to_match) 42 return false; 43 } 44 return *ascii_to_match == 0 || *ascii_to_match == ' '; 45 } 46 47 bool LowerCaseEqualsASCIIWithPlural(base::string16::const_iterator word_begin, 48 base::string16::const_iterator word_end, const char* ascii_to_match, 49 bool allow_plural) { 50 for (base::string16::const_iterator it = word_begin; it != word_end; 51 ++it, ++ascii_to_match) { 52 if (!*ascii_to_match && allow_plural && *it == 's' && it + 1 == word_end) 53 return true; 54 55 if (!*ascii_to_match || base::ToLowerASCII(*it) != *ascii_to_match) 56 return false; 57 } 58 return *ascii_to_match == 0; 59 } 60 61 } // anonymous namespace 62 63 namespace content { 64 65 namespace address_parser { 66 67 namespace internal { 68 69 Word::Word(const base::string16::const_iterator& begin, 70 const base::string16::const_iterator& end) 71 : begin(begin), 72 end(end) { 73 DCHECK(begin <= end); 74 } 75 76 bool HouseNumberParser::IsPreDelimiter(base::char16 character) { 77 return character == ':' || IsPostDelimiter(character); 78 } 79 80 bool HouseNumberParser::IsPostDelimiter(base::char16 character) { 81 return IsWhitespace(character) || strchr(",\"'", character); 82 } 83 84 void HouseNumberParser::RestartOnNextDelimiter() { 85 ResetState(); 86 for (; it_ != end_ && !IsPreDelimiter(*it_); ++it_) {} 87 } 88 89 void HouseNumberParser::AcceptChars(size_t num_chars) { 90 size_t offset = std::min(static_cast<size_t>(std::distance(it_, end_)), 91 num_chars); 92 it_ += offset; 93 result_chars_ += offset; 94 } 95 96 void HouseNumberParser::SkipChars(size_t num_chars) { 97 it_ += std::min(static_cast<size_t>(std::distance(it_, end_)), num_chars); 98 } 99 100 void HouseNumberParser::ResetState() { 101 num_digits_ = 0; 102 result_chars_ = 0; 103 } 104 105 bool HouseNumberParser::CheckFinished(Word* word) const { 106 // There should always be a number after a hyphen. 107 if (result_chars_ == 0 || SafePreviousChar(it_, begin_) == '-') 108 return false; 109 110 if (word) { 111 word->begin = it_ - result_chars_; 112 word->end = it_; 113 } 114 return true; 115 } 116 117 bool HouseNumberParser::Parse( 118 const base::string16::const_iterator& begin, 119 const base::string16::const_iterator& end, Word* word) { 120 it_ = begin_ = begin; 121 end_ = end; 122 ResetState(); 123 124 // Iterations only used as a fail-safe against any buggy infinite loops. 125 size_t iterations = 0; 126 size_t max_iterations = end - begin + 1; 127 for (; it_ != end_ && iterations < max_iterations; ++iterations) { 128 129 // Word finished case. 130 if (IsPostDelimiter(*it_)) { 131 if (CheckFinished(word)) 132 return true; 133 else if (result_chars_) 134 ResetState(); 135 136 SkipChars(1); 137 continue; 138 } 139 140 // More digits. There should be no more after a letter was found. 141 if (IsAsciiDigit(*it_)) { 142 if (num_digits_ >= kMaxHouseDigits) { 143 RestartOnNextDelimiter(); 144 } else { 145 AcceptChars(1); 146 ++num_digits_; 147 } 148 continue; 149 } 150 151 if (IsAsciiAlpha(*it_)) { 152 // Handle special case 'one'. 153 if (result_chars_ == 0) { 154 if (it_ + 3 <= end_ && LowerCaseEqualsASCII(it_, it_ + 3, "one")) 155 AcceptChars(3); 156 else 157 RestartOnNextDelimiter(); 158 continue; 159 } 160 161 // There should be more than 1 character because of result_chars. 162 DCHECK_GT(result_chars_, 0U); 163 DCHECK(it_ != begin_); 164 base::char16 previous = SafePreviousChar(it_, begin_); 165 if (IsAsciiDigit(previous)) { 166 // Check cases like '12A'. 167 base::char16 next = SafeNextChar(it_, end_); 168 if (IsPostDelimiter(next)) { 169 AcceptChars(1); 170 continue; 171 } 172 173 // Handle cases like 12a, 1st, 2nd, 3rd, 7th. 174 if (IsAsciiAlpha(next)) { 175 base::char16 last_digit = previous; 176 base::char16 first_letter = base::ToLowerASCII(*it_); 177 base::char16 second_letter = base::ToLowerASCII(next); 178 bool is_teen = SafePreviousChar(it_ - 1, begin_) == '1' && 179 num_digits_ == 2; 180 181 switch (last_digit - '0') { 182 case 1: 183 if ((first_letter == 's' && second_letter == 't') || 184 (first_letter == 't' && second_letter == 'h' && is_teen)) { 185 AcceptChars(2); 186 continue; 187 } 188 break; 189 190 case 2: 191 if ((first_letter == 'n' && second_letter == 'd') || 192 (first_letter == 't' && second_letter == 'h' && is_teen)) { 193 AcceptChars(2); 194 continue; 195 } 196 break; 197 198 case 3: 199 if ((first_letter == 'r' && second_letter == 'd') || 200 (first_letter == 't' && second_letter == 'h' && is_teen)) { 201 AcceptChars(2); 202 continue; 203 } 204 break; 205 206 case 0: 207 // Explicitly exclude '0th'. 208 if (num_digits_ == 1) 209 break; 210 211 case 4: 212 case 5: 213 case 6: 214 case 7: 215 case 8: 216 case 9: 217 if (first_letter == 't' && second_letter == 'h') { 218 AcceptChars(2); 219 continue; 220 } 221 break; 222 223 default: 224 NOTREACHED(); 225 } 226 } 227 } 228 229 RestartOnNextDelimiter(); 230 continue; 231 } 232 233 if (*it_ == '-' && num_digits_ > 0) { 234 AcceptChars(1); 235 ++num_digits_; 236 continue; 237 } 238 239 RestartOnNextDelimiter(); 240 SkipChars(1); 241 } 242 243 if (iterations >= max_iterations) 244 return false; 245 246 return CheckFinished(word); 247 } 248 249 bool FindStateStartingInWord(WordList* words, 250 size_t state_first_word, 251 size_t* state_last_word, 252 String16Tokenizer* tokenizer, 253 size_t* state_index) { 254 255 // Bitmasks containing the allowed suffixes for 2-letter state codes. 256 static const int state_two_letter_suffix[23] = { 257 0x02060c00, // A followed by: [KLRSZ]. 258 0x00000000, // B. 259 0x00084001, // C followed by: [AOT]. 260 0x00000014, // D followed by: [CE]. 261 0x00000000, // E. 262 0x00001800, // F followed by: [LM]. 263 0x00100001, // G followed by: [AU]. 264 0x00000100, // H followed by: [I]. 265 0x00002809, // I followed by: [ADLN]. 266 0x00000000, // J. 267 0x01040000, // K followed by: [SY]. 268 0x00000001, // L followed by: [A]. 269 0x000ce199, // M followed by: [ADEHINOPST]. 270 0x0120129c, // N followed by: [CDEHJMVY]. 271 0x00020480, // O followed by: [HKR]. 272 0x00420001, // P followed by: [ARW]. 273 0x00000000, // Q. 274 0x00000100, // R followed by: [I]. 275 0x0000000c, // S followed by: [CD]. 276 0x00802000, // T followed by: [NX]. 277 0x00080000, // U followed by: [T]. 278 0x00080101, // V followed by: [AIT]. 279 0x01200101 // W followed by: [AIVY]. 280 }; 281 282 // Accumulative number of states for the 2-letter code indexed by the first. 283 static const int state_two_letter_accumulative[24] = { 284 0, 5, 5, 8, 10, 10, 12, 14, 285 15, 19, 19, 21, 22, 32, 40, 43, 286 46, 46, 47, 49, 51, 52, 55, 59 287 }; 288 289 // State names sorted alphabetically with their lengths. 290 // There can be more than one possible name for a same state if desired. 291 static const struct StateNameInfo { 292 const char* string; 293 char first_word_length; 294 char length; 295 char state_index; // Relative to two-character code alphabetical order. 296 } state_names[59] = { 297 { "alabama", 7, 7, 1 }, { "alaska", 6, 6, 0 }, 298 { "american samoa", 8, 14, 3 }, { "arizona", 7, 7, 4 }, 299 { "arkansas", 8, 8, 2 }, 300 { "california", 10, 10, 5 }, { "colorado", 8, 8, 6 }, 301 { "connecticut", 11, 11, 7 }, { "delaware", 8, 8, 9 }, 302 { "district of columbia", 8, 20, 8 }, 303 { "federated states of micronesia", 9, 30, 11 }, { "florida", 7, 7, 10 }, 304 { "guam", 4, 4, 13 }, { "georgia", 7, 7, 12 }, 305 { "hawaii", 6, 6, 14 }, 306 { "idaho", 5, 5, 16 }, { "illinois", 8, 8, 17 }, { "indiana", 7, 7, 18 }, 307 { "iowa", 4, 4, 15 }, 308 { "kansas", 6, 6, 19 }, { "kentucky", 8, 8, 20 }, 309 { "louisiana", 9, 9, 21 }, 310 { "maine", 5, 5, 24 }, { "marshall islands", 8, 16, 25 }, 311 { "maryland", 8, 8, 23 }, { "massachusetts", 13, 13, 22 }, 312 { "michigan", 8, 8, 26 }, { "minnesota", 9, 9, 27 }, 313 { "mississippi", 11, 11, 30 }, { "missouri", 8, 8, 28 }, 314 { "montana", 7, 7, 31 }, 315 { "nebraska", 8, 8, 34 }, { "nevada", 6, 6, 38 }, 316 { "new hampshire", 3, 13, 35 }, { "new jersey", 3, 10, 36 }, 317 { "new mexico", 3, 10, 37 }, { "new york", 3, 8, 39 }, 318 { "north carolina", 5, 14, 32 }, { "north dakota", 5, 12, 33 }, 319 { "northern mariana islands", 8, 24, 29 }, 320 { "ohio", 4, 4, 40 }, { "oklahoma", 8, 8, 41 }, { "oregon", 6, 6, 42 }, 321 { "palau", 5, 5, 45 }, { "pennsylvania", 12, 12, 43 }, 322 { "puerto rico", 6, 11, 44 }, 323 { "rhode island", 5, 5, 46 }, 324 { "south carolina", 5, 14, 47 }, { "south dakota", 5, 12, 48 }, 325 { "tennessee", 9, 9, 49 }, { "texas", 5, 5, 50 }, 326 { "utah", 4, 4, 51 }, 327 { "vermont", 7, 7, 54 }, { "virgin islands", 6, 14, 53 }, 328 { "virginia", 8, 8, 52 }, 329 { "washington", 10, 10, 55 }, { "west virginia", 4, 13, 57 }, 330 { "wisconsin", 9, 9, 56 }, { "wyoming", 7, 7, 58 } 331 }; 332 333 // Accumulative number of states for sorted names indexed by the first letter. 334 // Required a different one since there are codes that don't share their 335 // first letter with the name of their state (MP = Northern Mariana Islands). 336 static const int state_names_accumulative[24] = { 337 0, 5, 5, 8, 10, 10, 12, 14, 338 15, 19, 19, 21, 22, 31, 40, 43, 339 46, 46, 47, 49, 51, 52, 55, 59 340 }; 341 342 DCHECK_EQ(state_names_accumulative[arraysize(state_names_accumulative) - 1], 343 static_cast<int>(ARRAYSIZE_UNSAFE(state_names))); 344 345 const Word& first_word = words->at(state_first_word); 346 int length = first_word.end - first_word.begin; 347 if (length < 2 || !IsAsciiAlpha(*first_word.begin)) 348 return false; 349 350 // No state names start with x, y, z. 351 base::char16 first_letter = base::ToLowerASCII(*first_word.begin); 352 if (first_letter > 'w') 353 return false; 354 355 DCHECK(first_letter >= 'a'); 356 int first_index = first_letter - 'a'; 357 358 // Look for two-letter state names. 359 if (length == 2 && IsAsciiAlpha(*(first_word.begin + 1))) { 360 base::char16 second_letter = base::ToLowerASCII(*(first_word.begin + 1)); 361 DCHECK(second_letter >= 'a'); 362 363 int second_index = second_letter - 'a'; 364 if (!(state_two_letter_suffix[first_index] & (1 << second_index))) 365 return false; 366 367 std::bitset<32> previous_suffixes = state_two_letter_suffix[first_index] & 368 ((1 << second_index) - 1); 369 *state_last_word = state_first_word; 370 *state_index = state_two_letter_accumulative[first_index] + 371 previous_suffixes.count(); 372 return true; 373 } 374 375 // Look for full state names by their first letter. Discard by length. 376 for (int state = state_names_accumulative[first_index]; 377 state < state_names_accumulative[first_index + 1]; ++state) { 378 if (state_names[state].first_word_length != length) 379 continue; 380 381 bool state_match = false; 382 size_t state_word = state_first_word; 383 for (int pos = 0; true; ) { 384 if (!WordLowerCaseEqualsASCII(words->at(state_word).begin, 385 words->at(state_word).end, &state_names[state].string[pos])) 386 break; 387 388 pos += words->at(state_word).end - words->at(state_word).begin + 1; 389 if (pos >= state_names[state].length) { 390 state_match = true; 391 break; 392 } 393 394 // Ran out of words, extract more from the tokenizer. 395 if (++state_word == words->size()) { 396 do { 397 if (!tokenizer->GetNext()) 398 break; 399 } while (tokenizer->token_is_delim()); 400 words->push_back( 401 Word(tokenizer->token_begin(), tokenizer->token_end())); 402 } 403 } 404 405 if (state_match) { 406 *state_last_word = state_word; 407 *state_index = state_names[state].state_index; 408 return true; 409 } 410 } 411 412 return false; 413 } 414 415 bool IsZipValid(const Word& word, size_t state_index) { 416 size_t length = word.end - word.begin; 417 if (length != kZipDigits && length != kZipPlus4Digits + 1) 418 return false; 419 420 for (base::string16::const_iterator it = word.begin; it != word.end; ++it) { 421 size_t pos = it - word.begin; 422 if (IsAsciiDigit(*it) || (*it == '-' && pos == kZipDigits)) 423 continue; 424 return false; 425 } 426 return IsZipValidForState(word, state_index); 427 } 428 429 bool IsZipValidForState(const Word& word, size_t state_index) { 430 // List of valid zip code ranges. 431 static const struct { 432 signed char low; 433 signed char high; 434 signed char exception1; 435 signed char exception2; 436 } zip_range[] = { 437 { 99, 99, -1, -1 }, // AK Alaska. 438 { 35, 36, -1, -1 }, // AL Alabama. 439 { 71, 72, -1, -1 }, // AR Arkansas. 440 { 96, 96, -1, -1 }, // AS American Samoa. 441 { 85, 86, -1, -1 }, // AZ Arizona. 442 { 90, 96, -1, -1 }, // CA California. 443 { 80, 81, -1, -1 }, // CO Colorado. 444 { 6, 6, -1, -1 }, // CT Connecticut. 445 { 20, 20, -1, -1 }, // DC District of Columbia. 446 { 19, 19, -1, -1 }, // DE Delaware. 447 { 32, 34, -1, -1 }, // FL Florida. 448 { 96, 96, -1, -1 }, // FM Federated States of Micronesia. 449 { 30, 31, -1, -1 }, // GA Georgia. 450 { 96, 96, -1, -1 }, // GU Guam. 451 { 96, 96, -1, -1 }, // HI Hawaii. 452 { 50, 52, -1, -1 }, // IA Iowa. 453 { 83, 83, -1, -1 }, // ID Idaho. 454 { 60, 62, -1, -1 }, // IL Illinois. 455 { 46, 47, -1, -1 }, // IN Indiana. 456 { 66, 67, 73, -1 }, // KS Kansas. 457 { 40, 42, -1, -1 }, // KY Kentucky. 458 { 70, 71, -1, -1 }, // LA Louisiana. 459 { 1, 2, -1, -1 }, // MA Massachusetts. 460 { 20, 21, -1, -1 }, // MD Maryland. 461 { 3, 4, -1, -1 }, // ME Maine. 462 { 96, 96, -1, -1 }, // MH Marshall Islands. 463 { 48, 49, -1, -1 }, // MI Michigan. 464 { 55, 56, -1, -1 }, // MN Minnesota. 465 { 63, 65, -1, -1 }, // MO Missouri. 466 { 96, 96, -1, -1 }, // MP Northern Mariana Islands. 467 { 38, 39, -1, -1 }, // MS Mississippi. 468 { 55, 56, -1, -1 }, // MT Montana. 469 { 27, 28, -1, -1 }, // NC North Carolina. 470 { 58, 58, -1, -1 }, // ND North Dakota. 471 { 68, 69, -1, -1 }, // NE Nebraska. 472 { 3, 4, -1, -1 }, // NH New Hampshire. 473 { 7, 8, -1, -1 }, // NJ New Jersey. 474 { 87, 88, 86, -1 }, // NM New Mexico. 475 { 88, 89, 96, -1 }, // NV Nevada. 476 { 10, 14, 0, 6 }, // NY New York. 477 { 43, 45, -1, -1 }, // OH Ohio. 478 { 73, 74, -1, -1 }, // OK Oklahoma. 479 { 97, 97, -1, -1 }, // OR Oregon. 480 { 15, 19, -1, -1 }, // PA Pennsylvania. 481 { 6, 6, 0, 9 }, // PR Puerto Rico. 482 { 96, 96, -1, -1 }, // PW Palau. 483 { 2, 2, -1, -1 }, // RI Rhode Island. 484 { 29, 29, -1, -1 }, // SC South Carolina. 485 { 57, 57, -1, -1 }, // SD South Dakota. 486 { 37, 38, -1, -1 }, // TN Tennessee. 487 { 75, 79, 87, 88 }, // TX Texas. 488 { 84, 84, -1, -1 }, // UT Utah. 489 { 22, 24, 20, -1 }, // VA Virginia. 490 { 6, 9, -1, -1 }, // VI Virgin Islands. 491 { 5, 5, -1, -1 }, // VT Vermont. 492 { 98, 99, -1, -1 }, // WA Washington. 493 { 53, 54, -1, -1 }, // WI Wisconsin. 494 { 24, 26, -1, -1 }, // WV West Virginia. 495 { 82, 83, -1, -1 } // WY Wyoming. 496 }; 497 498 // Zip numeric value for the first two characters. 499 DCHECK(word.begin != word.end); 500 DCHECK(IsAsciiDigit(*word.begin)); 501 DCHECK(IsAsciiDigit(*(word.begin + 1))); 502 int zip_prefix = (*word.begin - '0') * 10 + (*(word.begin + 1) - '0'); 503 504 if ((zip_prefix >= zip_range[state_index].low && 505 zip_prefix <= zip_range[state_index].high) || 506 zip_prefix == zip_range[state_index].exception1 || 507 zip_prefix == zip_range[state_index].exception2) { 508 return true; 509 } 510 return false; 511 } 512 513 bool IsValidLocationName(const Word& word) { 514 // Supported location names sorted alphabetically and grouped by first letter. 515 static const struct LocationNameInfo { 516 const char* string; 517 char length; 518 bool allow_plural; 519 } location_names[157] = { 520 { "alley", 5, false }, { "annex", 5, false }, { "arcade", 6, false }, 521 { "ave", 3, false }, { "ave.", 4, false }, { "avenue", 6, false }, 522 { "alameda", 7, false }, 523 { "bayou", 5, false }, { "beach", 5, false }, { "bend", 4, false }, 524 { "bluff", 5, true }, { "bottom", 6, false }, { "boulevard", 9, false }, 525 { "branch", 6, false }, { "bridge", 6, false }, { "brook", 5, true }, 526 { "burg", 4, true }, { "bypass", 6, false }, { "broadway", 8, false }, 527 { "camino", 6, false }, { "camp", 4, false }, { "canyon", 6, false }, 528 { "cape", 4, false }, { "causeway", 8, false }, { "center", 6, true }, 529 { "circle", 6, true }, { "cliff", 5, true }, { "club", 4, false }, 530 { "common", 6, false }, { "corner", 6, true }, { "course", 6, false }, 531 { "court", 5, true }, { "cove", 4, true }, { "creek", 5, false }, 532 { "crescent", 8, false }, { "crest", 5, false }, { "crossing", 8, false }, 533 { "crossroad", 9, false }, { "curve", 5, false }, { "circulo", 7, false }, 534 { "dale", 4, false }, { "dam", 3, false }, { "divide", 6, false }, 535 { "drive", 5, true }, 536 { "estate", 6, true }, { "expressway", 10, false }, 537 { "extension", 9, true }, 538 { "fall", 4, true }, { "ferry", 5, false }, { "field", 5, true }, 539 { "flat", 4, true }, { "ford", 4, true }, { "forest", 6, false }, 540 { "forge", 5, true }, { "fork", 4, true }, { "fort", 4, false }, 541 { "freeway", 7, false }, 542 { "garden", 6, true }, { "gateway", 7, false }, { "glen", 4, true }, 543 { "green", 5, true }, { "grove", 5, true }, 544 { "harbor", 6, true }, { "haven", 5, false }, { "heights", 7, false }, 545 { "highway", 7, false }, { "hill", 4, true }, { "hollow", 6, false }, 546 { "inlet", 5, false }, { "island", 6, true }, { "isle", 4, false }, 547 { "junction", 8, true }, 548 { "key", 3, true }, { "knoll", 5, true }, 549 { "lake", 4, true }, { "land", 4, false }, { "landing", 7, false }, 550 { "lane", 4, false }, { "light", 5, true }, { "loaf", 4, false }, 551 { "lock", 4, true }, { "lodge", 5, false }, { "loop", 4, false }, 552 { "mall", 4, false }, { "manor", 5, true }, { "meadow", 6, true }, 553 { "mews", 4, false }, { "mill", 4, true }, { "mission", 7, false }, 554 { "motorway", 8, false }, { "mount", 5, false }, { "mountain", 8, true }, 555 { "neck", 4, false }, 556 { "orchard", 7, false }, { "oval", 4, false }, { "overpass", 8, false }, 557 { "park", 4, true }, { "parkway", 7, true }, { "pass", 4, false }, 558 { "passage", 7, false }, { "path", 4, false }, { "pike", 4, false }, 559 { "pine", 4, true }, { "plain", 5, true }, { "plaza", 5, false }, 560 { "point", 5, true }, { "port", 4, true }, { "prairie", 7, false }, 561 { "privada", 7, false }, 562 { "radial", 6, false }, { "ramp", 4, false }, { "ranch", 5, false }, 563 { "rapid", 5, true }, { "rest", 4, false }, { "ridge", 5, true }, 564 { "river", 5, false }, { "road", 4, true }, { "route", 5, false }, 565 { "row", 3, false }, { "rue", 3, false }, { "run", 3, false }, 566 { "shoal", 5, true }, { "shore", 5, true }, { "skyway", 6, false }, 567 { "spring", 6, true }, { "spur", 4, true }, { "square", 6, true }, 568 { "station", 7, false }, { "stravenue", 9, false }, { "stream", 6, false }, 569 { "st", 2, false }, { "st.", 3, false }, { "street", 6, true }, 570 { "summit", 6, false }, { "speedway", 8, false }, 571 { "terrace", 7, false }, { "throughway", 10, false }, { "trace", 5, false }, 572 { "track", 5, false }, { "trafficway", 10, false }, { "trail", 5, false }, 573 { "tunnel", 6, false }, { "turnpike", 8, false }, 574 { "underpass", 9, false }, { "union", 5, true }, 575 { "valley", 6, true }, { "viaduct", 7, false }, { "view", 4, true }, 576 { "village", 7, true }, { "ville", 5, false }, { "vista", 5, false }, 577 { "walk", 4, true }, { "wall", 4, false }, { "way", 3, true }, 578 { "well", 4, true }, 579 { "xing", 4, false }, { "xrd", 3, false } 580 }; 581 582 // Accumulative number of location names for each starting letter. 583 static const int location_names_accumulative[25] = { 584 0, 7, 19, 40, 44, 585 47, 57, 62, 68, 71, 586 72, 74, 83, 92, 93, 587 96, 109, 109, 121, 135, 588 143, 145, 151, 155, 157 589 }; 590 591 DCHECK_EQ( 592 location_names_accumulative[arraysize(location_names_accumulative) - 1], 593 static_cast<int>(ARRAYSIZE_UNSAFE(location_names))); 594 595 if (!IsAsciiAlpha(*word.begin)) 596 return false; 597 598 // No location names start with y, z. 599 base::char16 first_letter = base::ToLowerASCII(*word.begin); 600 if (first_letter > 'x') 601 return false; 602 603 DCHECK(first_letter >= 'a'); 604 int index = first_letter - 'a'; 605 int length = std::distance(word.begin, word.end); 606 for (int i = location_names_accumulative[index]; 607 i < location_names_accumulative[index + 1]; ++i) { 608 if (location_names[i].length != length && 609 (location_names[i].allow_plural && 610 location_names[i].length + 1 != length)) { 611 continue; 612 } 613 614 if (LowerCaseEqualsASCIIWithPlural(word.begin, word.end, 615 location_names[i].string, 616 location_names[i].allow_plural)) { 617 return true; 618 } 619 } 620 621 return false; 622 } 623 624 } // namespace internal 625 626 } // namespace address_parser 627 628 } // namespace content 629