Home | History | Annotate | Download | only in android
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "content/common/android/address_parser_internal.h"
      6 
      7 #include <bitset>
      8 
      9 #include "base/logging.h"
     10 #include "base/strings/string_util.h"
     11 
     12 namespace {
     13 
     14 // Number of digits for a valid zip code.
     15 const size_t kZipDigits = 5;
     16 
     17 // Number of digits for a valid zip code in the Zip Plus 4 format.
     18 const size_t kZipPlus4Digits = 9;
     19 
     20 // Maximum number of digits of a house number, including possible hyphens.
     21 const size_t kMaxHouseDigits = 5;
     22 
     23 char16 SafePreviousChar(const base::string16::const_iterator& it,
     24     const base::string16::const_iterator& begin) {
     25   if (it == begin)
     26     return ' ';
     27   return *(it - 1);
     28 }
     29 
     30 char16 SafeNextChar(const base::string16::const_iterator& it,
     31     const base::string16::const_iterator& end) {
     32   if (it == end)
     33     return ' ';
     34   return *(it + 1);
     35 }
     36 
     37 bool WordLowerCaseEqualsASCII(base::string16::const_iterator word_begin,
     38     base::string16::const_iterator word_end, const char* ascii_to_match) {
     39   for (base::string16::const_iterator it = word_begin; it != word_end;
     40       ++it, ++ascii_to_match) {
     41     if (!*ascii_to_match || base::ToLowerASCII(*it) != *ascii_to_match)
     42       return false;
     43   }
     44   return *ascii_to_match == 0 || *ascii_to_match == ' ';
     45 }
     46 
     47 bool LowerCaseEqualsASCIIWithPlural(base::string16::const_iterator word_begin,
     48     base::string16::const_iterator word_end, const char* ascii_to_match,
     49     bool allow_plural) {
     50   for (base::string16::const_iterator it = word_begin; it != word_end;
     51       ++it, ++ascii_to_match) {
     52     if (!*ascii_to_match && allow_plural && *it == 's' && it + 1 == word_end)
     53       return true;
     54 
     55     if (!*ascii_to_match || base::ToLowerASCII(*it) != *ascii_to_match)
     56       return false;
     57   }
     58   return *ascii_to_match == 0;
     59 }
     60 
     61 }  // anonymous namespace
     62 
     63 namespace content {
     64 
     65 namespace address_parser {
     66 
     67 namespace internal {
     68 
     69 Word::Word(const base::string16::const_iterator& begin,
     70            const base::string16::const_iterator& end)
     71     : begin(begin),
     72       end(end) {
     73   DCHECK(begin <= end);
     74 }
     75 
     76 bool HouseNumberParser::IsPreDelimiter(char16 character) {
     77   return character == ':' || IsPostDelimiter(character);
     78 }
     79 
     80 bool HouseNumberParser::IsPostDelimiter(char16 character) {
     81   return IsWhitespace(character) || strchr(",\"'", character);
     82 }
     83 
     84 void HouseNumberParser::RestartOnNextDelimiter() {
     85   ResetState();
     86   for (; it_ != end_ && !IsPreDelimiter(*it_); ++it_) {}
     87 }
     88 
     89 void HouseNumberParser::AcceptChars(size_t num_chars) {
     90   size_t offset = std::min(static_cast<size_t>(std::distance(it_, end_)),
     91                            num_chars);
     92   it_ += offset;
     93   result_chars_ += offset;
     94 }
     95 
     96 void HouseNumberParser::SkipChars(size_t num_chars) {
     97   it_ += std::min(static_cast<size_t>(std::distance(it_, end_)), num_chars);
     98 }
     99 
    100 void HouseNumberParser::ResetState() {
    101   num_digits_ = 0;
    102   result_chars_ = 0;
    103 }
    104 
    105 bool HouseNumberParser::CheckFinished(Word* word) const {
    106   // There should always be a number after a hyphen.
    107   if (result_chars_ == 0 || SafePreviousChar(it_, begin_) == '-')
    108     return false;
    109 
    110   if (word) {
    111     word->begin = it_ - result_chars_;
    112     word->end = it_;
    113   }
    114   return true;
    115 }
    116 
    117 bool HouseNumberParser::Parse(
    118     const base::string16::const_iterator& begin,
    119     const base::string16::const_iterator& end, Word* word) {
    120   it_ = begin_ = begin;
    121   end_ = end;
    122   ResetState();
    123 
    124   // Iterations only used as a fail-safe against any buggy infinite loops.
    125   size_t iterations = 0;
    126   size_t max_iterations = end - begin + 1;
    127   for (; it_ != end_ && iterations < max_iterations; ++iterations) {
    128 
    129     // Word finished case.
    130     if (IsPostDelimiter(*it_)) {
    131       if (CheckFinished(word))
    132         return true;
    133       else if (result_chars_)
    134         ResetState();
    135 
    136       SkipChars(1);
    137       continue;
    138     }
    139 
    140     // More digits. There should be no more after a letter was found.
    141     if (IsAsciiDigit(*it_)) {
    142       if (num_digits_ >= kMaxHouseDigits) {
    143         RestartOnNextDelimiter();
    144       } else {
    145         AcceptChars(1);
    146         ++num_digits_;
    147       }
    148       continue;
    149     }
    150 
    151     if (IsAsciiAlpha(*it_)) {
    152       // Handle special case 'one'.
    153       if (result_chars_ == 0) {
    154         if (it_ + 3 <= end_ && LowerCaseEqualsASCII(it_, it_ + 3, "one"))
    155           AcceptChars(3);
    156         else
    157           RestartOnNextDelimiter();
    158         continue;
    159       }
    160 
    161       // There should be more than 1 character because of result_chars.
    162       DCHECK_GT(result_chars_, 0U);
    163       DCHECK(it_ != begin_);
    164       char16 previous = SafePreviousChar(it_, begin_);
    165       if (IsAsciiDigit(previous)) {
    166         // Check cases like '12A'.
    167         char16 next = SafeNextChar(it_, end_);
    168         if (IsPostDelimiter(next)) {
    169           AcceptChars(1);
    170           continue;
    171         }
    172 
    173         // Handle cases like 12a, 1st, 2nd, 3rd, 7th.
    174         if (IsAsciiAlpha(next)) {
    175           char16 last_digit = previous;
    176           char16 first_letter = base::ToLowerASCII(*it_);
    177           char16 second_letter = base::ToLowerASCII(next);
    178           bool is_teen = SafePreviousChar(it_ - 1, begin_) == '1' &&
    179               num_digits_ == 2;
    180 
    181           switch (last_digit - '0') {
    182           case 1:
    183             if ((first_letter == 's' && second_letter == 't') ||
    184                 (first_letter == 't' && second_letter == 'h' && is_teen)) {
    185               AcceptChars(2);
    186               continue;
    187             }
    188             break;
    189 
    190           case 2:
    191             if ((first_letter == 'n' && second_letter == 'd') ||
    192                 (first_letter == 't' && second_letter == 'h' && is_teen)) {
    193               AcceptChars(2);
    194               continue;
    195             }
    196             break;
    197 
    198           case 3:
    199             if ((first_letter == 'r' && second_letter == 'd') ||
    200                 (first_letter == 't' && second_letter == 'h' && is_teen)) {
    201               AcceptChars(2);
    202               continue;
    203             }
    204             break;
    205 
    206           case 0:
    207             // Explicitly exclude '0th'.
    208             if (num_digits_ == 1)
    209               break;
    210 
    211           case 4:
    212           case 5:
    213           case 6:
    214           case 7:
    215           case 8:
    216           case 9:
    217             if (first_letter == 't' && second_letter == 'h') {
    218               AcceptChars(2);
    219               continue;
    220             }
    221             break;
    222 
    223           default:
    224             NOTREACHED();
    225           }
    226         }
    227       }
    228 
    229       RestartOnNextDelimiter();
    230       continue;
    231     }
    232 
    233     if (*it_ == '-' && num_digits_ > 0) {
    234       AcceptChars(1);
    235       ++num_digits_;
    236       continue;
    237     }
    238 
    239     RestartOnNextDelimiter();
    240     SkipChars(1);
    241   }
    242 
    243   if (iterations >= max_iterations)
    244     return false;
    245 
    246   return CheckFinished(word);
    247 }
    248 
    249 bool FindStateStartingInWord(WordList* words,
    250                              size_t state_first_word,
    251                              size_t* state_last_word,
    252                              String16Tokenizer* tokenizer,
    253                              size_t* state_index) {
    254 
    255   // Bitmasks containing the allowed suffixes for 2-letter state codes.
    256   static const int state_two_letter_suffix[23] = {
    257     0x02060c00,  // A followed by: [KLRSZ].
    258     0x00000000,  // B.
    259     0x00084001,  // C followed by: [AOT].
    260     0x00000014,  // D followed by: [CE].
    261     0x00000000,  // E.
    262     0x00001800,  // F followed by: [LM].
    263     0x00100001,  // G followed by: [AU].
    264     0x00000100,  // H followed by: [I].
    265     0x00002809,  // I followed by: [ADLN].
    266     0x00000000,  // J.
    267     0x01040000,  // K followed by: [SY].
    268     0x00000001,  // L followed by: [A].
    269     0x000ce199,  // M followed by: [ADEHINOPST].
    270     0x0120129c,  // N followed by: [CDEHJMVY].
    271     0x00020480,  // O followed by: [HKR].
    272     0x00420001,  // P followed by: [ARW].
    273     0x00000000,  // Q.
    274     0x00000100,  // R followed by: [I].
    275     0x0000000c,  // S followed by: [CD].
    276     0x00802000,  // T followed by: [NX].
    277     0x00080000,  // U followed by: [T].
    278     0x00080101,  // V followed by: [AIT].
    279     0x01200101   // W followed by: [AIVY].
    280   };
    281 
    282   // Accumulative number of states for the 2-letter code indexed by the first.
    283   static const int state_two_letter_accumulative[24] = {
    284      0,  5,  5,  8, 10, 10, 12, 14,
    285     15, 19, 19, 21, 22, 32, 40, 43,
    286     46, 46, 47, 49, 51, 52, 55, 59
    287   };
    288 
    289   // State names sorted alphabetically with their lengths.
    290   // There can be more than one possible name for a same state if desired.
    291   static const struct StateNameInfo {
    292     const char* string;
    293     char first_word_length;
    294     char length;
    295     char state_index; // Relative to two-character code alphabetical order.
    296   } state_names[59] = {
    297     { "alabama", 7, 7, 1 }, { "alaska", 6, 6, 0 },
    298     { "american samoa", 8, 14, 3 }, { "arizona", 7, 7, 4 },
    299     { "arkansas", 8, 8, 2 },
    300     { "california", 10, 10, 5 }, { "colorado", 8, 8, 6 },
    301     { "connecticut", 11, 11, 7 }, { "delaware", 8, 8, 9 },
    302     { "district of columbia", 8, 20, 8 },
    303     { "federated states of micronesia", 9, 30, 11 }, { "florida", 7, 7, 10 },
    304     { "guam", 4, 4, 13 }, { "georgia", 7, 7, 12 },
    305     { "hawaii", 6, 6, 14 },
    306     { "idaho", 5, 5, 16 }, { "illinois", 8, 8, 17 }, { "indiana", 7, 7, 18 },
    307     { "iowa", 4, 4, 15 },
    308     { "kansas", 6, 6, 19 }, { "kentucky", 8, 8, 20 },
    309     { "louisiana", 9, 9, 21 },
    310     { "maine", 5, 5, 24 }, { "marshall islands", 8, 16, 25 },
    311     { "maryland", 8, 8, 23 }, { "massachusetts", 13, 13, 22 },
    312     { "michigan", 8, 8, 26 }, { "minnesota", 9, 9, 27 },
    313     { "mississippi", 11, 11, 30 }, { "missouri", 8, 8, 28 },
    314     { "montana", 7, 7, 31 },
    315     { "nebraska", 8, 8, 34 }, { "nevada", 6, 6, 38 },
    316     { "new hampshire", 3, 13, 35 }, { "new jersey", 3, 10, 36 },
    317     { "new mexico", 3, 10, 37 }, { "new york", 3, 8, 39 },
    318     { "north carolina", 5, 14, 32 }, { "north dakota", 5, 12, 33 },
    319     { "northern mariana islands", 8, 24, 29 },
    320     { "ohio", 4, 4, 40 }, { "oklahoma", 8, 8, 41 }, { "oregon", 6, 6, 42 },
    321     { "palau", 5, 5, 45 }, { "pennsylvania", 12, 12, 43 },
    322     { "puerto rico", 6, 11, 44 },
    323     { "rhode island", 5, 5, 46 },
    324     { "south carolina", 5, 14, 47 }, { "south dakota", 5, 12, 48 },
    325     { "tennessee", 9, 9, 49 }, { "texas", 5, 5, 50 },
    326     { "utah", 4, 4, 51 },
    327     { "vermont", 7, 7, 54 }, { "virgin islands", 6, 14, 53 },
    328     { "virginia", 8, 8, 52 },
    329     { "washington", 10, 10, 55 }, { "west virginia", 4, 13, 57 },
    330     { "wisconsin", 9, 9, 56 }, { "wyoming", 7, 7, 58 }
    331   };
    332 
    333   // Accumulative number of states for sorted names indexed by the first letter.
    334   // Required a different one since there are codes that don't share their
    335   // first letter with the name of their state (MP = Northern Mariana Islands).
    336   static const int state_names_accumulative[24] = {
    337      0,  5,  5,  8, 10, 10, 12, 14,
    338     15, 19, 19, 21, 22, 31, 40, 43,
    339     46, 46, 47, 49, 51, 52, 55, 59
    340   };
    341 
    342   DCHECK_EQ(state_names_accumulative[arraysize(state_names_accumulative) - 1],
    343       static_cast<int>(ARRAYSIZE_UNSAFE(state_names)));
    344 
    345   const Word& first_word = words->at(state_first_word);
    346   int length = first_word.end - first_word.begin;
    347   if (length < 2 || !IsAsciiAlpha(*first_word.begin))
    348     return false;
    349 
    350   // No state names start with x, y, z.
    351   char16 first_letter = base::ToLowerASCII(*first_word.begin);
    352   if (first_letter > 'w')
    353     return false;
    354 
    355   DCHECK(first_letter >= 'a');
    356   int first_index = first_letter - 'a';
    357 
    358   // Look for two-letter state names.
    359   if (length == 2 && IsAsciiAlpha(*(first_word.begin + 1))) {
    360     char16 second_letter = base::ToLowerASCII(*(first_word.begin + 1));
    361     DCHECK(second_letter >= 'a');
    362 
    363     int second_index = second_letter - 'a';
    364     if (!(state_two_letter_suffix[first_index] & (1 << second_index)))
    365       return false;
    366 
    367     std::bitset<32> previous_suffixes = state_two_letter_suffix[first_index] &
    368         ((1 << second_index) - 1);
    369     *state_last_word = state_first_word;
    370     *state_index = state_two_letter_accumulative[first_index] +
    371         previous_suffixes.count();
    372     return true;
    373   }
    374 
    375   // Look for full state names by their first letter. Discard by length.
    376   for (int state = state_names_accumulative[first_index];
    377       state < state_names_accumulative[first_index + 1]; ++state) {
    378     if (state_names[state].first_word_length != length)
    379       continue;
    380 
    381     bool state_match = false;
    382     size_t state_word = state_first_word;
    383     for (int pos = 0; true; ) {
    384       if (!WordLowerCaseEqualsASCII(words->at(state_word).begin,
    385           words->at(state_word).end, &state_names[state].string[pos]))
    386         break;
    387 
    388       pos += words->at(state_word).end - words->at(state_word).begin + 1;
    389       if (pos >= state_names[state].length) {
    390         state_match = true;
    391         break;
    392       }
    393 
    394       // Ran out of words, extract more from the tokenizer.
    395       if (++state_word == words->size()) {
    396         do {
    397           if (!tokenizer->GetNext())
    398             break;
    399         } while (tokenizer->token_is_delim());
    400         words->push_back(
    401             Word(tokenizer->token_begin(), tokenizer->token_end()));
    402       }
    403     }
    404 
    405     if (state_match) {
    406       *state_last_word = state_word;
    407       *state_index = state_names[state].state_index;
    408       return true;
    409     }
    410   }
    411 
    412   return false;
    413 }
    414 
    415 bool IsZipValid(const Word& word, size_t state_index) {
    416   size_t length = word.end - word.begin;
    417   if (length != kZipDigits && length != kZipPlus4Digits + 1)
    418     return false;
    419 
    420   for (base::string16::const_iterator it = word.begin; it != word.end; ++it) {
    421     size_t pos = it - word.begin;
    422     if (IsAsciiDigit(*it) || (*it == '-' && pos == kZipDigits))
    423       continue;
    424     return false;
    425   }
    426   return IsZipValidForState(word, state_index);
    427 }
    428 
    429 bool IsZipValidForState(const Word& word, size_t state_index) {
    430   // List of valid zip code ranges.
    431   static const struct {
    432     signed char low;
    433     signed char high;
    434     signed char exception1;
    435     signed char exception2;
    436   } zip_range[] = {
    437     { 99, 99, -1, -1 }, // AK Alaska.
    438     { 35, 36, -1, -1 }, // AL Alabama.
    439     { 71, 72, -1, -1 }, // AR Arkansas.
    440     { 96, 96, -1, -1 }, // AS American Samoa.
    441     { 85, 86, -1, -1 }, // AZ Arizona.
    442     { 90, 96, -1, -1 }, // CA California.
    443     { 80, 81, -1, -1 }, // CO Colorado.
    444     {  6,  6, -1, -1 }, // CT Connecticut.
    445     { 20, 20, -1, -1 }, // DC District of Columbia.
    446     { 19, 19, -1, -1 }, // DE Delaware.
    447     { 32, 34, -1, -1 }, // FL Florida.
    448     { 96, 96, -1, -1 }, // FM Federated States of Micronesia.
    449     { 30, 31, -1, -1 }, // GA Georgia.
    450     { 96, 96, -1, -1 }, // GU Guam.
    451     { 96, 96, -1, -1 }, // HI Hawaii.
    452     { 50, 52, -1, -1 }, // IA Iowa.
    453     { 83, 83, -1, -1 }, // ID Idaho.
    454     { 60, 62, -1, -1 }, // IL Illinois.
    455     { 46, 47, -1, -1 }, // IN Indiana.
    456     { 66, 67, 73, -1 }, // KS Kansas.
    457     { 40, 42, -1, -1 }, // KY Kentucky.
    458     { 70, 71, -1, -1 }, // LA Louisiana.
    459     {  1,  2, -1, -1 }, // MA Massachusetts.
    460     { 20, 21, -1, -1 }, // MD Maryland.
    461     {  3,  4, -1, -1 }, // ME Maine.
    462     { 96, 96, -1, -1 }, // MH Marshall Islands.
    463     { 48, 49, -1, -1 }, // MI Michigan.
    464     { 55, 56, -1, -1 }, // MN Minnesota.
    465     { 63, 65, -1, -1 }, // MO Missouri.
    466     { 96, 96, -1, -1 }, // MP Northern Mariana Islands.
    467     { 38, 39, -1, -1 }, // MS Mississippi.
    468     { 55, 56, -1, -1 }, // MT Montana.
    469     { 27, 28, -1, -1 }, // NC North Carolina.
    470     { 58, 58, -1, -1 }, // ND North Dakota.
    471     { 68, 69, -1, -1 }, // NE Nebraska.
    472     {  3,  4, -1, -1 }, // NH New Hampshire.
    473     {  7,  8, -1, -1 }, // NJ New Jersey.
    474     { 87, 88, 86, -1 }, // NM New Mexico.
    475     { 88, 89, 96, -1 }, // NV Nevada.
    476     { 10, 14,  0,  6 }, // NY New York.
    477     { 43, 45, -1, -1 }, // OH Ohio.
    478     { 73, 74, -1, -1 }, // OK Oklahoma.
    479     { 97, 97, -1, -1 }, // OR Oregon.
    480     { 15, 19, -1, -1 }, // PA Pennsylvania.
    481     {  6,  6,  0,  9 }, // PR Puerto Rico.
    482     { 96, 96, -1, -1 }, // PW Palau.
    483     {  2,  2, -1, -1 }, // RI Rhode Island.
    484     { 29, 29, -1, -1 }, // SC South Carolina.
    485     { 57, 57, -1, -1 }, // SD South Dakota.
    486     { 37, 38, -1, -1 }, // TN Tennessee.
    487     { 75, 79, 87, 88 }, // TX Texas.
    488     { 84, 84, -1, -1 }, // UT Utah.
    489     { 22, 24, 20, -1 }, // VA Virginia.
    490     {  6,  9, -1, -1 }, // VI Virgin Islands.
    491     {  5,  5, -1, -1 }, // VT Vermont.
    492     { 98, 99, -1, -1 }, // WA Washington.
    493     { 53, 54, -1, -1 }, // WI Wisconsin.
    494     { 24, 26, -1, -1 }, // WV West Virginia.
    495     { 82, 83, -1, -1 }  // WY Wyoming.
    496   };
    497 
    498   // Zip numeric value for the first two characters.
    499   DCHECK(word.begin != word.end);
    500   DCHECK(IsAsciiDigit(*word.begin));
    501   DCHECK(IsAsciiDigit(*(word.begin + 1)));
    502   int zip_prefix = (*word.begin - '0') * 10 + (*(word.begin + 1) - '0');
    503 
    504   if ((zip_prefix >= zip_range[state_index].low &&
    505        zip_prefix <= zip_range[state_index].high) ||
    506       zip_prefix == zip_range[state_index].exception1 ||
    507       zip_prefix == zip_range[state_index].exception2) {
    508     return true;
    509   }
    510   return false;
    511 }
    512 
    513 bool IsValidLocationName(const Word& word) {
    514   // Supported location names sorted alphabetically and grouped by first letter.
    515   static const struct LocationNameInfo {
    516     const char* string;
    517     char length;
    518     bool allow_plural;
    519   } location_names[157] = {
    520     { "alley", 5, false }, { "annex", 5, false }, { "arcade", 6, false },
    521     { "ave", 3, false }, { "ave.", 4, false }, { "avenue", 6, false },
    522     { "alameda", 7, false },
    523     { "bayou", 5, false }, { "beach", 5, false }, { "bend", 4, false },
    524     { "bluff", 5, true }, { "bottom", 6, false }, { "boulevard", 9, false },
    525     { "branch", 6, false }, { "bridge", 6, false }, { "brook", 5, true },
    526     { "burg", 4, true }, { "bypass", 6, false }, { "broadway", 8, false },
    527     { "camino", 6, false }, { "camp", 4, false }, { "canyon", 6, false },
    528     { "cape", 4, false }, { "causeway", 8, false }, { "center", 6, true },
    529     { "circle", 6, true }, { "cliff", 5, true }, { "club", 4, false },
    530     { "common", 6, false }, { "corner", 6, true }, { "course", 6, false },
    531     { "court", 5, true }, { "cove", 4, true }, { "creek", 5, false },
    532     { "crescent", 8, false }, { "crest", 5, false }, { "crossing", 8, false },
    533     { "crossroad", 9, false }, { "curve", 5, false }, { "circulo", 7, false },
    534     { "dale", 4, false }, { "dam", 3, false }, { "divide", 6, false },
    535     { "drive", 5, true },
    536     { "estate", 6, true }, { "expressway", 10, false },
    537     { "extension", 9, true },
    538     { "fall", 4, true }, { "ferry", 5, false }, { "field", 5, true },
    539     { "flat", 4, true }, { "ford", 4, true }, { "forest", 6, false },
    540     { "forge", 5, true }, { "fork", 4, true }, { "fort", 4, false },
    541     { "freeway", 7, false },
    542     { "garden", 6, true }, { "gateway", 7, false }, { "glen", 4, true },
    543     { "green", 5, true }, { "grove", 5, true },
    544     { "harbor", 6, true }, { "haven", 5, false }, { "heights", 7, false },
    545     { "highway", 7, false }, { "hill", 4, true }, { "hollow", 6, false },
    546     { "inlet", 5, false }, { "island", 6, true }, { "isle", 4, false },
    547     { "junction", 8, true },
    548     { "key", 3, true }, { "knoll", 5, true },
    549     { "lake", 4, true }, { "land", 4, false }, { "landing", 7, false },
    550     { "lane", 4, false }, { "light", 5, true }, { "loaf", 4, false },
    551     { "lock", 4, true }, { "lodge", 5, false }, { "loop", 4, false },
    552     { "mall", 4, false }, { "manor", 5, true }, { "meadow", 6, true },
    553     { "mews", 4, false }, { "mill", 4, true }, { "mission", 7, false },
    554     { "motorway", 8, false }, { "mount", 5, false }, { "mountain", 8, true },
    555     { "neck", 4, false },
    556     { "orchard", 7, false }, { "oval", 4, false }, { "overpass", 8, false },
    557     { "park", 4, true }, { "parkway", 7, true }, { "pass", 4, false },
    558     { "passage", 7, false }, { "path", 4, false }, { "pike", 4, false },
    559     { "pine", 4, true }, { "plain", 5, true }, { "plaza", 5, false },
    560     { "point", 5, true }, { "port", 4, true }, { "prairie", 7, false },
    561     { "privada", 7, false },
    562     { "radial", 6, false }, { "ramp", 4, false }, { "ranch", 5, false },
    563     { "rapid", 5, true }, { "rest", 4, false }, { "ridge", 5, true },
    564     { "river", 5, false }, { "road", 4, true }, { "route", 5, false },
    565     { "row", 3, false }, { "rue", 3, false }, { "run", 3, false },
    566     { "shoal", 5, true }, { "shore", 5, true }, { "skyway", 6, false },
    567     { "spring", 6, true }, { "spur", 4, true }, { "square", 6, true },
    568     { "station", 7, false }, { "stravenue", 9, false }, { "stream", 6, false },
    569     { "st", 2, false }, { "st.", 3, false }, { "street", 6, true },
    570     { "summit", 6, false }, { "speedway", 8, false },
    571     { "terrace", 7, false }, { "throughway", 10, false }, { "trace", 5, false },
    572     { "track", 5, false }, { "trafficway", 10, false }, { "trail", 5, false },
    573     { "tunnel", 6, false }, { "turnpike", 8, false },
    574     { "underpass", 9, false }, { "union", 5, true },
    575     { "valley", 6, true }, { "viaduct", 7, false }, { "view", 4, true },
    576     { "village", 7, true }, { "ville", 5, false }, { "vista", 5, false },
    577     { "walk", 4, true }, { "wall", 4, false }, { "way", 3, true },
    578     { "well", 4, true },
    579     { "xing", 4, false }, { "xrd", 3, false }
    580   };
    581 
    582   // Accumulative number of location names for each starting letter.
    583   static const int location_names_accumulative[25] = {
    584       0,   7,  19,  40,  44,
    585      47,  57,  62,  68,  71,
    586      72,  74,  83,  92,  93,
    587      96, 109, 109, 121, 135,
    588     143, 145, 151, 155, 157
    589   };
    590 
    591   DCHECK_EQ(
    592       location_names_accumulative[arraysize(location_names_accumulative) - 1],
    593       static_cast<int>(ARRAYSIZE_UNSAFE(location_names)));
    594 
    595   if (!IsAsciiAlpha(*word.begin))
    596     return false;
    597 
    598   // No location names start with y, z.
    599   char16 first_letter = base::ToLowerASCII(*word.begin);
    600   if (first_letter > 'x')
    601     return false;
    602 
    603   DCHECK(first_letter >= 'a');
    604   int index = first_letter - 'a';
    605   int length = std::distance(word.begin, word.end);
    606   for (int i = location_names_accumulative[index];
    607       i < location_names_accumulative[index + 1]; ++i) {
    608     if (location_names[i].length != length &&
    609         (location_names[i].allow_plural &&
    610          location_names[i].length + 1 != length)) {
    611       continue;
    612     }
    613 
    614     if (LowerCaseEqualsASCIIWithPlural(word.begin, word.end,
    615                                        location_names[i].string,
    616                                        location_names[i].allow_plural)) {
    617       return true;
    618     }
    619   }
    620 
    621   return false;
    622 }
    623 
    624 } // namespace internal
    625 
    626 } // namespace address_parser
    627 
    628 }  // namespace content
    629