Home | History | Annotate | Download | only in browser
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
// This file contains UTF8 strings that we want as char arrays.  To avoid
// depending on how different compilers treat such strings, we use a script
// to convert the UTF8 strings into numeric literals (\x##).
      8 
      9 #include "components/autofill/core/browser/autofill_regex_constants.h"
     10 
     11 namespace autofill {
     12 
     13 /////////////////////////////////////////////////////////////////////////////
     14 // address_field.cc
     15 /////////////////////////////////////////////////////////////////////////////
     16 const char kAttentionIgnoredRe[] = "attention|attn";
     17 const char kRegionIgnoredRe[] =
     18     "province|region|other"
     19     "|provincia"  // es
     20     "|bairro|suburb";  // pt-BR, pt-PT
     21 const char kCompanyRe[] =
     22     "company|business|organization|organisation"
     23     "|firma|firmenname"  // de-DE
     24     "|empresa"  // es
     25     "|societe|socit"  // fr-FR
     26     "|ragione.?sociale"  // it-IT
     27     "|"  // ja-JP
     28     "|.?"  // ru
     29     "||"  // zh-CN
     30     "||";  // ko-KR
     31 const char kAddressLine1Re[] =
     32     "address.*line|address1|addr1|street"
     33     "|strasse|strae|hausnummer|housenumber"  // de-DE
     34     "|house.?name"  // en-GB
     35     "|direccion|direccin"  // es
     36     "|adresse"  // fr-FR
     37     "|indirizzo"  // it-IT
     38     "|1"  // ja-JP
     39     "|morada|endereo"  // pt-BR, pt-PT
     40     "|"  // ru
     41     "|"  // zh-CN
     42     "|.?1";  // ko-KR
     43 const char kAddressLine1LabelRe[] =
     44     "address"
     45     "|adresse"  // fr-FR
     46     "|indirizzo"  // it-IT
     47     "|"  // ja-JP
     48     "|"  // zh-CN
     49     "|";  // ko-KR
     50 const char kAddressLine2Re[] =
     51     "address.*line2|address2|addr2|street|suite|unit"
     52     "|adresszusatz|ergnzende.?angaben"  // de-DE
     53     "|direccion2|colonia|adicional"  // es
     54     "|addresssuppl|complementnom|appartement"  // fr-FR
     55     "|indirizzo2"  // it-IT
     56     "|2"  // ja-JP
     57     "|complemento|addrcomplement"  // pt-BR, pt-PT
     58     "|"  // ru
     59     "|2"  // zh-CN
     60     "|.?2";  // ko-KR
     61 const char kAddressLine2LabelRe[] =
     62     "address"
     63     "|adresse"  // fr-FR
     64     "|indirizzo"  // it-IT
     65     "|"  // zh-CN
     66     "|";  // ko-KR
     67 const char kAddressLinesExtraRe[] =
     68     "address.*line[3-9]|address[3-9]|addr[3-9]|street|line[3-9]"
     69     "|municipio"  // es
     70     "|batiment|residence"  // fr-FR
     71     "|indirizzo[3-9]";  // it-IT
     72 const char kCountryRe[] =
     73     "country|countries|location"
     74     "|pas|pais"  // es
     75     "|"  // ja-JP
     76     "|"  // zh-CN
     77     "||";  // ko-KR
     78 const char kZipCodeRe[] =
     79     "zip|postal|post.*code|pcode|^1z$"
     80     "|postleitzahl"  // de-DE
     81     "|\\bcp\\b"  // es
     82     "|\\bcdp\\b"  // fr-FR
     83     "|\\bcap\\b"  // it-IT
     84     "|"  // ja-JP
     85     "|codigo|codpos|\\bcep\\b"  // pt-BR, pt-PT
     86     "|.?"  // ru
     87     "||"  // zh-CN
     88     "|"  // zh-TW
     89     "|.?";  // ko-KR
     90 const char kZip4Re[] =
     91     "zip|^-$|post2"
     92     "|codpos2";  // pt-BR, pt-PT
     93 const char kCityRe[] =
     94     "city|town"
     95     "|\\bort\\b|stadt"  // de-DE
     96     "|suburb"  // en-AU
     97     "|ciudad|provincia|localidad|poblacion"  // es
     98     "|ville|commune"  // fr-FR
     99     "|localita"  // it-IT
    100     "|"  // ja-JP
    101     "|cidade"  // pt-BR, pt-PT
    102     "|"  // ru
    103     "|"  // zh-CN
    104     "|"  // zh-TW
    105     "|^[^]|[]?[]?";  // ko-KR
    106 const char kStateRe[] =
    107     "(?<!united )state|county|region|province"
    108     "|land"  // de-DE
    109     "|county|principality"  // en-UK
    110     "|"  // ja-JP
    111     "|estado|provincia"  // pt-BR, pt-PT
    112     "|"  // ru
    113     "|"  // zh-CN
    114     "|"  // zh-TW
    115     "|^[]?";  // ko-KR
    116 const char kAddressTypeSameAsRe[] = "same as";
    117 const char kAddressTypeUseMyRe[] = "use my";
    118 const char kBillingDesignatorRe[] = "bill";
    119 const char kShippingDesignatorRe[] = "ship";
    120 
    121 /////////////////////////////////////////////////////////////////////////////
    122 // credit_card_field.cc
    123 /////////////////////////////////////////////////////////////////////////////
    124 const char kNameOnCardRe[] =
    125     "card.?holder|name.*\\bon\\b.*card|cc.?name|cc.?full.?name|owner"
    126     "|karteninhaber"  // de-DE
    127     "|nombre.*tarjeta"  // es
    128     "|nom.*carte"  // fr-FR
    129     "|nome.*cart"  // it-IT
    130     "|"  // ja-JP
    131     "|.*"  // ru
    132     "|||"  // zh-CN
    133     "|";  // zh-TW
    134 const char kNameOnCardContextualRe[] =
    135     "name";
    136 const char kCardNumberRe[] =
    137     "card.?number|card.?#|card.?no|cc.?num|acct.?num"
    138     "|nummer"  // de-DE
    139     "|credito|numero|nmero"  // es
    140     "|numro"  // fr-FR
    141     "|"  // ja-JP
    142     "|.*"  // ru
    143     "||"  // zh-CN
    144     "|"  // zh-TW
    145     "|";  // ko-KR
    146 const char kCardCvcRe[] =
    147     "verification|card identification|security code|cvn|cvv|cvc|csc|\\bcid\\b";
    148 const char kCardTypeRe[] =
    149     "card.?type|cc.?type|payment.?method";
    150 
    151 // "Expiration date" is the most common label here, but some pages have
    152 // "Expires", "exp. date" or "exp. month" and "exp. year".  We also look
    153 // for the field names ccmonth and ccyear, which appear on at least 4 of
    154 // our test pages.
    155 
    156 // On at least one page (The China Shop2.html) we find only the labels
    157 // "month" and "year".  So for now we match these words directly; we'll
    158 // see if this turns out to be too general.
    159 
    160 // Toolbar Bug 51451: indeed, simply matching "month" is too general for
    161 //   https://rps.fidelity.com/ftgw/rps/RtlCust/CreatePIN/Init.
    162 // Instead, we match only words beginning with "month".
    163 const char kExpirationMonthRe[] =
    164     "expir|exp.*mo|exp.*date|ccmonth|cardmonth"
    165     "|gueltig|gltig|monat"  // de-DE
    166     "|fecha"  // es
    167     "|date.*exp"  // fr-FR
    168     "|scadenza"  // it-IT
    169     "|"  // ja-JP
    170     "|validade"  // pt-BR, pt-PT
    171     "|  "  // ru
    172     "|";  // zh-CN
    173 const char kExpirationYearRe[] =
    174     "exp|^/|year"
    175     "|ablaufdatum|gueltig|gltig|yahr"  // de-DE
    176     "|fecha"  // es
    177     "|scadenza"  // it-IT
    178     "|"  // ja-JP
    179     "|validade"  // pt-BR, pt-PT
    180     "|  "  // ru
    181     "||";  // zh-CN
    182 
    183 // This regex is a little bit nasty, but it is simply requiring exactly two
    184 // adjacent y's.
    185 const char kExpirationDate2DigitYearRe[] =
    186     "exp.*date.*[^y]yy([^y]|$)";
    187 const char kExpirationDateRe[] =
    188     "expir|exp.*date"
    189     "|gueltig|gltig"  // de-DE
    190     "|fecha"  // es
    191     "|date.*exp"  // fr-FR
    192     "|scadenza"  // it-IT
    193     "|"  // ja-JP
    194     "|validade"  // pt-BR, pt-PT
    195     "|  ";  // ru
    196 const char kCardIgnoredRe[] =
    197     "^card";
    198 const char kGiftCardRe[] =
    199     "gift.?card";
    200 
    201 
    202 /////////////////////////////////////////////////////////////////////////////
    203 // email_field.cc
    204 /////////////////////////////////////////////////////////////////////////////
    205 const char kEmailRe[] =
    206     "e.?mail"
    207     "|courriel"  // fr
    208     "|"  // ja-JP
    209     "|.?"  // ru
    210     "||"  // zh-CN
    211     "|"  // zh-TW
    212     "|(|.?|[Ee]-?mail)(.?)?";  // ko-KR
    213 
    214 
    215 /////////////////////////////////////////////////////////////////////////////
    216 // name_field.cc
    217 /////////////////////////////////////////////////////////////////////////////
    218 const char kNameIgnoredRe[] =
    219     "user.?name|user.?id|nickname|maiden name|title|prefix|suffix"
    220     "|vollstndiger.?name"  // de-DE
    221     "|"  // zh-CN
    222     "|(.?)?|.?ID";  // ko-KR
    223 const char kNameRe[] =
    224     "^name|full.?name|your.?name|customer.?name|firstandlastname|bill.?name"
    225         "|ship.?name"
    226     "|nombre.*y.*apellidos"  // es
    227     "|^nom"  // fr-FR
    228     "||"  // ja-JP
    229     "|^nome"  // pt-BR, pt-PT
    230     "|"  // zh-CN
    231     "|";  // ko-KR
    232 const char kNameSpecificRe[] =
    233     "^name"
    234     "|^nom"  // fr-FR
    235     "|^nome";  // pt-BR, pt-PT
    236 const char kFirstNameRe[] =
    237     "first.*name|initials|fname|first$"
    238     "|vorname"  // de-DE
    239     "|nombre"  // es
    240     "|forename|prnom|prenom"  // fr-FR
    241     "|"  // ja-JP
    242     "|nome"  // pt-BR, pt-PT
    243     "|"  // ru
    244     "|";  // ko-KR
    245 const char kMiddleInitialRe[] = "middle.*initial|m\\.i\\.|mi$|\\bmi\\b";
    246 const char kMiddleNameRe[] =
    247     "middle.*name|mname|middle$"
    248     "|apellido.?materno|lastlastname";  // es
    249 const char kLastNameRe[] =
    250     "last.*name|lname|surname|last$|secondname"
    251     "|nachname"  // de-DE
    252     "|apellido"  // es
    253     "|famille|^nom"  // fr-FR
    254     "|cognome"  // it-IT
    255     "|"  // ja-JP
    256     "|morada|apelidos|surename|sobrenome"  // pt-BR, pt-PT
    257     "|"  // ru
    258     "|[^]?";  // ko-KR
    259 
    260 /////////////////////////////////////////////////////////////////////////////
    261 // phone_field.cc
    262 /////////////////////////////////////////////////////////////////////////////
    263 const char kPhoneRe[] =
    264     "phone|mobile"
    265     "|telefonnummer"  // de-DE
    266     "|telefono|telfono"  // es
    267     "|telfixe"  // fr-FR
    268     "|"  // ja-JP
    269     "|telefone|telemovel"  // pt-BR, pt-PT
    270     "|"  // ru
    271     "|"  // zh-CN
    272     "|(|||)(.?)?";  // ko-KR
    273 const char kCountryCodeRe[] =
    274     "country.*code|ccode|_cc";
    275 const char kAreaCodeNotextRe[] =
    276     "^\\($";
    277 const char kAreaCodeRe[] =
    278     "area.*code|acode|area"
    279     "|.?";  // ko-KR
    280 const char kPhonePrefixSeparatorRe[] =
    281     "^-$|^\\)$";
    282 const char kPhoneSuffixSeparatorRe[] =
    283     "^-$";
    284 const char kPhonePrefixRe[] =
    285     "prefix|exchange"
    286     "|preselection"  // fr-FR
    287     "|ddd";  // pt-BR, pt-PT
    288 const char kPhoneSuffixRe[] =
    289     "suffix";
    290 const char kPhoneExtensionRe[] =
    291     "\\bext|ext\\b|extension"
    292     "|ramal";  // pt-BR, pt-PT
    293 
    294 }  // namespace autofill
    295