Home | History | Annotate | Download | only in renderer
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "components/autofill/content/renderer/form_autofill_util.h"
      6 
      7 #include <map>
      8 
      9 #include "base/command_line.h"
     10 #include "base/logging.h"
     11 #include "base/memory/scoped_vector.h"
     12 #include "base/metrics/field_trial.h"
     13 #include "base/strings/string_util.h"
     14 #include "base/strings/utf_string_conversions.h"
     15 #include "components/autofill/core/common/autofill_data_validation.h"
     16 #include "components/autofill/core/common/autofill_switches.h"
     17 #include "components/autofill/core/common/form_data.h"
     18 #include "components/autofill/core/common/form_field_data.h"
     19 #include "components/autofill/core/common/web_element_descriptor.h"
     20 #include "third_party/WebKit/public/platform/WebString.h"
     21 #include "third_party/WebKit/public/platform/WebVector.h"
     22 #include "third_party/WebKit/public/web/WebDocument.h"
     23 #include "third_party/WebKit/public/web/WebElement.h"
     24 #include "third_party/WebKit/public/web/WebElementCollection.h"
     25 #include "third_party/WebKit/public/web/WebExceptionCode.h"
     26 #include "third_party/WebKit/public/web/WebFormControlElement.h"
     27 #include "third_party/WebKit/public/web/WebFormElement.h"
     28 #include "third_party/WebKit/public/web/WebInputElement.h"
     29 #include "third_party/WebKit/public/web/WebLabelElement.h"
     30 #include "third_party/WebKit/public/web/WebLocalFrame.h"
     31 #include "third_party/WebKit/public/web/WebNode.h"
     32 #include "third_party/WebKit/public/web/WebNodeList.h"
     33 #include "third_party/WebKit/public/web/WebOptionElement.h"
     34 #include "third_party/WebKit/public/web/WebSelectElement.h"
     35 #include "third_party/WebKit/public/web/WebTextAreaElement.h"
     36 
     37 using blink::WebDocument;
     38 using blink::WebElement;
     39 using blink::WebElementCollection;
     40 using blink::WebExceptionCode;
     41 using blink::WebFormControlElement;
     42 using blink::WebFormElement;
     43 using blink::WebFrame;
     44 using blink::WebInputElement;
     45 using blink::WebLabelElement;
     46 using blink::WebNode;
     47 using blink::WebNodeList;
     48 using blink::WebOptionElement;
     49 using blink::WebSelectElement;
     50 using blink::WebTextAreaElement;
     51 using blink::WebString;
     52 using blink::WebVector;
     53 
     54 namespace autofill {
     55 namespace {
     56 
     57 bool IsOptionElement(const WebElement& element) {
     58   CR_DEFINE_STATIC_LOCAL(WebString, kOption, ("option"));
     59   return element.hasHTMLTagName(kOption);
     60 }
     61 
     62 bool IsScriptElement(const WebElement& element) {
     63   CR_DEFINE_STATIC_LOCAL(WebString, kScript, ("script"));
     64   return element.hasHTMLTagName(kScript);
     65 }
     66 
     67 bool IsNoScriptElement(const WebElement& element) {
     68   CR_DEFINE_STATIC_LOCAL(WebString, kNoScript, ("noscript"));
     69   return element.hasHTMLTagName(kNoScript);
     70 }
     71 
     72 bool HasTagName(const WebNode& node, const blink::WebString& tag) {
     73   return node.isElementNode() && node.toConst<WebElement>().hasHTMLTagName(tag);
     74 }
     75 
     76 bool IsAutofillableElement(const WebFormControlElement& element) {
     77   const WebInputElement* input_element = toWebInputElement(&element);
     78   return IsAutofillableInputElement(input_element) ||
     79          IsSelectElement(element) ||
     80          IsTextAreaElement(element);
     81 }
     82 
     83 // Check whether the given field satisfies the REQUIRE_AUTOCOMPLETE requirement.
     84 bool SatisfiesRequireAutocomplete(const WebInputElement& input_element) {
     85   return input_element.autoComplete();
     86 }
     87 
     88 // Appends |suffix| to |prefix| so that any intermediary whitespace is collapsed
     89 // to a single space.  If |force_whitespace| is true, then the resulting string
     90 // is guaranteed to have a space between |prefix| and |suffix|.  Otherwise, the
     91 // result includes a space only if |prefix| has trailing whitespace or |suffix|
     92 // has leading whitespace.
     93 // A few examples:
     94 //  * CombineAndCollapseWhitespace("foo", "bar", false)       -> "foobar"
     95 //  * CombineAndCollapseWhitespace("foo", "bar", true)        -> "foo bar"
     96 //  * CombineAndCollapseWhitespace("foo ", "bar", false)      -> "foo bar"
     97 //  * CombineAndCollapseWhitespace("foo", " bar", false)      -> "foo bar"
     98 //  * CombineAndCollapseWhitespace("foo", " bar", true)       -> "foo bar"
     99 //  * CombineAndCollapseWhitespace("foo   ", "   bar", false) -> "foo bar"
    100 //  * CombineAndCollapseWhitespace(" foo", "bar ", false)     -> " foobar "
    101 //  * CombineAndCollapseWhitespace(" foo", "bar ", true)      -> " foo bar "
    102 const base::string16 CombineAndCollapseWhitespace(
    103     const base::string16& prefix,
    104     const base::string16& suffix,
    105     bool force_whitespace) {
    106   base::string16 prefix_trimmed;
    107   base::TrimPositions prefix_trailing_whitespace =
    108       base::TrimWhitespace(prefix, base::TRIM_TRAILING, &prefix_trimmed);
    109 
    110   // Recursively compute the children's text.
    111   base::string16 suffix_trimmed;
    112   base::TrimPositions suffix_leading_whitespace =
    113       base::TrimWhitespace(suffix, base::TRIM_LEADING, &suffix_trimmed);
    114 
    115   if (prefix_trailing_whitespace || suffix_leading_whitespace ||
    116       force_whitespace) {
    117     return prefix_trimmed + base::ASCIIToUTF16(" ") + suffix_trimmed;
    118   } else {
    119     return prefix_trimmed + suffix_trimmed;
    120   }
    121 }
    122 
    123 // This is a helper function for the FindChildText() function (see below).
    124 // Search depth is limited with the |depth| parameter.
    125 base::string16 FindChildTextInner(const WebNode& node, int depth) {
    126   if (depth <= 0 || node.isNull())
    127     return base::string16();
    128 
    129   // Skip over comments.
    130   if (node.nodeType() == WebNode::CommentNode)
    131     return FindChildTextInner(node.nextSibling(), depth - 1);
    132 
    133   if (node.nodeType() != WebNode::ElementNode &&
    134       node.nodeType() != WebNode::TextNode)
    135     return base::string16();
    136 
    137   // Ignore elements known not to contain inferable labels.
    138   if (node.isElementNode()) {
    139     const WebElement element = node.toConst<WebElement>();
    140     if (IsOptionElement(element) ||
    141         IsScriptElement(element) ||
    142         IsNoScriptElement(element) ||
    143         (element.isFormControlElement() &&
    144          IsAutofillableElement(element.toConst<WebFormControlElement>()))) {
    145       return base::string16();
    146     }
    147   }
    148 
    149   // Extract the text exactly at this node.
    150   base::string16 node_text = node.nodeValue();
    151 
    152   // Recursively compute the children's text.
    153   // Preserve inter-element whitespace separation.
    154   base::string16 child_text = FindChildTextInner(node.firstChild(), depth - 1);
    155   bool add_space = node.nodeType() == WebNode::TextNode && node_text.empty();
    156   node_text = CombineAndCollapseWhitespace(node_text, child_text, add_space);
    157 
    158   // Recursively compute the siblings' text.
    159   // Again, preserve inter-element whitespace separation.
    160   base::string16 sibling_text =
    161       FindChildTextInner(node.nextSibling(), depth - 1);
    162   add_space = node.nodeType() == WebNode::TextNode && node_text.empty();
    163   node_text = CombineAndCollapseWhitespace(node_text, sibling_text, add_space);
    164 
    165   return node_text;
    166 }
    167 
    168 // Returns the aggregated values of the descendants of |element| that are
    169 // non-empty text nodes.  This is a faster alternative to |innerText()| for
    170 // performance critical operations.  It does a full depth-first search so can be
    171 // used when the structure is not directly known.  However, unlike with
    172 // |innerText()|, the search depth and breadth are limited to a fixed threshold.
    173 // Whitespace is trimmed from text accumulated at descendant nodes.
    174 base::string16 FindChildText(const WebNode& node) {
    175   if (node.isTextNode())
    176     return node.nodeValue();
    177 
    178   WebNode child = node.firstChild();
    179 
    180   const int kChildSearchDepth = 10;
    181   base::string16 node_text = FindChildTextInner(child, kChildSearchDepth);
    182   base::TrimWhitespace(node_text, base::TRIM_ALL, &node_text);
    183   return node_text;
    184 }
    185 
    186 // Helper for |InferLabelForElement()| that infers a label, if possible, from
    187 // a previous sibling of |element|,
    188 // e.g. Some Text <input ...>
    189 // or   Some <span>Text</span> <input ...>
    190 // or   <p>Some Text</p><input ...>
    191 // or   <label>Some Text</label> <input ...>
    192 // or   Some Text <img><input ...>
    193 // or   <b>Some Text</b><br/> <input ...>.
    194 base::string16 InferLabelFromPrevious(const WebFormControlElement& element) {
    195   base::string16 inferred_label;
    196   WebNode previous = element;
    197   while (true) {
    198     previous = previous.previousSibling();
    199     if (previous.isNull())
    200       break;
    201 
    202     // Skip over comments.
    203     WebNode::NodeType node_type = previous.nodeType();
    204     if (node_type == WebNode::CommentNode)
    205       continue;
    206 
    207     // Otherwise, only consider normal HTML elements and their contents.
    208     if (node_type != WebNode::TextNode &&
    209         node_type != WebNode::ElementNode)
    210       break;
    211 
    212     // A label might be split across multiple "lightweight" nodes.
    213     // Coalesce any text contained in multiple consecutive
    214     //  (a) plain text nodes or
    215     //  (b) inline HTML elements that are essentially equivalent to text nodes.
    216     CR_DEFINE_STATIC_LOCAL(WebString, kBold, ("b"));
    217     CR_DEFINE_STATIC_LOCAL(WebString, kStrong, ("strong"));
    218     CR_DEFINE_STATIC_LOCAL(WebString, kSpan, ("span"));
    219     CR_DEFINE_STATIC_LOCAL(WebString, kFont, ("font"));
    220     if (previous.isTextNode() ||
    221         HasTagName(previous, kBold) || HasTagName(previous, kStrong) ||
    222         HasTagName(previous, kSpan) || HasTagName(previous, kFont)) {
    223       base::string16 value = FindChildText(previous);
    224       // A text node's value will be empty if it is for a line break.
    225       bool add_space = previous.isTextNode() && value.empty();
    226       inferred_label =
    227           CombineAndCollapseWhitespace(value, inferred_label, add_space);
    228       continue;
    229     }
    230 
    231     // If we have identified a partial label and have reached a non-lightweight
    232     // element, consider the label to be complete.
    233     base::string16 trimmed_label;
    234     base::TrimWhitespace(inferred_label, base::TRIM_ALL, &trimmed_label);
    235     if (!trimmed_label.empty())
    236       break;
    237 
    238     // <img> and <br> tags often appear between the input element and its
    239     // label text, so skip over them.
    240     CR_DEFINE_STATIC_LOCAL(WebString, kImage, ("img"));
    241     CR_DEFINE_STATIC_LOCAL(WebString, kBreak, ("br"));
    242     if (HasTagName(previous, kImage) || HasTagName(previous, kBreak))
    243       continue;
    244 
    245     // We only expect <p> and <label> tags to contain the full label text.
    246     CR_DEFINE_STATIC_LOCAL(WebString, kPage, ("p"));
    247     CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label"));
    248     if (HasTagName(previous, kPage) || HasTagName(previous, kLabel))
    249       inferred_label = FindChildText(previous);
    250 
    251     break;
    252   }
    253 
    254   base::TrimWhitespace(inferred_label, base::TRIM_ALL, &inferred_label);
    255   return inferred_label;
    256 }
    257 
    258 // Helper for |InferLabelForElement()| that infers a label, if possible, from
    259 // enclosing list item,
    260 // e.g. <li>Some Text<input ...><input ...><input ...></tr>
    261 base::string16 InferLabelFromListItem(const WebFormControlElement& element) {
    262   WebNode parent = element.parentNode();
    263   CR_DEFINE_STATIC_LOCAL(WebString, kListItem, ("li"));
    264   while (!parent.isNull() && parent.isElementNode() &&
    265          !parent.to<WebElement>().hasHTMLTagName(kListItem)) {
    266     parent = parent.parentNode();
    267   }
    268 
    269   if (!parent.isNull() && HasTagName(parent, kListItem))
    270     return FindChildText(parent);
    271 
    272   return base::string16();
    273 }
    274 
    275 // Helper for |InferLabelForElement()| that infers a label, if possible, from
    276 // surrounding table structure,
    277 // e.g. <tr><td>Some Text</td><td><input ...></td></tr>
    278 // or   <tr><th>Some Text</th><td><input ...></td></tr>
    279 // or   <tr><td><b>Some Text</b></td><td><b><input ...></b></td></tr>
    280 // or   <tr><th><b>Some Text</b></th><td><b><input ...></b></td></tr>
    281 base::string16 InferLabelFromTableColumn(const WebFormControlElement& element) {
    282   CR_DEFINE_STATIC_LOCAL(WebString, kTableCell, ("td"));
    283   WebNode parent = element.parentNode();
    284   while (!parent.isNull() && parent.isElementNode() &&
    285          !parent.to<WebElement>().hasHTMLTagName(kTableCell)) {
    286     parent = parent.parentNode();
    287   }
    288 
    289   if (parent.isNull())
    290     return base::string16();
    291 
    292   // Check all previous siblings, skipping non-element nodes, until we find a
    293   // non-empty text block.
    294   base::string16 inferred_label;
    295   WebNode previous = parent.previousSibling();
    296   CR_DEFINE_STATIC_LOCAL(WebString, kTableHeader, ("th"));
    297   while (inferred_label.empty() && !previous.isNull()) {
    298     if (HasTagName(previous, kTableCell) || HasTagName(previous, kTableHeader))
    299       inferred_label = FindChildText(previous);
    300 
    301     previous = previous.previousSibling();
    302   }
    303 
    304   return inferred_label;
    305 }
    306 
    307 // Helper for |InferLabelForElement()| that infers a label, if possible, from
    308 // surrounding table structure,
    309 // e.g. <tr><td>Some Text</td></tr><tr><td><input ...></td></tr>
    310 base::string16 InferLabelFromTableRow(const WebFormControlElement& element) {
    311   CR_DEFINE_STATIC_LOCAL(WebString, kTableRow, ("tr"));
    312   WebNode parent = element.parentNode();
    313   while (!parent.isNull() && parent.isElementNode() &&
    314          !parent.to<WebElement>().hasHTMLTagName(kTableRow)) {
    315     parent = parent.parentNode();
    316   }
    317 
    318   if (parent.isNull())
    319     return base::string16();
    320 
    321   // Check all previous siblings, skipping non-element nodes, until we find a
    322   // non-empty text block.
    323   base::string16 inferred_label;
    324   WebNode previous = parent.previousSibling();
    325   while (inferred_label.empty() && !previous.isNull()) {
    326     if (HasTagName(previous, kTableRow))
    327       inferred_label = FindChildText(previous);
    328 
    329     previous = previous.previousSibling();
    330   }
    331 
    332   return inferred_label;
    333 }
    334 
    335 // Helper for |InferLabelForElement()| that infers a label, if possible, from
    336 // a surrounding div table,
    337 // e.g. <div>Some Text<span><input ...></span></div>
    338 // e.g. <div>Some Text</div><div><input ...></div>
    339 base::string16 InferLabelFromDivTable(const WebFormControlElement& element) {
    340   WebNode node = element.parentNode();
    341   bool looking_for_parent = true;
    342 
    343   // Search the sibling and parent <div>s until we find a candidate label.
    344   base::string16 inferred_label;
    345   CR_DEFINE_STATIC_LOCAL(WebString, kDiv, ("div"));
    346   CR_DEFINE_STATIC_LOCAL(WebString, kTable, ("table"));
    347   CR_DEFINE_STATIC_LOCAL(WebString, kFieldSet, ("fieldset"));
    348   while (inferred_label.empty() && !node.isNull()) {
    349     if (HasTagName(node, kDiv)) {
    350       looking_for_parent = false;
    351       inferred_label = FindChildText(node);
    352     } else if (looking_for_parent &&
    353                (HasTagName(node, kTable) || HasTagName(node, kFieldSet))) {
    354       // If the element is in a table or fieldset, its label most likely is too.
    355       break;
    356     }
    357 
    358     if (node.previousSibling().isNull()) {
    359       // If there are no more siblings, continue walking up the tree.
    360       looking_for_parent = true;
    361     }
    362 
    363     if (looking_for_parent)
    364       node = node.parentNode();
    365     else
    366       node = node.previousSibling();
    367   }
    368 
    369   return inferred_label;
    370 }
    371 
    372 // Helper for |InferLabelForElement()| that infers a label, if possible, from
    373 // a surrounding definition list,
    374 // e.g. <dl><dt>Some Text</dt><dd><input ...></dd></dl>
    375 // e.g. <dl><dt><b>Some Text</b></dt><dd><b><input ...></b></dd></dl>
    376 base::string16 InferLabelFromDefinitionList(
    377     const WebFormControlElement& element) {
    378   CR_DEFINE_STATIC_LOCAL(WebString, kDefinitionData, ("dd"));
    379   WebNode parent = element.parentNode();
    380   while (!parent.isNull() && parent.isElementNode() &&
    381          !parent.to<WebElement>().hasHTMLTagName(kDefinitionData))
    382     parent = parent.parentNode();
    383 
    384   if (parent.isNull() || !HasTagName(parent, kDefinitionData))
    385     return base::string16();
    386 
    387   // Skip by any intervening text nodes.
    388   WebNode previous = parent.previousSibling();
    389   while (!previous.isNull() && previous.isTextNode())
    390     previous = previous.previousSibling();
    391 
    392   CR_DEFINE_STATIC_LOCAL(WebString, kDefinitionTag, ("dt"));
    393   if (previous.isNull() || !HasTagName(previous, kDefinitionTag))
    394     return base::string16();
    395 
    396   return FindChildText(previous);
    397 }
    398 
    399 // Infers corresponding label for |element| from surrounding context in the DOM,
    400 // e.g. the contents of the preceding <p> tag or text element.
    401 base::string16 InferLabelForElement(const WebFormControlElement& element) {
    402   base::string16 inferred_label = InferLabelFromPrevious(element);
    403   if (!inferred_label.empty())
    404     return inferred_label;
    405 
    406   // If we didn't find a label, check for list item case.
    407   inferred_label = InferLabelFromListItem(element);
    408   if (!inferred_label.empty())
    409     return inferred_label;
    410 
    411   // If we didn't find a label, check for table cell case.
    412   inferred_label = InferLabelFromTableColumn(element);
    413   if (!inferred_label.empty())
    414     return inferred_label;
    415 
    416   // If we didn't find a label, check for table row case.
    417   inferred_label = InferLabelFromTableRow(element);
    418   if (!inferred_label.empty())
    419     return inferred_label;
    420 
    421   // If we didn't find a label, check for definition list case.
    422   inferred_label = InferLabelFromDefinitionList(element);
    423   if (!inferred_label.empty())
    424     return inferred_label;
    425 
    426   // If we didn't find a label, check for div table case.
    427   return InferLabelFromDivTable(element);
    428 }
    429 
    430 // Fills |option_strings| with the values of the <option> elements present in
    431 // |select_element|.
    432 void GetOptionStringsFromElement(const WebSelectElement& select_element,
    433                                  std::vector<base::string16>* option_values,
    434                                  std::vector<base::string16>* option_contents) {
    435   DCHECK(!select_element.isNull());
    436 
    437   option_values->clear();
    438   option_contents->clear();
    439   WebVector<WebElement> list_items = select_element.listItems();
    440 
    441   // Constrain the maximum list length to prevent a malicious site from DOS'ing
    442   // the browser, without entirely breaking autocomplete for some extreme
    443   // legitimate sites: http://crbug.com/49332 and http://crbug.com/363094
    444   if (list_items.size() > kMaxListSize)
    445     return;
    446 
    447   option_values->reserve(list_items.size());
    448   option_contents->reserve(list_items.size());
    449   for (size_t i = 0; i < list_items.size(); ++i) {
    450     if (IsOptionElement(list_items[i])) {
    451       const WebOptionElement option = list_items[i].toConst<WebOptionElement>();
    452       option_values->push_back(option.value());
    453       option_contents->push_back(option.text());
    454     }
    455   }
    456 }
    457 
    458 // The callback type used by |ForEachMatchingFormField()|.
    459 typedef void (*Callback)(const FormFieldData&,
    460                          bool, /* is_initiating_element */
    461                          blink::WebFormControlElement*);
    462 
    463 // For each autofillable field in |data| that matches a field in the |form|,
    464 // the |callback| is invoked with the corresponding |form| field data.
    465 void ForEachMatchingFormField(const WebFormElement& form_element,
    466                               const WebElement& initiating_element,
    467                               const FormData& data,
    468                               bool only_focusable_elements,
    469                               bool force_override,
    470                               Callback callback) {
    471   std::vector<WebFormControlElement> control_elements;
    472   ExtractAutofillableElements(form_element, REQUIRE_AUTOCOMPLETE,
    473                               &control_elements);
    474 
    475   if (control_elements.size() != data.fields.size()) {
    476     // This case should be reachable only for pathological websites and tests,
    477     // which add or remove form fields while the user is interacting with the
    478     // Autofill popup.
    479     return;
    480   }
    481 
    482   // It's possible that the site has injected fields into the form after the
    483   // page has loaded, so we can't assert that the size of the cached control
    484   // elements is equal to the size of the fields in |form|.  Fortunately, the
    485   // one case in the wild where this happens, paypal.com signup form, the fields
    486   // are appended to the end of the form and are not visible.
    487   for (size_t i = 0; i < control_elements.size(); ++i) {
    488     WebFormControlElement* element = &control_elements[i];
    489 
    490     if (base::string16(element->nameForAutofill()) != data.fields[i].name) {
    491       // This case should be reachable only for pathological websites, which
    492       // rename form fields while the user is interacting with the Autofill
    493       // popup.  I (isherman) am not aware of any such websites, and so am
    494       // optimistically including a NOTREACHED().  If you ever trip this check,
    495       // please file a bug against me.
    496       NOTREACHED();
    497       continue;
    498     }
    499 
    500     bool is_initiating_element = (*element == initiating_element);
    501 
    502     // Only autofill empty fields and the field that initiated the filling,
    503     // i.e. the field the user is currently editing and interacting with.
    504     const WebInputElement* input_element = toWebInputElement(element);
    505     if (!force_override && !is_initiating_element &&
    506         ((IsAutofillableInputElement(input_element) ||
    507           IsTextAreaElement(*element)) &&
    508          !element->value().isEmpty()))
    509       continue;
    510 
    511     if (!element->isEnabled() || element->isReadOnly() ||
    512         (only_focusable_elements && !element->isFocusable()))
    513       continue;
    514 
    515     callback(data.fields[i], is_initiating_element, element);
    516   }
    517 }
    518 
    519 // Sets the |field|'s value to the value in |data|.
    520 // Also sets the "autofilled" attribute, causing the background to be yellow.
    521 void FillFormField(const FormFieldData& data,
    522                    bool is_initiating_node,
    523                    blink::WebFormControlElement* field) {
    524   // Nothing to fill.
    525   if (data.value.empty())
    526     return;
    527 
    528   if (!data.is_autofilled)
    529     return;
    530 
    531   WebInputElement* input_element = toWebInputElement(field);
    532   if (IsCheckableElement(input_element)) {
    533     input_element->setChecked(data.is_checked, true);
    534   } else {
    535     base::string16 value = data.value;
    536     if (IsTextInput(input_element) || IsMonthInput(input_element)) {
    537       // If the maxlength attribute contains a negative value, maxLength()
    538       // returns the default maxlength value.
    539       value = value.substr(0, input_element->maxLength());
    540     }
    541     field->setValue(value, true);
    542   }
    543 
    544   field->setAutofilled(true);
    545 
    546   if (is_initiating_node &&
    547       ((IsTextInput(input_element) || IsMonthInput(input_element)) ||
    548        IsTextAreaElement(*field))) {
    549     int length = field->value().length();
    550     field->setSelectionRange(length, length);
    551     // Clear the current IME composition (the underline), if there is one.
    552     field->document().frame()->unmarkText();
    553   }
    554 }
    555 
    556 // Sets the |field|'s "suggested" (non JS visible) value to the value in |data|.
    557 // Also sets the "autofilled" attribute, causing the background to be yellow.
    558 void PreviewFormField(const FormFieldData& data,
    559                       bool is_initiating_node,
    560                       blink::WebFormControlElement* field) {
    561   // Nothing to preview.
    562   if (data.value.empty())
    563     return;
    564 
    565   if (!data.is_autofilled)
    566     return;
    567 
    568   // Preview input, textarea and select fields. For input fields, excludes
    569   // checkboxes and radio buttons, as there is no provision for
    570   // setSuggestedCheckedValue in WebInputElement.
    571   WebInputElement* input_element = toWebInputElement(field);
    572   if (IsTextInput(input_element) || IsMonthInput(input_element)) {
    573     // If the maxlength attribute contains a negative value, maxLength()
    574     // returns the default maxlength value.
    575     input_element->setSuggestedValue(
    576       data.value.substr(0, input_element->maxLength()));
    577     input_element->setAutofilled(true);
    578   } else if (IsTextAreaElement(*field) || IsSelectElement(*field)) {
    579     field->setSuggestedValue(data.value);
    580     field->setAutofilled(true);
    581   }
    582 
    583   if (is_initiating_node &&
    584       (IsTextInput(input_element) || IsTextAreaElement(*field))) {
    585     // Select the part of the text that the user didn't type.
    586     int start = field->value().length();
    587     int end = field->suggestedValue().length();
    588     field->setSelectionRange(start, end);
    589   }
    590 }
    591 
    592 std::string RetrievalMethodToString(
    593     const WebElementDescriptor::RetrievalMethod& method) {
    594   switch (method) {
    595     case WebElementDescriptor::CSS_SELECTOR:
    596       return "CSS_SELECTOR";
    597     case WebElementDescriptor::ID:
    598       return "ID";
    599     case WebElementDescriptor::NONE:
    600       return "NONE";
    601   }
    602   NOTREACHED();
    603   return "UNKNOWN";
    604 }
    605 
    606 // Recursively checks whether |node| or any of its children have a non-empty
    607 // bounding box. The recursion depth is bounded by |depth|.
    608 bool IsWebNodeVisibleImpl(const blink::WebNode& node, const int depth) {
    609   if (depth < 0)
    610     return false;
    611   if (node.hasNonEmptyBoundingBox())
    612     return true;
    613 
    614   // The childNodes method is not a const method. Therefore it cannot be called
    615   // on a const reference. Therefore we need a const cast.
    616   const blink::WebNodeList& children =
    617       const_cast<blink::WebNode&>(node).childNodes();
    618   size_t length = children.length();
    619   for (size_t i = 0; i < length; ++i) {
    620     const blink::WebNode& item = children.item(i);
    621     if (IsWebNodeVisibleImpl(item, depth - 1))
    622       return true;
    623   }
    624   return false;
    625 }
    626 
    627 }  // namespace
    628 
// NOTE(review): presumably the upper bound on how many form fields are parsed;
// its users are outside this part of the file -- confirm against the header
// and the callers.
const size_t kMaxParseableFields = 200;
    630 
    631 bool IsMonthInput(const WebInputElement* element) {
    632   CR_DEFINE_STATIC_LOCAL(WebString, kMonth, ("month"));
    633   return element && !element->isNull() && element->formControlType() == kMonth;
    634 }
    635 
    636 // All text fields, including password fields, should be extracted.
    637 bool IsTextInput(const WebInputElement* element) {
    638   return element && !element->isNull() && element->isTextField();
    639 }
    640 
    641 bool IsSelectElement(const WebFormControlElement& element) {
    642   // Static for improved performance.
    643   CR_DEFINE_STATIC_LOCAL(WebString, kSelectOne, ("select-one"));
    644   return !element.isNull() && element.formControlType() == kSelectOne;
    645 }
    646 
    647 bool IsTextAreaElement(const WebFormControlElement& element) {
    648   // Static for improved performance.
    649   CR_DEFINE_STATIC_LOCAL(WebString, kTextArea, ("textarea"));
    650   return !element.isNull() && element.formControlType() == kTextArea;
    651 }
    652 
    653 bool IsCheckableElement(const WebInputElement* element) {
    654   if (!element || element->isNull())
    655     return false;
    656 
    657   return element->isCheckbox() || element->isRadioButton();
    658 }
    659 
    660 bool IsAutofillableInputElement(const WebInputElement* element) {
    661   return IsTextInput(element) ||
    662          IsMonthInput(element) ||
    663          IsCheckableElement(element);
    664 }
    665 
    666 const base::string16 GetFormIdentifier(const WebFormElement& form) {
    667   base::string16 identifier = form.name();
    668   CR_DEFINE_STATIC_LOCAL(WebString, kId, ("id"));
    669   if (identifier.empty())
    670     identifier = form.getAttribute(kId);
    671 
    672   return identifier;
    673 }
    674 
    675 bool IsWebNodeVisible(const blink::WebNode& node) {
    676   // In the bug http://crbug.com/237216 the form's bounding box is empty
    677   // however the form has non empty children. Thus we need to look at the
    678   // form's children.
    679   int kNodeSearchDepth = 2;
    680   return IsWebNodeVisibleImpl(node, kNodeSearchDepth);
    681 }
    682 
    683 bool ClickElement(const WebDocument& document,
    684                   const WebElementDescriptor& element_descriptor) {
    685   WebString web_descriptor = WebString::fromUTF8(element_descriptor.descriptor);
    686   blink::WebElement element;
    687 
    688   switch (element_descriptor.retrieval_method) {
    689     case WebElementDescriptor::CSS_SELECTOR: {
    690       WebExceptionCode ec = 0;
    691       element = document.querySelector(web_descriptor, ec);
    692       if (ec)
    693         DVLOG(1) << "Query selector failed. Error code: " << ec << ".";
    694       break;
    695     }
    696     case WebElementDescriptor::ID:
    697       element = document.getElementById(web_descriptor);
    698       break;
    699     case WebElementDescriptor::NONE:
    700       return true;
    701   }
    702 
    703   if (element.isNull()) {
    704     DVLOG(1) << "Could not find "
    705              << element_descriptor.descriptor
    706              << " by "
    707              << RetrievalMethodToString(element_descriptor.retrieval_method)
    708              << ".";
    709     return false;
    710   }
    711 
    712   element.simulateClick();
    713   return true;
    714 }
    715 
    716 // Fills |autofillable_elements| with all the auto-fillable form control
    717 // elements in |form_element|.
    718 void ExtractAutofillableElements(
    719     const WebFormElement& form_element,
    720     RequirementsMask requirements,
    721     std::vector<WebFormControlElement>* autofillable_elements) {
    722   WebVector<WebFormControlElement> control_elements;
    723   form_element.getFormControlElements(control_elements);
    724 
    725   autofillable_elements->clear();
    726   for (size_t i = 0; i < control_elements.size(); ++i) {
    727     WebFormControlElement element = control_elements[i];
    728     if (!IsAutofillableElement(element))
    729       continue;
    730 
    731     if (requirements & REQUIRE_AUTOCOMPLETE) {
    732       // TODO(isherman): WebKit currently doesn't handle the autocomplete
    733       // attribute for select or textarea elements, but it probably should.
    734       WebInputElement* input_element = toWebInputElement(&control_elements[i]);
    735       if (IsAutofillableInputElement(input_element) &&
    736           !SatisfiesRequireAutocomplete(*input_element))
    737         continue;
    738     }
    739 
    740     autofillable_elements->push_back(element);
    741   }
    742 }
    743 
    744 void WebFormControlElementToFormField(const WebFormControlElement& element,
    745                                       ExtractMask extract_mask,
    746                                       FormFieldData* field) {
    747   DCHECK(field);
    748   DCHECK(!element.isNull());
    749   CR_DEFINE_STATIC_LOCAL(WebString, kAutocomplete, ("autocomplete"));
    750 
    751   // The label is not officially part of a WebFormControlElement; however, the
    752   // labels for all form control elements are scraped from the DOM and set in
    753   // WebFormElementToFormData.
    754   field->name = element.nameForAutofill();
    755   field->form_control_type = base::UTF16ToUTF8(element.formControlType());
    756   field->autocomplete_attribute =
    757       base::UTF16ToUTF8(element.getAttribute(kAutocomplete));
    758   if (field->autocomplete_attribute.size() > kMaxDataLength) {
    759     // Discard overly long attribute values to avoid DOS-ing the browser
    760     // process.  However, send over a default string to indicate that the
    761     // attribute was present.
    762     field->autocomplete_attribute = "x-max-data-length-exceeded";
    763   }
    764 
    765   if (!IsAutofillableElement(element))
    766     return;
    767 
    768   const WebInputElement* input_element = toWebInputElement(&element);
    769   if (IsAutofillableInputElement(input_element) ||
    770       IsTextAreaElement(element)) {
    771     field->is_autofilled = element.isAutofilled();
    772     field->is_focusable = element.isFocusable();
    773     field->should_autocomplete = element.autoComplete();
    774     field->text_direction = element.directionForFormData() ==
    775         "rtl" ? base::i18n::RIGHT_TO_LEFT : base::i18n::LEFT_TO_RIGHT;
    776   }
    777 
    778   if (IsAutofillableInputElement(input_element)) {
    779     if (IsTextInput(input_element))
    780       field->max_length = input_element->maxLength();
    781 
    782     field->is_checkable = IsCheckableElement(input_element);
    783     field->is_checked = input_element->isChecked();
    784   } else if (IsTextAreaElement(element)) {
    785     // Nothing more to do in this case.
    786   } else if (extract_mask & EXTRACT_OPTIONS) {
    787     // Set option strings on the field if available.
    788     DCHECK(IsSelectElement(element));
    789     const WebSelectElement select_element = element.toConst<WebSelectElement>();
    790     GetOptionStringsFromElement(select_element,
    791                                 &field->option_values,
    792                                 &field->option_contents);
    793   }
    794 
    795   if (!(extract_mask & EXTRACT_VALUE))
    796     return;
    797 
    798   base::string16 value = element.value();
    799 
    800   if (IsSelectElement(element) && (extract_mask & EXTRACT_OPTION_TEXT)) {
    801     const WebSelectElement select_element = element.toConst<WebSelectElement>();
    802     // Convert the |select_element| value to text if requested.
    803     WebVector<WebElement> list_items = select_element.listItems();
    804     for (size_t i = 0; i < list_items.size(); ++i) {
    805       if (IsOptionElement(list_items[i])) {
    806         const WebOptionElement option_element =
    807             list_items[i].toConst<WebOptionElement>();
    808         if (option_element.value() == value) {
    809           value = option_element.text();
    810           break;
    811         }
    812       }
    813     }
    814   }
    815 
    816   // Constrain the maximum data length to prevent a malicious site from DOS'ing
    817   // the browser: http://crbug.com/49332
    818   if (value.size() > kMaxDataLength)
    819     value = value.substr(0, kMaxDataLength);
    820 
    821   field->value = value;
    822 }
    823 
    824 bool WebFormElementToFormData(
    825     const blink::WebFormElement& form_element,
    826     const blink::WebFormControlElement& form_control_element,
    827     RequirementsMask requirements,
    828     ExtractMask extract_mask,
    829     FormData* form,
    830     FormFieldData* field) {
    831   CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label"));
    832   CR_DEFINE_STATIC_LOCAL(WebString, kFor, ("for"));
    833   CR_DEFINE_STATIC_LOCAL(WebString, kHidden, ("hidden"));
    834 
    835   const WebFrame* frame = form_element.document().frame();
    836   if (!frame)
    837     return false;
    838 
    839   if (requirements & REQUIRE_AUTOCOMPLETE && !form_element.autoComplete())
    840     return false;
    841 
    842   form->name = GetFormIdentifier(form_element);
    843   form->origin = frame->document().url();
    844   form->action = frame->document().completeURL(form_element.action());
    845   form->user_submitted = form_element.wasUserSubmitted();
    846 
    847   // If the completed URL is not valid, just use the action we get from
    848   // WebKit.
    849   if (!form->action.is_valid())
    850     form->action = GURL(form_element.action());
    851 
    852   // A map from a FormFieldData's name to the FormFieldData itself.
    853   std::map<base::string16, FormFieldData*> name_map;
    854 
    855   // The extracted FormFields.  We use pointers so we can store them in
    856   // |name_map|.
    857   ScopedVector<FormFieldData> form_fields;
    858 
    859   WebVector<WebFormControlElement> control_elements;
    860   form_element.getFormControlElements(control_elements);
    861 
    862   // A vector of bools that indicate whether each field in the form meets the
    863   // requirements and thus will be in the resulting |form|.
    864   std::vector<bool> fields_extracted(control_elements.size(), false);
    865 
    866   for (size_t i = 0; i < control_elements.size(); ++i) {
    867     const WebFormControlElement& control_element = control_elements[i];
    868 
    869     if (!IsAutofillableElement(control_element))
    870       continue;
    871 
    872     const WebInputElement* input_element = toWebInputElement(&control_element);
    873     if (requirements & REQUIRE_AUTOCOMPLETE &&
    874         IsAutofillableInputElement(input_element) &&
    875         !SatisfiesRequireAutocomplete(*input_element))
    876       continue;
    877 
    878     // Create a new FormFieldData, fill it out and map it to the field's name.
    879     FormFieldData* form_field = new FormFieldData;
    880     WebFormControlElementToFormField(control_element, extract_mask, form_field);
    881     form_fields.push_back(form_field);
    882     // TODO(jhawkins): A label element is mapped to a form control element's id.
    883     // field->name() will contain the id only if the name does not exist.  Add
    884     // an id() method to WebFormControlElement and use that here.
    885     name_map[form_field->name] = form_field;
    886     fields_extracted[i] = true;
    887   }
    888 
    889   // If we failed to extract any fields, give up.  Also, to avoid overly
    890   // expensive computation, we impose a maximum number of allowable fields.
    891   if (form_fields.empty() || form_fields.size() > kMaxParseableFields)
    892     return false;
    893 
    894   // Loop through the label elements inside the form element.  For each label
    895   // element, get the corresponding form control element, use the form control
    896   // element's name as a key into the <name, FormFieldData> map to find the
    897   // previously created FormFieldData and set the FormFieldData's label to the
    898   // label.firstChild().nodeValue() of the label element.
    899   WebElementCollection labels = form_element.getElementsByHTMLTagName(kLabel);
    900   DCHECK(!labels.isNull());
    901   for (WebElement item = labels.firstItem(); !item.isNull();
    902        item = labels.nextItem()) {
    903     WebLabelElement label = item.to<WebLabelElement>();
    904     WebFormControlElement field_element =
    905         label.correspondingControl().to<WebFormControlElement>();
    906 
    907     base::string16 element_name;
    908     if (field_element.isNull()) {
    909       // Sometimes site authors will incorrectly specify the corresponding
    910       // field element's name rather than its id, so we compensate here.
    911       element_name = label.getAttribute(kFor);
    912     } else if (
    913         !field_element.isFormControlElement() ||
    914         field_element.formControlType() == kHidden) {
    915       continue;
    916     } else {
    917       element_name = field_element.nameForAutofill();
    918     }
    919 
    920     std::map<base::string16, FormFieldData*>::iterator iter =
    921         name_map.find(element_name);
    922     if (iter != name_map.end()) {
    923       base::string16 label_text = FindChildText(label);
    924 
    925       // Concatenate labels because some sites might have multiple label
    926       // candidates.
    927       if (!iter->second->label.empty() && !label_text.empty())
    928         iter->second->label += base::ASCIIToUTF16(" ");
    929       iter->second->label += label_text;
    930     }
    931   }
    932 
    933   // Loop through the form control elements, extracting the label text from
    934   // the DOM.  We use the |fields_extracted| vector to make sure we assign the
    935   // extracted label to the correct field, as it's possible |form_fields| will
    936   // not contain all of the elements in |control_elements|.
    937   for (size_t i = 0, field_idx = 0;
    938        i < control_elements.size() && field_idx < form_fields.size(); ++i) {
    939     // This field didn't meet the requirements, so don't try to find a label
    940     // for it.
    941     if (!fields_extracted[i])
    942       continue;
    943 
    944     const WebFormControlElement& control_element = control_elements[i];
    945     if (form_fields[field_idx]->label.empty())
    946       form_fields[field_idx]->label = InferLabelForElement(control_element);
    947 
    948     if (field && form_control_element == control_element)
    949       *field = *form_fields[field_idx];
    950 
    951     ++field_idx;
    952   }
    953 
    954   // Copy the created FormFields into the resulting FormData object.
    955   for (ScopedVector<FormFieldData>::const_iterator iter = form_fields.begin();
    956        iter != form_fields.end(); ++iter) {
    957     form->fields.push_back(**iter);
    958   }
    959 
    960   return true;
    961 }
    962 
    963 bool FindFormAndFieldForFormControlElement(const WebFormControlElement& element,
    964                                            FormData* form,
    965                                            FormFieldData* field,
    966                                            RequirementsMask requirements) {
    967   if (!IsAutofillableElement(element))
    968     return false;
    969 
    970   const WebFormElement form_element = element.form();
    971   if (form_element.isNull())
    972     return false;
    973 
    974   ExtractMask extract_mask =
    975       static_cast<ExtractMask>(EXTRACT_VALUE | EXTRACT_OPTIONS);
    976   return WebFormElementToFormData(form_element,
    977                                   element,
    978                                   requirements,
    979                                   extract_mask,
    980                                   form,
    981                                   field);
    982 }
    983 
    984 void FillForm(const FormData& form, const WebFormControlElement& element) {
    985   WebFormElement form_element = element.form();
    986   if (form_element.isNull())
    987     return;
    988 
    989   ForEachMatchingFormField(form_element,
    990                            element,
    991                            form,
    992                            true, /* only_focusable_elements */
    993                            false, /* don't force override */
    994                            &FillFormField);
    995 }
    996 
    997 void FillFormIncludingNonFocusableElements(const FormData& form_data,
    998                                            const WebFormElement& form_element) {
    999   if (form_element.isNull())
   1000     return;
   1001 
   1002   ForEachMatchingFormField(form_element,
   1003                            WebInputElement(),
   1004                            form_data,
   1005                            false, /* only_focusable_elements */
   1006                            true, /* force override */
   1007                            &FillFormField);
   1008 }
   1009 
   1010 void PreviewForm(const FormData& form, const WebFormControlElement& element) {
   1011   WebFormElement form_element = element.form();
   1012   if (form_element.isNull())
   1013     return;
   1014 
   1015   ForEachMatchingFormField(form_element,
   1016                            element,
   1017                            form,
   1018                            true, /* only_focusable_elements */
   1019                            false, /* dont force override */
   1020                            &PreviewFormField);
   1021 }
   1022 
   1023 bool ClearPreviewedFormWithElement(const WebFormControlElement& element,
   1024                                    bool was_autofilled) {
   1025   WebFormElement form_element = element.form();
   1026   if (form_element.isNull())
   1027     return false;
   1028 
   1029   std::vector<WebFormControlElement> control_elements;
   1030   ExtractAutofillableElements(form_element, REQUIRE_AUTOCOMPLETE,
   1031                               &control_elements);
   1032   for (size_t i = 0; i < control_elements.size(); ++i) {
   1033     // There might be unrelated elements in this form which have already been
   1034     // auto-filled.  For example, the user might have already filled the address
   1035     // part of a form and now be dealing with the credit card section.  We only
   1036     // want to reset the auto-filled status for fields that were previewed.
   1037     WebFormControlElement control_element = control_elements[i];
   1038 
   1039     // Only text input, textarea and select elements can be previewed.
   1040     WebInputElement* input_element = toWebInputElement(&control_element);
   1041     if (!IsTextInput(input_element) &&
   1042         !IsMonthInput(input_element) &&
   1043         !IsTextAreaElement(control_element) &&
   1044         !IsSelectElement(control_element))
   1045       continue;
   1046 
   1047     // If the element is not auto-filled, we did not preview it,
   1048     // so there is nothing to reset.
   1049     if(!control_element.isAutofilled())
   1050       continue;
   1051 
   1052     if ((IsTextInput(input_element) ||
   1053          IsMonthInput(input_element) ||
   1054          IsTextAreaElement(control_element) ||
   1055          IsSelectElement(control_element)) &&
   1056         control_element.suggestedValue().isEmpty())
   1057       continue;
   1058 
   1059     // Clear the suggested value. For the initiating node, also restore the
   1060     // original value.
   1061     if (IsTextInput(input_element) || IsMonthInput(input_element) ||
   1062         IsTextAreaElement(control_element)) {
   1063       control_element.setSuggestedValue(WebString());
   1064       bool is_initiating_node = (element == control_element);
   1065       if (is_initiating_node) {
   1066         control_element.setAutofilled(was_autofilled);
   1067         // Clearing the suggested value in the focused node (above) can cause
   1068         // selection to be lost. We force selection range to restore the text
   1069         // cursor.
   1070         int length = control_element.value().length();
   1071         control_element.setSelectionRange(length, length);
   1072       } else {
   1073         control_element.setAutofilled(false);
   1074       }
   1075     } else if (IsSelectElement(control_element)) {
   1076       control_element.setSuggestedValue(WebString());
   1077       control_element.setAutofilled(false);
   1078     }
   1079   }
   1080 
   1081   return true;
   1082 }
   1083 
   1084 bool FormWithElementIsAutofilled(const WebInputElement& element) {
   1085   WebFormElement form_element = element.form();
   1086   if (form_element.isNull())
   1087     return false;
   1088 
   1089   std::vector<WebFormControlElement> control_elements;
   1090   ExtractAutofillableElements(form_element, REQUIRE_AUTOCOMPLETE,
   1091                               &control_elements);
   1092   for (size_t i = 0; i < control_elements.size(); ++i) {
   1093     WebInputElement* input_element = toWebInputElement(&control_elements[i]);
   1094     if (!IsAutofillableInputElement(input_element))
   1095       continue;
   1096 
   1097     if (input_element->isAutofilled())
   1098       return true;
   1099   }
   1100 
   1101   return false;
   1102 }
   1103 
   1104 bool IsWebpageEmpty(const blink::WebFrame* frame) {
   1105   blink::WebDocument document = frame->document();
   1106 
   1107   return IsWebElementEmpty(document.head()) &&
   1108          IsWebElementEmpty(document.body());
   1109 }
   1110 
   1111 bool IsWebElementEmpty(const blink::WebElement& element) {
   1112   // This array contains all tags which can be present in an empty page.
   1113   const char* const kAllowedValue[] = {
   1114     "script",
   1115     "meta",
   1116     "title",
   1117   };
   1118   const size_t kAllowedValueLength = arraysize(kAllowedValue);
   1119 
   1120   if (element.isNull())
   1121     return true;
   1122   // The childNodes method is not a const method. Therefore it cannot be called
   1123   // on a const reference. Therefore we need a const cast.
   1124   const blink::WebNodeList& children =
   1125       const_cast<blink::WebElement&>(element).childNodes();
   1126   for (size_t i = 0; i < children.length(); ++i) {
   1127     const blink::WebNode& item = children.item(i);
   1128 
   1129     if (item.isTextNode() &&
   1130         !base::ContainsOnlyChars(item.nodeValue().utf8(),
   1131                                  base::kWhitespaceASCII))
   1132       return false;
   1133 
   1134     // We ignore all other items with names which begin with
   1135     // the character # because they are not html tags.
   1136     if (item.nodeName().utf8()[0] == '#')
   1137       continue;
   1138 
   1139     bool tag_is_allowed = false;
   1140     // Test if the item name is in the kAllowedValue array
   1141     for (size_t allowed_value_index = 0;
   1142          allowed_value_index < kAllowedValueLength; ++allowed_value_index) {
   1143       if (HasTagName(item,
   1144                      WebString::fromUTF8(kAllowedValue[allowed_value_index]))) {
   1145         tag_is_allowed = true;
   1146         break;
   1147       }
   1148     }
   1149     if (!tag_is_allowed)
   1150       return false;
   1151   }
   1152   return true;
   1153 }
   1154 
   1155 gfx::RectF GetScaledBoundingBox(float scale, WebFormControlElement* element) {
   1156   gfx::Rect bounding_box(element->boundsInViewportSpace());
   1157   return gfx::RectF(bounding_box.x() * scale,
   1158                     bounding_box.y() * scale,
   1159                     bounding_box.width() * scale,
   1160                     bounding_box.height() * scale);
   1161 }
   1162 
   1163 }  // namespace autofill
   1164