Home | History | Annotate | Download | only in renderer
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "components/autofill/content/renderer/form_autofill_util.h"
      6 
      7 #include <map>
      8 
      9 #include "base/command_line.h"
     10 #include "base/logging.h"
     11 #include "base/memory/scoped_vector.h"
     12 #include "base/metrics/field_trial.h"
     13 #include "base/strings/string_util.h"
     14 #include "base/strings/utf_string_conversions.h"
     15 #include "components/autofill/core/common/autofill_switches.h"
     16 #include "components/autofill/core/common/form_data.h"
     17 #include "components/autofill/core/common/form_field_data.h"
     18 #include "components/autofill/core/common/web_element_descriptor.h"
     19 #include "third_party/WebKit/public/platform/WebString.h"
     20 #include "third_party/WebKit/public/platform/WebVector.h"
     21 #include "third_party/WebKit/public/web/WebDocument.h"
     22 #include "third_party/WebKit/public/web/WebElement.h"
     23 #include "third_party/WebKit/public/web/WebExceptionCode.h"
     24 #include "third_party/WebKit/public/web/WebFormControlElement.h"
     25 #include "third_party/WebKit/public/web/WebFormElement.h"
     26 #include "third_party/WebKit/public/web/WebFrame.h"
     27 #include "third_party/WebKit/public/web/WebInputElement.h"
     28 #include "third_party/WebKit/public/web/WebLabelElement.h"
     29 #include "third_party/WebKit/public/web/WebNode.h"
     30 #include "third_party/WebKit/public/web/WebNodeList.h"
     31 #include "third_party/WebKit/public/web/WebOptionElement.h"
     32 #include "third_party/WebKit/public/web/WebSelectElement.h"
     33 #include "third_party/WebKit/public/web/WebTextAreaElement.h"
     34 
     35 using blink::WebDocument;
     36 using blink::WebElement;
     37 using blink::WebExceptionCode;
     38 using blink::WebFormControlElement;
     39 using blink::WebFormElement;
     40 using blink::WebFrame;
     41 using blink::WebInputElement;
     42 using blink::WebLabelElement;
     43 using blink::WebNode;
     44 using blink::WebNodeList;
     45 using blink::WebOptionElement;
     46 using blink::WebSelectElement;
     47 using blink::WebTextAreaElement;
     48 using blink::WebString;
     49 using blink::WebVector;
     50 
     51 namespace autofill {
     52 namespace {
     53 
     54 // The maximum length allowed for form data.
     55 const size_t kMaxDataLength = 1024;
     56 
     57 // A bit field mask for FillForm functions to not fill some fields.
     58 enum FieldFilterMask {
     59   FILTER_NONE                       = 0,
     60   FILTER_DISABLED_ELEMENTS          = 1 << 0,
     61   FILTER_READONLY_ELEMENTS          = 1 << 1,
     62   FILTER_NON_FOCUSABLE_ELEMENTS     = 1 << 2,
     63   FILTER_ALL_NON_EDITIABLE_ELEMENTS = FILTER_DISABLED_ELEMENTS |
     64                                       FILTER_READONLY_ELEMENTS |
     65                                       FILTER_NON_FOCUSABLE_ELEMENTS,
     66 };
     67 
     68 bool IsOptionElement(const WebElement& element) {
     69   CR_DEFINE_STATIC_LOCAL(WebString, kOption, ("option"));
     70   return element.hasTagName(kOption);
     71 }
     72 
     73 bool IsScriptElement(const WebElement& element) {
     74   CR_DEFINE_STATIC_LOCAL(WebString, kScript, ("script"));
     75   return element.hasTagName(kScript);
     76 }
     77 
     78 bool IsNoScriptElement(const WebElement& element) {
     79   CR_DEFINE_STATIC_LOCAL(WebString, kNoScript, ("noscript"));
     80   return element.hasTagName(kNoScript);
     81 }
     82 
     83 bool HasTagName(const WebNode& node, const blink::WebString& tag) {
     84   return node.isElementNode() && node.toConst<WebElement>().hasHTMLTagName(tag);
     85 }
     86 
     87 bool IsAutofillableElement(const WebFormControlElement& element) {
     88   const WebInputElement* input_element = toWebInputElement(&element);
     89   return IsAutofillableInputElement(input_element) ||
     90          IsSelectElement(element) ||
     91          IsTextAreaElement(element);
     92 }
     93 
     94 // Check whether the given field satisfies the REQUIRE_AUTOCOMPLETE requirement.
     95 bool SatisfiesRequireAutocomplete(const WebInputElement& input_element) {
     96   return input_element.autoComplete();
     97 }
     98 
     99 // Appends |suffix| to |prefix| so that any intermediary whitespace is collapsed
    100 // to a single space.  If |force_whitespace| is true, then the resulting string
    101 // is guaranteed to have a space between |prefix| and |suffix|.  Otherwise, the
    102 // result includes a space only if |prefix| has trailing whitespace or |suffix|
    103 // has leading whitespace.
    104 // A few examples:
    105 //  * CombineAndCollapseWhitespace("foo", "bar", false)       -> "foobar"
    106 //  * CombineAndCollapseWhitespace("foo", "bar", true)        -> "foo bar"
    107 //  * CombineAndCollapseWhitespace("foo ", "bar", false)      -> "foo bar"
    108 //  * CombineAndCollapseWhitespace("foo", " bar", false)      -> "foo bar"
    109 //  * CombineAndCollapseWhitespace("foo", " bar", true)       -> "foo bar"
    110 //  * CombineAndCollapseWhitespace("foo   ", "   bar", false) -> "foo bar"
    111 //  * CombineAndCollapseWhitespace(" foo", "bar ", false)     -> " foobar "
    112 //  * CombineAndCollapseWhitespace(" foo", "bar ", true)      -> " foo bar "
    113 const base::string16 CombineAndCollapseWhitespace(
    114     const base::string16& prefix,
    115     const base::string16& suffix,
    116     bool force_whitespace) {
    117   base::string16 prefix_trimmed;
    118   TrimPositions prefix_trailing_whitespace =
    119       TrimWhitespace(prefix, TRIM_TRAILING, &prefix_trimmed);
    120 
    121   // Recursively compute the children's text.
    122   base::string16 suffix_trimmed;
    123   TrimPositions suffix_leading_whitespace =
    124       TrimWhitespace(suffix, TRIM_LEADING, &suffix_trimmed);
    125 
    126   if (prefix_trailing_whitespace || suffix_leading_whitespace ||
    127       force_whitespace) {
    128     return prefix_trimmed + ASCIIToUTF16(" ") + suffix_trimmed;
    129   } else {
    130     return prefix_trimmed + suffix_trimmed;
    131   }
    132 }
    133 
    134 // This is a helper function for the FindChildText() function (see below).
    135 // Search depth is limited with the |depth| parameter.
    136 base::string16 FindChildTextInner(const WebNode& node, int depth) {
    137   if (depth <= 0 || node.isNull())
    138     return base::string16();
    139 
    140   // Skip over comments.
    141   if (node.nodeType() == WebNode::CommentNode)
    142     return FindChildTextInner(node.nextSibling(), depth - 1);
    143 
    144   if (node.nodeType() != WebNode::ElementNode &&
    145       node.nodeType() != WebNode::TextNode)
    146     return base::string16();
    147 
    148   // Ignore elements known not to contain inferable labels.
    149   if (node.isElementNode()) {
    150     const WebElement element = node.toConst<WebElement>();
    151     if (IsOptionElement(element) ||
    152         IsScriptElement(element) ||
    153         IsNoScriptElement(element) ||
    154         (element.isFormControlElement() &&
    155          IsAutofillableElement(element.toConst<WebFormControlElement>()))) {
    156       return base::string16();
    157     }
    158   }
    159 
    160   // Extract the text exactly at this node.
    161   base::string16 node_text = node.nodeValue();
    162 
    163   // Recursively compute the children's text.
    164   // Preserve inter-element whitespace separation.
    165   base::string16 child_text = FindChildTextInner(node.firstChild(), depth - 1);
    166   bool add_space = node.nodeType() == WebNode::TextNode && node_text.empty();
    167   node_text = CombineAndCollapseWhitespace(node_text, child_text, add_space);
    168 
    169   // Recursively compute the siblings' text.
    170   // Again, preserve inter-element whitespace separation.
    171   base::string16 sibling_text =
    172       FindChildTextInner(node.nextSibling(), depth - 1);
    173   add_space = node.nodeType() == WebNode::TextNode && node_text.empty();
    174   node_text = CombineAndCollapseWhitespace(node_text, sibling_text, add_space);
    175 
    176   return node_text;
    177 }
    178 
    179 // Returns the aggregated values of the descendants of |element| that are
    180 // non-empty text nodes.  This is a faster alternative to |innerText()| for
    181 // performance critical operations.  It does a full depth-first search so can be
    182 // used when the structure is not directly known.  However, unlike with
    183 // |innerText()|, the search depth and breadth are limited to a fixed threshold.
    184 // Whitespace is trimmed from text accumulated at descendant nodes.
    185 base::string16 FindChildText(const WebNode& node) {
    186   if (node.isTextNode())
    187     return node.nodeValue();
    188 
    189   WebNode child = node.firstChild();
    190 
    191   const int kChildSearchDepth = 10;
    192   base::string16 node_text = FindChildTextInner(child, kChildSearchDepth);
    193   TrimWhitespace(node_text, TRIM_ALL, &node_text);
    194   return node_text;
    195 }
    196 
    197 // Helper for |InferLabelForElement()| that infers a label, if possible, from
    198 // a previous sibling of |element|,
    199 // e.g. Some Text <input ...>
    200 // or   Some <span>Text</span> <input ...>
    201 // or   <p>Some Text</p><input ...>
    202 // or   <label>Some Text</label> <input ...>
    203 // or   Some Text <img><input ...>
    204 // or   <b>Some Text</b><br/> <input ...>.
    205 base::string16 InferLabelFromPrevious(const WebFormControlElement& element) {
    206   base::string16 inferred_label;
    207   WebNode previous = element;
    208   while (true) {
    209     previous = previous.previousSibling();
    210     if (previous.isNull())
    211       break;
    212 
    213     // Skip over comments.
    214     WebNode::NodeType node_type = previous.nodeType();
    215     if (node_type == WebNode::CommentNode)
    216       continue;
    217 
    218     // Otherwise, only consider normal HTML elements and their contents.
    219     if (node_type != WebNode::TextNode &&
    220         node_type != WebNode::ElementNode)
    221       break;
    222 
    223     // A label might be split across multiple "lightweight" nodes.
    224     // Coalesce any text contained in multiple consecutive
    225     //  (a) plain text nodes or
    226     //  (b) inline HTML elements that are essentially equivalent to text nodes.
    227     CR_DEFINE_STATIC_LOCAL(WebString, kBold, ("b"));
    228     CR_DEFINE_STATIC_LOCAL(WebString, kStrong, ("strong"));
    229     CR_DEFINE_STATIC_LOCAL(WebString, kSpan, ("span"));
    230     CR_DEFINE_STATIC_LOCAL(WebString, kFont, ("font"));
    231     if (previous.isTextNode() ||
    232         HasTagName(previous, kBold) || HasTagName(previous, kStrong) ||
    233         HasTagName(previous, kSpan) || HasTagName(previous, kFont)) {
    234       base::string16 value = FindChildText(previous);
    235       // A text node's value will be empty if it is for a line break.
    236       bool add_space = previous.isTextNode() && value.empty();
    237       inferred_label =
    238           CombineAndCollapseWhitespace(value, inferred_label, add_space);
    239       continue;
    240     }
    241 
    242     // If we have identified a partial label and have reached a non-lightweight
    243     // element, consider the label to be complete.
    244     base::string16 trimmed_label;
    245     TrimWhitespace(inferred_label, TRIM_ALL, &trimmed_label);
    246     if (!trimmed_label.empty())
    247       break;
    248 
    249     // <img> and <br> tags often appear between the input element and its
    250     // label text, so skip over them.
    251     CR_DEFINE_STATIC_LOCAL(WebString, kImage, ("img"));
    252     CR_DEFINE_STATIC_LOCAL(WebString, kBreak, ("br"));
    253     if (HasTagName(previous, kImage) || HasTagName(previous, kBreak))
    254       continue;
    255 
    256     // We only expect <p> and <label> tags to contain the full label text.
    257     CR_DEFINE_STATIC_LOCAL(WebString, kPage, ("p"));
    258     CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label"));
    259     if (HasTagName(previous, kPage) || HasTagName(previous, kLabel))
    260       inferred_label = FindChildText(previous);
    261 
    262     break;
    263   }
    264 
    265   TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label);
    266   return inferred_label;
    267 }
    268 
    269 // Helper for |InferLabelForElement()| that infers a label, if possible, from
    270 // enclosing list item,
    271 // e.g. <li>Some Text<input ...><input ...><input ...></tr>
    272 base::string16 InferLabelFromListItem(const WebFormControlElement& element) {
    273   WebNode parent = element.parentNode();
    274   CR_DEFINE_STATIC_LOCAL(WebString, kListItem, ("li"));
    275   while (!parent.isNull() && parent.isElementNode() &&
    276          !parent.to<WebElement>().hasTagName(kListItem)) {
    277     parent = parent.parentNode();
    278   }
    279 
    280   if (!parent.isNull() && HasTagName(parent, kListItem))
    281     return FindChildText(parent);
    282 
    283   return base::string16();
    284 }
    285 
    286 // Helper for |InferLabelForElement()| that infers a label, if possible, from
    287 // surrounding table structure,
    288 // e.g. <tr><td>Some Text</td><td><input ...></td></tr>
    289 // or   <tr><th>Some Text</th><td><input ...></td></tr>
    290 // or   <tr><td><b>Some Text</b></td><td><b><input ...></b></td></tr>
    291 // or   <tr><th><b>Some Text</b></th><td><b><input ...></b></td></tr>
    292 base::string16 InferLabelFromTableColumn(const WebFormControlElement& element) {
    293   CR_DEFINE_STATIC_LOCAL(WebString, kTableCell, ("td"));
    294   WebNode parent = element.parentNode();
    295   while (!parent.isNull() && parent.isElementNode() &&
    296          !parent.to<WebElement>().hasTagName(kTableCell)) {
    297     parent = parent.parentNode();
    298   }
    299 
    300   if (parent.isNull())
    301     return base::string16();
    302 
    303   // Check all previous siblings, skipping non-element nodes, until we find a
    304   // non-empty text block.
    305   base::string16 inferred_label;
    306   WebNode previous = parent.previousSibling();
    307   CR_DEFINE_STATIC_LOCAL(WebString, kTableHeader, ("th"));
    308   while (inferred_label.empty() && !previous.isNull()) {
    309     if (HasTagName(previous, kTableCell) || HasTagName(previous, kTableHeader))
    310       inferred_label = FindChildText(previous);
    311 
    312     previous = previous.previousSibling();
    313   }
    314 
    315   return inferred_label;
    316 }
    317 
    318 // Helper for |InferLabelForElement()| that infers a label, if possible, from
    319 // surrounding table structure,
    320 // e.g. <tr><td>Some Text</td></tr><tr><td><input ...></td></tr>
    321 base::string16 InferLabelFromTableRow(const WebFormControlElement& element) {
    322   CR_DEFINE_STATIC_LOCAL(WebString, kTableRow, ("tr"));
    323   WebNode parent = element.parentNode();
    324   while (!parent.isNull() && parent.isElementNode() &&
    325          !parent.to<WebElement>().hasTagName(kTableRow)) {
    326     parent = parent.parentNode();
    327   }
    328 
    329   if (parent.isNull())
    330     return base::string16();
    331 
    332   // Check all previous siblings, skipping non-element nodes, until we find a
    333   // non-empty text block.
    334   base::string16 inferred_label;
    335   WebNode previous = parent.previousSibling();
    336   while (inferred_label.empty() && !previous.isNull()) {
    337     if (HasTagName(previous, kTableRow))
    338       inferred_label = FindChildText(previous);
    339 
    340     previous = previous.previousSibling();
    341   }
    342 
    343   return inferred_label;
    344 }
    345 
    346 // Helper for |InferLabelForElement()| that infers a label, if possible, from
    347 // a surrounding div table,
    348 // e.g. <div>Some Text<span><input ...></span></div>
    349 // e.g. <div>Some Text</div><div><input ...></div>
    350 base::string16 InferLabelFromDivTable(const WebFormControlElement& element) {
    351   WebNode node = element.parentNode();
    352   bool looking_for_parent = true;
    353 
    354   // Search the sibling and parent <div>s until we find a candidate label.
    355   base::string16 inferred_label;
    356   CR_DEFINE_STATIC_LOCAL(WebString, kDiv, ("div"));
    357   CR_DEFINE_STATIC_LOCAL(WebString, kTable, ("table"));
    358   CR_DEFINE_STATIC_LOCAL(WebString, kFieldSet, ("fieldset"));
    359   while (inferred_label.empty() && !node.isNull()) {
    360     if (HasTagName(node, kDiv)) {
    361       looking_for_parent = false;
    362       inferred_label = FindChildText(node);
    363     } else if (looking_for_parent &&
    364                (HasTagName(node, kTable) || HasTagName(node, kFieldSet))) {
    365       // If the element is in a table or fieldset, its label most likely is too.
    366       break;
    367     }
    368 
    369     if (node.previousSibling().isNull()) {
    370       // If there are no more siblings, continue walking up the tree.
    371       looking_for_parent = true;
    372     }
    373 
    374     if (looking_for_parent)
    375       node = node.parentNode();
    376     else
    377       node = node.previousSibling();
    378   }
    379 
    380   return inferred_label;
    381 }
    382 
    383 // Helper for |InferLabelForElement()| that infers a label, if possible, from
    384 // a surrounding definition list,
    385 // e.g. <dl><dt>Some Text</dt><dd><input ...></dd></dl>
    386 // e.g. <dl><dt><b>Some Text</b></dt><dd><b><input ...></b></dd></dl>
    387 base::string16 InferLabelFromDefinitionList(
    388     const WebFormControlElement& element) {
    389   CR_DEFINE_STATIC_LOCAL(WebString, kDefinitionData, ("dd"));
    390   WebNode parent = element.parentNode();
    391   while (!parent.isNull() && parent.isElementNode() &&
    392          !parent.to<WebElement>().hasTagName(kDefinitionData))
    393     parent = parent.parentNode();
    394 
    395   if (parent.isNull() || !HasTagName(parent, kDefinitionData))
    396     return base::string16();
    397 
    398   // Skip by any intervening text nodes.
    399   WebNode previous = parent.previousSibling();
    400   while (!previous.isNull() && previous.isTextNode())
    401     previous = previous.previousSibling();
    402 
    403   CR_DEFINE_STATIC_LOCAL(WebString, kDefinitionTag, ("dt"));
    404   if (previous.isNull() || !HasTagName(previous, kDefinitionTag))
    405     return base::string16();
    406 
    407   return FindChildText(previous);
    408 }
    409 
    410 // Infers corresponding label for |element| from surrounding context in the DOM,
    411 // e.g. the contents of the preceding <p> tag or text element.
    412 base::string16 InferLabelForElement(const WebFormControlElement& element) {
    413   base::string16 inferred_label = InferLabelFromPrevious(element);
    414   if (!inferred_label.empty())
    415     return inferred_label;
    416 
    417   // If we didn't find a label, check for list item case.
    418   inferred_label = InferLabelFromListItem(element);
    419   if (!inferred_label.empty())
    420     return inferred_label;
    421 
    422   // If we didn't find a label, check for table cell case.
    423   inferred_label = InferLabelFromTableColumn(element);
    424   if (!inferred_label.empty())
    425     return inferred_label;
    426 
    427   // If we didn't find a label, check for table row case.
    428   inferred_label = InferLabelFromTableRow(element);
    429   if (!inferred_label.empty())
    430     return inferred_label;
    431 
    432   // If we didn't find a label, check for definition list case.
    433   inferred_label = InferLabelFromDefinitionList(element);
    434   if (!inferred_label.empty())
    435     return inferred_label;
    436 
    437   // If we didn't find a label, check for div table case.
    438   return InferLabelFromDivTable(element);
    439 }
    440 
    441 // Fills |option_strings| with the values of the <option> elements present in
    442 // |select_element|.
    443 void GetOptionStringsFromElement(const WebSelectElement& select_element,
    444                                  std::vector<base::string16>* option_values,
    445                                  std::vector<base::string16>* option_contents) {
    446   DCHECK(!select_element.isNull());
    447 
    448   option_values->clear();
    449   option_contents->clear();
    450   WebVector<WebElement> list_items = select_element.listItems();
    451   option_values->reserve(list_items.size());
    452   option_contents->reserve(list_items.size());
    453   for (size_t i = 0; i < list_items.size(); ++i) {
    454     if (IsOptionElement(list_items[i])) {
    455       const WebOptionElement option = list_items[i].toConst<WebOptionElement>();
    456       option_values->push_back(option.value());
    457       option_contents->push_back(option.text());
    458     }
    459   }
    460 }
    461 
    462 // The callback type used by |ForEachMatchingFormField()|.
    463 typedef void (*Callback)(const FormFieldData&,
    464                          bool, /* is_initiating_element */
    465                          blink::WebFormControlElement*);
    466 
    467 // For each autofillable field in |data| that matches a field in the |form|,
    468 // the |callback| is invoked with the corresponding |form| field data.
    469 void ForEachMatchingFormField(const WebFormElement& form_element,
    470                               const WebElement& initiating_element,
    471                               const FormData& data,
    472                               FieldFilterMask filters,
    473                               bool force_override,
    474                               Callback callback) {
    475   std::vector<WebFormControlElement> control_elements;
    476   ExtractAutofillableElements(form_element, REQUIRE_AUTOCOMPLETE,
    477                               &control_elements);
    478 
    479   if (control_elements.size() != data.fields.size()) {
    480     // This case should be reachable only for pathological websites and tests,
    481     // which add or remove form fields while the user is interacting with the
    482     // Autofill popup.
    483     return;
    484   }
    485 
    486   // It's possible that the site has injected fields into the form after the
    487   // page has loaded, so we can't assert that the size of the cached control
    488   // elements is equal to the size of the fields in |form|.  Fortunately, the
    489   // one case in the wild where this happens, paypal.com signup form, the fields
    490   // are appended to the end of the form and are not visible.
    491   for (size_t i = 0; i < control_elements.size(); ++i) {
    492     WebFormControlElement* element = &control_elements[i];
    493 
    494     if (base::string16(element->nameForAutofill()) != data.fields[i].name) {
    495       // This case should be reachable only for pathological websites, which
    496       // rename form fields while the user is interacting with the Autofill
    497       // popup.  I (isherman) am not aware of any such websites, and so am
    498       // optimistically including a NOTREACHED().  If you ever trip this check,
    499       // please file a bug against me.
    500       NOTREACHED();
    501       continue;
    502     }
    503 
    504     bool is_initiating_element = (*element == initiating_element);
    505 
    506     // Only autofill empty fields and the field that initiated the filling,
    507     // i.e. the field the user is currently editing and interacting with.
    508     const WebInputElement* input_element = toWebInputElement(element);
    509     if (!force_override && !is_initiating_element &&
    510         ((IsAutofillableInputElement(input_element) &&
    511           !input_element->value().isEmpty()) ||
    512          (IsTextAreaElement(*element) &&
    513           !element->toConst<WebTextAreaElement>().value().isEmpty())))
    514       continue;
    515 
    516     if (((filters & FILTER_DISABLED_ELEMENTS) && !element->isEnabled()) ||
    517         ((filters & FILTER_READONLY_ELEMENTS) && element->isReadOnly()) ||
    518         ((filters & FILTER_NON_FOCUSABLE_ELEMENTS) && !element->isFocusable()))
    519       continue;
    520 
    521     callback(data.fields[i], is_initiating_element, element);
    522   }
    523 }
    524 
    525 // Sets the |field|'s value to the value in |data|.
    526 // Also sets the "autofilled" attribute, causing the background to be yellow.
    527 void FillFormField(const FormFieldData& data,
    528                    bool is_initiating_node,
    529                    blink::WebFormControlElement* field) {
    530   // Nothing to fill.
    531   if (data.value.empty())
    532     return;
    533 
    534   field->setAutofilled(true);
    535 
    536   WebInputElement* input_element = toWebInputElement(field);
    537   if (IsTextInput(input_element) || IsMonthInput(input_element)) {
    538     // If the maxlength attribute contains a negative value, maxLength()
    539     // returns the default maxlength value.
    540     input_element->setValue(
    541         data.value.substr(0, input_element->maxLength()), true);
    542     if (is_initiating_node) {
    543       int length = input_element->value().length();
    544       input_element->setSelectionRange(length, length);
    545       // Clear the current IME composition (the underline), if there is one.
    546       input_element->document().frame()->unmarkText();
    547     }
    548   } else if (IsTextAreaElement(*field)) {
    549     WebTextAreaElement text_area = field->to<WebTextAreaElement>();
    550     if (text_area.value() != data.value) {
    551       text_area.setValue(data.value);
    552       text_area.dispatchFormControlChangeEvent();
    553     }
    554   } else if (IsSelectElement(*field)) {
    555     WebSelectElement select_element = field->to<WebSelectElement>();
    556     if (select_element.value() != data.value) {
    557       select_element.setValue(data.value);
    558       select_element.dispatchFormControlChangeEvent();
    559     }
    560   } else {
    561     DCHECK(IsCheckableElement(input_element));
    562     input_element->setChecked(data.is_checked, true);
    563   }
    564 }
    565 
    566 // Sets the |field|'s "suggested" (non JS visible) value to the value in |data|.
    567 // Also sets the "autofilled" attribute, causing the background to be yellow.
    568 void PreviewFormField(const FormFieldData& data,
    569                       bool is_initiating_node,
    570                       blink::WebFormControlElement* field) {
    571   // Nothing to preview.
    572   if (data.value.empty())
    573     return;
    574 
    575   // Only preview input fields. Excludes checkboxes and radio buttons, as there
    576   // is no provision for setSuggestedCheckedValue in WebInputElement.
    577   WebInputElement* input_element = toWebInputElement(field);
    578   if (!IsTextInput(input_element))
    579     return;
    580 
    581   // If the maxlength attribute contains a negative value, maxLength()
    582   // returns the default maxlength value.
    583   input_element->setSuggestedValue(
    584       data.value.substr(0, input_element->maxLength()));
    585   input_element->setAutofilled(true);
    586   if (is_initiating_node) {
    587     // Select the part of the text that the user didn't type.
    588     input_element->setSelectionRange(input_element->value().length(),
    589                                      input_element->suggestedValue().length());
    590   }
    591 }
    592 
    593 std::string RetrievalMethodToString(
    594     const WebElementDescriptor::RetrievalMethod& method) {
    595   switch (method) {
    596     case WebElementDescriptor::CSS_SELECTOR:
    597       return "CSS_SELECTOR";
    598     case WebElementDescriptor::ID:
    599       return "ID";
    600     case WebElementDescriptor::NONE:
    601       return "NONE";
    602   }
    603   NOTREACHED();
    604   return "UNKNOWN";
    605 }
    606 
    607 // Recursively checks whether |node| or any of its children have a non-empty
    608 // bounding box. The recursion depth is bounded by |depth|.
    609 bool IsWebNodeVisibleImpl(const blink::WebNode& node, const int depth) {
    610   if (depth < 0)
    611     return false;
    612   if (node.hasNonEmptyBoundingBox())
    613     return true;
    614 
    615   // The childNodes method is not a const method. Therefore it cannot be called
    616   // on a const reference. Therefore we need a const cast.
    617   const blink::WebNodeList& children =
    618       const_cast<blink::WebNode&>(node).childNodes();
    619   size_t length = children.length();
    620   for (size_t i = 0; i < length; ++i) {
    621     const blink::WebNode& item = children.item(i);
    622     if (IsWebNodeVisibleImpl(item, depth - 1))
    623       return true;
    624   }
    625   return false;
    626 }
    627 
    628 }  // namespace
    629 
    630 const size_t kMaxParseableFields = 200;
    631 
    632 bool IsMonthInput(const WebInputElement* element) {
    633   CR_DEFINE_STATIC_LOCAL(WebString, kMonth, ("month"));
    634   return element && element->formControlType() == kMonth;
    635 }
    636 
    637 // All text fields, including password fields, should be extracted.
    638 bool IsTextInput(const WebInputElement* element) {
    639   return element && element->isTextField();
    640 }
    641 
    642 bool IsSelectElement(const WebFormControlElement& element) {
    643   // Static for improved performance.
    644   CR_DEFINE_STATIC_LOCAL(WebString, kSelectOne, ("select-one"));
    645   return element.formControlType() == kSelectOne;
    646 }
    647 
    648 bool IsTextAreaElement(const WebFormControlElement& element) {
    649   // Static for improved performance.
    650   CR_DEFINE_STATIC_LOCAL(WebString, kTextArea, ("textarea"));
    651   return element.formControlType() == kTextArea;
    652 }
    653 
    654 bool IsCheckableElement(const WebInputElement* element) {
    655   if (!element)
    656     return false;
    657 
    658   return element->isCheckbox() || element->isRadioButton();
    659 }
    660 
    661 bool IsAutofillableInputElement(const WebInputElement* element) {
    662   return IsTextInput(element) ||
    663          IsMonthInput(element) ||
    664          IsCheckableElement(element);
    665 }
    666 
    667 const base::string16 GetFormIdentifier(const WebFormElement& form) {
    668   base::string16 identifier = form.name();
    669   CR_DEFINE_STATIC_LOCAL(WebString, kId, ("id"));
    670   if (identifier.empty())
    671     identifier = form.getAttribute(kId);
    672 
    673   return identifier;
    674 }
    675 
    676 bool IsWebNodeVisible(const blink::WebNode& node) {
    677   // In the bug http://crbug.com/237216 the form's bounding box is empty
    678   // however the form has non empty children. Thus we need to look at the
    679   // form's children.
    680   int kNodeSearchDepth = 2;
    681   return IsWebNodeVisibleImpl(node, kNodeSearchDepth);
    682 }
    683 
    684 bool ClickElement(const WebDocument& document,
    685                   const WebElementDescriptor& element_descriptor) {
    686   WebString web_descriptor = WebString::fromUTF8(element_descriptor.descriptor);
    687   blink::WebElement element;
    688 
    689   switch (element_descriptor.retrieval_method) {
    690     case WebElementDescriptor::CSS_SELECTOR: {
    691       WebExceptionCode ec = 0;
    692       element = document.querySelector(web_descriptor, ec);
    693       if (ec)
    694         DVLOG(1) << "Query selector failed. Error code: " << ec << ".";
    695       break;
    696     }
    697     case WebElementDescriptor::ID:
    698       element = document.getElementById(web_descriptor);
    699       break;
    700     case WebElementDescriptor::NONE:
    701       return true;
    702   }
    703 
    704   if (element.isNull()) {
    705     DVLOG(1) << "Could not find "
    706              << element_descriptor.descriptor
    707              << " by "
    708              << RetrievalMethodToString(element_descriptor.retrieval_method)
    709              << ".";
    710     return false;
    711   }
    712 
    713   element.simulateClick();
    714   return true;
    715 }
    716 
    717 // Fills |autofillable_elements| with all the auto-fillable form control
    718 // elements in |form_element|.
    719 void ExtractAutofillableElements(
    720     const WebFormElement& form_element,
    721     RequirementsMask requirements,
    722     std::vector<WebFormControlElement>* autofillable_elements) {
    723   WebVector<WebFormControlElement> control_elements;
    724   form_element.getFormControlElements(control_elements);
    725 
    726   autofillable_elements->clear();
    727   for (size_t i = 0; i < control_elements.size(); ++i) {
    728     WebFormControlElement element = control_elements[i];
    729     if (!IsAutofillableElement(element))
    730       continue;
    731 
    732     if (requirements & REQUIRE_AUTOCOMPLETE) {
    733       // TODO(isherman): WebKit currently doesn't handle the autocomplete
    734       // attribute for select or textarea elements, but it probably should.
    735       WebInputElement* input_element = toWebInputElement(&control_elements[i]);
    736       if (IsAutofillableInputElement(input_element) &&
    737           !SatisfiesRequireAutocomplete(*input_element))
    738         continue;
    739     }
    740 
    741     autofillable_elements->push_back(element);
    742   }
    743 }
    744 
    745 void WebFormControlElementToFormField(const WebFormControlElement& element,
    746                                       ExtractMask extract_mask,
    747                                       FormFieldData* field) {
    748   DCHECK(field);
    749   DCHECK(!element.isNull());
    750   CR_DEFINE_STATIC_LOCAL(WebString, kAutocomplete, ("autocomplete"));
    751 
    752   // The label is not officially part of a WebFormControlElement; however, the
    753   // labels for all form control elements are scraped from the DOM and set in
    754   // WebFormElementToFormData.
    755   field->name = element.nameForAutofill();
    756   field->form_control_type = UTF16ToUTF8(element.formControlType());
    757   field->autocomplete_attribute =
    758       UTF16ToUTF8(element.getAttribute(kAutocomplete));
    759   if (field->autocomplete_attribute.size() > kMaxDataLength) {
    760     // Discard overly long attribute values to avoid DOS-ing the browser
    761     // process.  However, send over a default string to indicate that the
    762     // attribute was present.
    763     field->autocomplete_attribute = "x-max-data-length-exceeded";
    764   }
    765 
    766   if (!IsAutofillableElement(element))
    767     return;
    768 
    769   const WebInputElement* input_element = toWebInputElement(&element);
    770   if (IsAutofillableInputElement(input_element)) {
    771     if (IsTextInput(input_element))
    772       field->max_length = input_element->maxLength();
    773 
    774     field->is_autofilled = input_element->isAutofilled();
    775     field->is_focusable = input_element->isFocusable();
    776     field->is_checkable = IsCheckableElement(input_element);
    777     field->is_checked = input_element->isChecked();
    778     field->should_autocomplete = input_element->autoComplete();
    779     field->text_direction = input_element->directionForFormData() == "rtl" ?
    780         base::i18n::RIGHT_TO_LEFT : base::i18n::LEFT_TO_RIGHT;
    781   } else if (IsTextAreaElement(element)) {
    782     // Nothing more to do in this case.
    783   } else if (extract_mask & EXTRACT_OPTIONS) {
    784     // Set option strings on the field if available.
    785     DCHECK(IsSelectElement(element));
    786     const WebSelectElement select_element = element.toConst<WebSelectElement>();
    787     GetOptionStringsFromElement(select_element,
    788                                 &field->option_values,
    789                                 &field->option_contents);
    790   }
    791 
    792   if (!(extract_mask & EXTRACT_VALUE))
    793     return;
    794 
    795   base::string16 value;
    796   if (IsAutofillableInputElement(input_element)) {
    797     value = input_element->value();
    798   } else if (IsTextAreaElement(element)) {
    799     value = element.toConst<WebTextAreaElement>().value();
    800   } else {
    801     DCHECK(IsSelectElement(element));
    802     const WebSelectElement select_element = element.toConst<WebSelectElement>();
    803     value = select_element.value();
    804 
    805     // Convert the |select_element| value to text if requested.
    806     if (extract_mask & EXTRACT_OPTION_TEXT) {
    807       WebVector<WebElement> list_items = select_element.listItems();
    808       for (size_t i = 0; i < list_items.size(); ++i) {
    809         if (IsOptionElement(list_items[i])) {
    810           const WebOptionElement option_element =
    811               list_items[i].toConst<WebOptionElement>();
    812           if (option_element.value() == value) {
    813             value = option_element.text();
    814             break;
    815           }
    816         }
    817       }
    818     }
    819   }
    820 
    821   // Constrain the maximum data length to prevent a malicious site from DOS'ing
    822   // the browser: http://crbug.com/49332
    823   if (value.size() > kMaxDataLength)
    824     value = value.substr(0, kMaxDataLength);
    825 
    826   field->value = value;
    827 }
    828 
    829 bool WebFormElementToFormData(
    830     const blink::WebFormElement& form_element,
    831     const blink::WebFormControlElement& form_control_element,
    832     RequirementsMask requirements,
    833     ExtractMask extract_mask,
    834     FormData* form,
    835     FormFieldData* field) {
    836   CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label"));
    837   CR_DEFINE_STATIC_LOCAL(WebString, kFor, ("for"));
    838   CR_DEFINE_STATIC_LOCAL(WebString, kHidden, ("hidden"));
    839 
    840   const WebFrame* frame = form_element.document().frame();
    841   if (!frame)
    842     return false;
    843 
    844   if (requirements & REQUIRE_AUTOCOMPLETE && !form_element.autoComplete())
    845     return false;
    846 
    847   form->name = GetFormIdentifier(form_element);
    848   form->method = form_element.method();
    849   form->origin = frame->document().url();
    850   form->action = frame->document().completeURL(form_element.action());
    851   form->user_submitted = form_element.wasUserSubmitted();
    852 
    853   // If the completed URL is not valid, just use the action we get from
    854   // WebKit.
    855   if (!form->action.is_valid())
    856     form->action = GURL(form_element.action());
    857 
    858   // A map from a FormFieldData's name to the FormFieldData itself.
    859   std::map<base::string16, FormFieldData*> name_map;
    860 
    861   // The extracted FormFields.  We use pointers so we can store them in
    862   // |name_map|.
    863   ScopedVector<FormFieldData> form_fields;
    864 
    865   WebVector<WebFormControlElement> control_elements;
    866   form_element.getFormControlElements(control_elements);
    867 
    868   // A vector of bools that indicate whether each field in the form meets the
    869   // requirements and thus will be in the resulting |form|.
    870   std::vector<bool> fields_extracted(control_elements.size(), false);
    871 
    872   for (size_t i = 0; i < control_elements.size(); ++i) {
    873     const WebFormControlElement& control_element = control_elements[i];
    874 
    875     if (!IsAutofillableElement(control_element))
    876       continue;
    877 
    878     const WebInputElement* input_element = toWebInputElement(&control_element);
    879     if (requirements & REQUIRE_AUTOCOMPLETE &&
    880         IsAutofillableInputElement(input_element) &&
    881         !SatisfiesRequireAutocomplete(*input_element))
    882       continue;
    883 
    884     // Create a new FormFieldData, fill it out and map it to the field's name.
    885     FormFieldData* form_field = new FormFieldData;
    886     WebFormControlElementToFormField(control_element, extract_mask, form_field);
    887     form_fields.push_back(form_field);
    888     // TODO(jhawkins): A label element is mapped to a form control element's id.
    889     // field->name() will contain the id only if the name does not exist.  Add
    890     // an id() method to WebFormControlElement and use that here.
    891     name_map[form_field->name] = form_field;
    892     fields_extracted[i] = true;
    893   }
    894 
    895   // If we failed to extract any fields, give up.  Also, to avoid overly
    896   // expensive computation, we impose a maximum number of allowable fields.
    897   if (form_fields.empty() || form_fields.size() > kMaxParseableFields)
    898     return false;
    899 
    900   // Loop through the label elements inside the form element.  For each label
    901   // element, get the corresponding form control element, use the form control
    902   // element's name as a key into the <name, FormFieldData> map to find the
    903   // previously created FormFieldData and set the FormFieldData's label to the
    904   // label.firstChild().nodeValue() of the label element.
    905   WebNodeList labels = form_element.getElementsByTagName(kLabel);
    906   for (unsigned i = 0; i < labels.length(); ++i) {
    907     WebLabelElement label = labels.item(i).to<WebLabelElement>();
    908     WebFormControlElement field_element =
    909         label.correspondingControl().to<WebFormControlElement>();
    910 
    911     base::string16 element_name;
    912     if (field_element.isNull()) {
    913       // Sometimes site authors will incorrectly specify the corresponding
    914       // field element's name rather than its id, so we compensate here.
    915       element_name = label.getAttribute(kFor);
    916     } else if (
    917         !field_element.isFormControlElement() ||
    918         field_element.formControlType() == kHidden) {
    919       continue;
    920     } else {
    921       element_name = field_element.nameForAutofill();
    922     }
    923 
    924     std::map<base::string16, FormFieldData*>::iterator iter =
    925         name_map.find(element_name);
    926     if (iter != name_map.end()) {
    927       base::string16 label_text = FindChildText(label);
    928 
    929       // Concatenate labels because some sites might have multiple label
    930       // candidates.
    931       if (!iter->second->label.empty() && !label_text.empty())
    932         iter->second->label += ASCIIToUTF16(" ");
    933       iter->second->label += label_text;
    934     }
    935   }
    936 
    937   // Loop through the form control elements, extracting the label text from
    938   // the DOM.  We use the |fields_extracted| vector to make sure we assign the
    939   // extracted label to the correct field, as it's possible |form_fields| will
    940   // not contain all of the elements in |control_elements|.
    941   for (size_t i = 0, field_idx = 0;
    942        i < control_elements.size() && field_idx < form_fields.size(); ++i) {
    943     // This field didn't meet the requirements, so don't try to find a label
    944     // for it.
    945     if (!fields_extracted[i])
    946       continue;
    947 
    948     const WebFormControlElement& control_element = control_elements[i];
    949     if (form_fields[field_idx]->label.empty())
    950       form_fields[field_idx]->label = InferLabelForElement(control_element);
    951 
    952     if (field && form_control_element == control_element)
    953       *field = *form_fields[field_idx];
    954 
    955     ++field_idx;
    956   }
    957 
    958   // Copy the created FormFields into the resulting FormData object.
    959   for (ScopedVector<FormFieldData>::const_iterator iter = form_fields.begin();
    960        iter != form_fields.end(); ++iter) {
    961     form->fields.push_back(**iter);
    962   }
    963 
    964   return true;
    965 }
    966 
    967 bool FindFormAndFieldForInputElement(const WebInputElement& element,
    968                                      FormData* form,
    969                                      FormFieldData* field,
    970                                      RequirementsMask requirements) {
    971   if (!IsAutofillableElement(element))
    972     return false;
    973 
    974   const WebFormElement form_element = element.form();
    975   if (form_element.isNull())
    976     return false;
    977 
    978   ExtractMask extract_mask =
    979       static_cast<ExtractMask>(EXTRACT_VALUE | EXTRACT_OPTIONS);
    980   return WebFormElementToFormData(form_element,
    981                                   element,
    982                                   requirements,
    983                                   extract_mask,
    984                                   form,
    985                                   field);
    986 }
    987 
    988 void FillForm(const FormData& form, const WebInputElement& element) {
    989   WebFormElement form_element = element.form();
    990   if (form_element.isNull())
    991     return;
    992 
    993   ForEachMatchingFormField(form_element,
    994                            element,
    995                            form,
    996                            FILTER_ALL_NON_EDITIABLE_ELEMENTS,
    997                            false, /* dont force override */
    998                            &FillFormField);
    999 }
   1000 
   1001 void FillFormIncludingNonFocusableElements(const FormData& form_data,
   1002                                            const WebFormElement& form_element) {
   1003   if (form_element.isNull())
   1004     return;
   1005 
   1006   FieldFilterMask filter_mask = static_cast<FieldFilterMask>(
   1007       FILTER_DISABLED_ELEMENTS | FILTER_READONLY_ELEMENTS);
   1008   ForEachMatchingFormField(form_element,
   1009                            WebInputElement(),
   1010                            form_data,
   1011                            filter_mask,
   1012                            true, /* force override */
   1013                            &FillFormField);
   1014 }
   1015 
   1016 void FillFormForAllElements(const FormData& form_data,
   1017                             const WebFormElement& form_element) {
   1018   if (form_element.isNull())
   1019     return;
   1020 
   1021   ForEachMatchingFormField(form_element,
   1022                            WebInputElement(),
   1023                            form_data,
   1024                            FILTER_NONE,
   1025                            true, /* force override */
   1026                            &FillFormField);
   1027 }
   1028 
   1029 void PreviewForm(const FormData& form, const WebInputElement& element) {
   1030   WebFormElement form_element = element.form();
   1031   if (form_element.isNull())
   1032     return;
   1033 
   1034   ForEachMatchingFormField(form_element,
   1035                            element,
   1036                            form,
   1037                            FILTER_ALL_NON_EDITIABLE_ELEMENTS,
   1038                            false, /* dont force override */
   1039                            &PreviewFormField);
   1040 }
   1041 
   1042 bool ClearPreviewedFormWithElement(const WebInputElement& element,
   1043                                    bool was_autofilled) {
   1044   WebFormElement form_element = element.form();
   1045   if (form_element.isNull())
   1046     return false;
   1047 
   1048   std::vector<WebFormControlElement> control_elements;
   1049   ExtractAutofillableElements(form_element, REQUIRE_AUTOCOMPLETE,
   1050                               &control_elements);
   1051   for (size_t i = 0; i < control_elements.size(); ++i) {
   1052     // Only text input elements can be previewed.
   1053     WebInputElement* input_element = toWebInputElement(&control_elements[i]);
   1054     if (!IsTextInput(input_element))
   1055       continue;
   1056 
   1057     // If the input element is not auto-filled, we did not preview it, so there
   1058     // is nothing to reset.
   1059     if (!input_element->isAutofilled())
   1060       continue;
   1061 
   1062     // There might be unrelated elements in this form which have already been
   1063     // auto-filled.  For example, the user might have already filled the address
   1064     // part of a form and now be dealing with the credit card section.  We only
   1065     // want to reset the auto-filled status for fields that were previewed.
   1066     if (input_element->suggestedValue().isEmpty())
   1067       continue;
   1068 
   1069     // Clear the suggested value. For the initiating node, also restore the
   1070     // original value.
   1071     input_element->setSuggestedValue(WebString());
   1072     bool is_initiating_node = (element == *input_element);
   1073     if (is_initiating_node)
   1074       input_element->setAutofilled(was_autofilled);
   1075     else
   1076       input_element->setAutofilled(false);
   1077 
   1078     // Clearing the suggested value in the focused node (above) can cause
   1079     // selection to be lost. We force selection range to restore the text
   1080     // cursor.
   1081     if (is_initiating_node) {
   1082       int length = input_element->value().length();
   1083       input_element->setSelectionRange(length, length);
   1084     }
   1085   }
   1086 
   1087   return true;
   1088 }
   1089 
   1090 bool FormWithElementIsAutofilled(const WebInputElement& element) {
   1091   WebFormElement form_element = element.form();
   1092   if (form_element.isNull())
   1093     return false;
   1094 
   1095   std::vector<WebFormControlElement> control_elements;
   1096   ExtractAutofillableElements(form_element, REQUIRE_AUTOCOMPLETE,
   1097                               &control_elements);
   1098   for (size_t i = 0; i < control_elements.size(); ++i) {
   1099     WebInputElement* input_element = toWebInputElement(&control_elements[i]);
   1100     if (!IsAutofillableInputElement(input_element))
   1101       continue;
   1102 
   1103     if (input_element->isAutofilled())
   1104       return true;
   1105   }
   1106 
   1107   return false;
   1108 }
   1109 
   1110 bool IsWebpageEmpty(const blink::WebFrame* frame) {
   1111   blink::WebDocument document = frame->document();
   1112 
   1113   return IsWebElementEmpty(document.head()) &&
   1114          IsWebElementEmpty(document.body());
   1115 }
   1116 
   1117 bool IsWebElementEmpty(const blink::WebElement& element) {
   1118   // This array contains all tags which can be present in an empty page.
   1119   const char* const kAllowedValue[] = {
   1120     "script",
   1121     "meta",
   1122     "title",
   1123   };
   1124   const size_t kAllowedValueLength = arraysize(kAllowedValue);
   1125 
   1126   if (element.isNull())
   1127     return true;
   1128   // The childNodes method is not a const method. Therefore it cannot be called
   1129   // on a const reference. Therefore we need a const cast.
   1130   const blink::WebNodeList& children =
   1131       const_cast<blink::WebElement&>(element).childNodes();
   1132   for (size_t i = 0; i < children.length(); ++i) {
   1133     const blink::WebNode& item = children.item(i);
   1134 
   1135     if (item.isTextNode() &&
   1136         !ContainsOnlyWhitespaceASCII(item.nodeValue().utf8()))
   1137       return false;
   1138 
   1139     // We ignore all other items with names which begin with
   1140     // the character # because they are not html tags.
   1141     if (item.nodeName().utf8()[0] == '#')
   1142       continue;
   1143 
   1144     bool tag_is_allowed = false;
   1145     // Test if the item name is in the kAllowedValue array
   1146     for (size_t allowed_value_index = 0;
   1147          allowed_value_index < kAllowedValueLength; ++allowed_value_index) {
   1148       if (HasTagName(item,
   1149                      WebString::fromUTF8(kAllowedValue[allowed_value_index]))) {
   1150         tag_is_allowed = true;
   1151         break;
   1152       }
   1153     }
   1154     if (!tag_is_allowed)
   1155       return false;
   1156   }
   1157   return true;
   1158 }
   1159 
   1160 }  // namespace autofill
   1161