1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "components/autofill/content/renderer/form_autofill_util.h" 6 7 #include <map> 8 9 #include "base/command_line.h" 10 #include "base/logging.h" 11 #include "base/memory/scoped_vector.h" 12 #include "base/metrics/field_trial.h" 13 #include "base/strings/string_util.h" 14 #include "base/strings/utf_string_conversions.h" 15 #include "components/autofill/core/common/autofill_data_validation.h" 16 #include "components/autofill/core/common/autofill_switches.h" 17 #include "components/autofill/core/common/form_data.h" 18 #include "components/autofill/core/common/form_field_data.h" 19 #include "components/autofill/core/common/web_element_descriptor.h" 20 #include "third_party/WebKit/public/platform/WebString.h" 21 #include "third_party/WebKit/public/platform/WebVector.h" 22 #include "third_party/WebKit/public/web/WebDocument.h" 23 #include "third_party/WebKit/public/web/WebElement.h" 24 #include "third_party/WebKit/public/web/WebElementCollection.h" 25 #include "third_party/WebKit/public/web/WebExceptionCode.h" 26 #include "third_party/WebKit/public/web/WebFormControlElement.h" 27 #include "third_party/WebKit/public/web/WebFormElement.h" 28 #include "third_party/WebKit/public/web/WebInputElement.h" 29 #include "third_party/WebKit/public/web/WebLabelElement.h" 30 #include "third_party/WebKit/public/web/WebLocalFrame.h" 31 #include "third_party/WebKit/public/web/WebNode.h" 32 #include "third_party/WebKit/public/web/WebNodeList.h" 33 #include "third_party/WebKit/public/web/WebOptionElement.h" 34 #include "third_party/WebKit/public/web/WebSelectElement.h" 35 #include "third_party/WebKit/public/web/WebTextAreaElement.h" 36 37 using blink::WebDocument; 38 using blink::WebElement; 39 using blink::WebElementCollection; 40 using blink::WebExceptionCode; 41 using blink::WebFormControlElement; 42 using 
blink::WebFormElement; 43 using blink::WebFrame; 44 using blink::WebInputElement; 45 using blink::WebLabelElement; 46 using blink::WebNode; 47 using blink::WebNodeList; 48 using blink::WebOptionElement; 49 using blink::WebSelectElement; 50 using blink::WebTextAreaElement; 51 using blink::WebString; 52 using blink::WebVector; 53 54 namespace autofill { 55 namespace { 56 57 // A bit field mask for FillForm functions to not fill some fields. 58 enum FieldFilterMask { 59 FILTER_NONE = 0, 60 FILTER_DISABLED_ELEMENTS = 1 << 0, 61 FILTER_READONLY_ELEMENTS = 1 << 1, 62 FILTER_NON_FOCUSABLE_ELEMENTS = 1 << 2, 63 FILTER_ALL_NON_EDITIABLE_ELEMENTS = FILTER_DISABLED_ELEMENTS | 64 FILTER_READONLY_ELEMENTS | 65 FILTER_NON_FOCUSABLE_ELEMENTS, 66 }; 67 68 bool IsOptionElement(const WebElement& element) { 69 CR_DEFINE_STATIC_LOCAL(WebString, kOption, ("option")); 70 return element.hasHTMLTagName(kOption); 71 } 72 73 bool IsScriptElement(const WebElement& element) { 74 CR_DEFINE_STATIC_LOCAL(WebString, kScript, ("script")); 75 return element.hasHTMLTagName(kScript); 76 } 77 78 bool IsNoScriptElement(const WebElement& element) { 79 CR_DEFINE_STATIC_LOCAL(WebString, kNoScript, ("noscript")); 80 return element.hasHTMLTagName(kNoScript); 81 } 82 83 bool HasTagName(const WebNode& node, const blink::WebString& tag) { 84 return node.isElementNode() && node.toConst<WebElement>().hasHTMLTagName(tag); 85 } 86 87 bool IsAutofillableElement(const WebFormControlElement& element) { 88 const WebInputElement* input_element = toWebInputElement(&element); 89 return IsAutofillableInputElement(input_element) || 90 IsSelectElement(element) || 91 IsTextAreaElement(element); 92 } 93 94 // Check whether the given field satisfies the REQUIRE_AUTOCOMPLETE requirement. 95 bool SatisfiesRequireAutocomplete(const WebInputElement& input_element) { 96 return input_element.autoComplete(); 97 } 98 99 // Appends |suffix| to |prefix| so that any intermediary whitespace is collapsed 100 // to a single space. 
If |force_whitespace| is true, then the resulting string 101 // is guaranteed to have a space between |prefix| and |suffix|. Otherwise, the 102 // result includes a space only if |prefix| has trailing whitespace or |suffix| 103 // has leading whitespace. 104 // A few examples: 105 // * CombineAndCollapseWhitespace("foo", "bar", false) -> "foobar" 106 // * CombineAndCollapseWhitespace("foo", "bar", true) -> "foo bar" 107 // * CombineAndCollapseWhitespace("foo ", "bar", false) -> "foo bar" 108 // * CombineAndCollapseWhitespace("foo", " bar", false) -> "foo bar" 109 // * CombineAndCollapseWhitespace("foo", " bar", true) -> "foo bar" 110 // * CombineAndCollapseWhitespace("foo ", " bar", false) -> "foo bar" 111 // * CombineAndCollapseWhitespace(" foo", "bar ", false) -> " foobar " 112 // * CombineAndCollapseWhitespace(" foo", "bar ", true) -> " foo bar " 113 const base::string16 CombineAndCollapseWhitespace( 114 const base::string16& prefix, 115 const base::string16& suffix, 116 bool force_whitespace) { 117 base::string16 prefix_trimmed; 118 base::TrimPositions prefix_trailing_whitespace = 119 base::TrimWhitespace(prefix, base::TRIM_TRAILING, &prefix_trimmed); 120 121 // Recursively compute the children's text. 122 base::string16 suffix_trimmed; 123 base::TrimPositions suffix_leading_whitespace = 124 base::TrimWhitespace(suffix, base::TRIM_LEADING, &suffix_trimmed); 125 126 if (prefix_trailing_whitespace || suffix_leading_whitespace || 127 force_whitespace) { 128 return prefix_trimmed + base::ASCIIToUTF16(" ") + suffix_trimmed; 129 } else { 130 return prefix_trimmed + suffix_trimmed; 131 } 132 } 133 134 // This is a helper function for the FindChildText() function (see below). 135 // Search depth is limited with the |depth| parameter. 136 base::string16 FindChildTextInner(const WebNode& node, int depth) { 137 if (depth <= 0 || node.isNull()) 138 return base::string16(); 139 140 // Skip over comments. 
141 if (node.nodeType() == WebNode::CommentNode) 142 return FindChildTextInner(node.nextSibling(), depth - 1); 143 144 if (node.nodeType() != WebNode::ElementNode && 145 node.nodeType() != WebNode::TextNode) 146 return base::string16(); 147 148 // Ignore elements known not to contain inferable labels. 149 if (node.isElementNode()) { 150 const WebElement element = node.toConst<WebElement>(); 151 if (IsOptionElement(element) || 152 IsScriptElement(element) || 153 IsNoScriptElement(element) || 154 (element.isFormControlElement() && 155 IsAutofillableElement(element.toConst<WebFormControlElement>()))) { 156 return base::string16(); 157 } 158 } 159 160 // Extract the text exactly at this node. 161 base::string16 node_text = node.nodeValue(); 162 163 // Recursively compute the children's text. 164 // Preserve inter-element whitespace separation. 165 base::string16 child_text = FindChildTextInner(node.firstChild(), depth - 1); 166 bool add_space = node.nodeType() == WebNode::TextNode && node_text.empty(); 167 node_text = CombineAndCollapseWhitespace(node_text, child_text, add_space); 168 169 // Recursively compute the siblings' text. 170 // Again, preserve inter-element whitespace separation. 171 base::string16 sibling_text = 172 FindChildTextInner(node.nextSibling(), depth - 1); 173 add_space = node.nodeType() == WebNode::TextNode && node_text.empty(); 174 node_text = CombineAndCollapseWhitespace(node_text, sibling_text, add_space); 175 176 return node_text; 177 } 178 179 // Returns the aggregated values of the descendants of |element| that are 180 // non-empty text nodes. This is a faster alternative to |innerText()| for 181 // performance critical operations. It does a full depth-first search so can be 182 // used when the structure is not directly known. However, unlike with 183 // |innerText()|, the search depth and breadth are limited to a fixed threshold. 184 // Whitespace is trimmed from text accumulated at descendant nodes. 
185 base::string16 FindChildText(const WebNode& node) { 186 if (node.isTextNode()) 187 return node.nodeValue(); 188 189 WebNode child = node.firstChild(); 190 191 const int kChildSearchDepth = 10; 192 base::string16 node_text = FindChildTextInner(child, kChildSearchDepth); 193 base::TrimWhitespace(node_text, base::TRIM_ALL, &node_text); 194 return node_text; 195 } 196 197 // Helper for |InferLabelForElement()| that infers a label, if possible, from 198 // a previous sibling of |element|, 199 // e.g. Some Text <input ...> 200 // or Some <span>Text</span> <input ...> 201 // or <p>Some Text</p><input ...> 202 // or <label>Some Text</label> <input ...> 203 // or Some Text <img><input ...> 204 // or <b>Some Text</b><br/> <input ...>. 205 base::string16 InferLabelFromPrevious(const WebFormControlElement& element) { 206 base::string16 inferred_label; 207 WebNode previous = element; 208 while (true) { 209 previous = previous.previousSibling(); 210 if (previous.isNull()) 211 break; 212 213 // Skip over comments. 214 WebNode::NodeType node_type = previous.nodeType(); 215 if (node_type == WebNode::CommentNode) 216 continue; 217 218 // Otherwise, only consider normal HTML elements and their contents. 219 if (node_type != WebNode::TextNode && 220 node_type != WebNode::ElementNode) 221 break; 222 223 // A label might be split across multiple "lightweight" nodes. 224 // Coalesce any text contained in multiple consecutive 225 // (a) plain text nodes or 226 // (b) inline HTML elements that are essentially equivalent to text nodes. 
227 CR_DEFINE_STATIC_LOCAL(WebString, kBold, ("b")); 228 CR_DEFINE_STATIC_LOCAL(WebString, kStrong, ("strong")); 229 CR_DEFINE_STATIC_LOCAL(WebString, kSpan, ("span")); 230 CR_DEFINE_STATIC_LOCAL(WebString, kFont, ("font")); 231 if (previous.isTextNode() || 232 HasTagName(previous, kBold) || HasTagName(previous, kStrong) || 233 HasTagName(previous, kSpan) || HasTagName(previous, kFont)) { 234 base::string16 value = FindChildText(previous); 235 // A text node's value will be empty if it is for a line break. 236 bool add_space = previous.isTextNode() && value.empty(); 237 inferred_label = 238 CombineAndCollapseWhitespace(value, inferred_label, add_space); 239 continue; 240 } 241 242 // If we have identified a partial label and have reached a non-lightweight 243 // element, consider the label to be complete. 244 base::string16 trimmed_label; 245 base::TrimWhitespace(inferred_label, base::TRIM_ALL, &trimmed_label); 246 if (!trimmed_label.empty()) 247 break; 248 249 // <img> and <br> tags often appear between the input element and its 250 // label text, so skip over them. 251 CR_DEFINE_STATIC_LOCAL(WebString, kImage, ("img")); 252 CR_DEFINE_STATIC_LOCAL(WebString, kBreak, ("br")); 253 if (HasTagName(previous, kImage) || HasTagName(previous, kBreak)) 254 continue; 255 256 // We only expect <p> and <label> tags to contain the full label text. 257 CR_DEFINE_STATIC_LOCAL(WebString, kPage, ("p")); 258 CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label")); 259 if (HasTagName(previous, kPage) || HasTagName(previous, kLabel)) 260 inferred_label = FindChildText(previous); 261 262 break; 263 } 264 265 base::TrimWhitespace(inferred_label, base::TRIM_ALL, &inferred_label); 266 return inferred_label; 267 } 268 269 // Helper for |InferLabelForElement()| that infers a label, if possible, from 270 // enclosing list item, 271 // e.g. 
<li>Some Text<input ...><input ...><input ...></tr> 272 base::string16 InferLabelFromListItem(const WebFormControlElement& element) { 273 WebNode parent = element.parentNode(); 274 CR_DEFINE_STATIC_LOCAL(WebString, kListItem, ("li")); 275 while (!parent.isNull() && parent.isElementNode() && 276 !parent.to<WebElement>().hasTagName(kListItem)) { 277 parent = parent.parentNode(); 278 } 279 280 if (!parent.isNull() && HasTagName(parent, kListItem)) 281 return FindChildText(parent); 282 283 return base::string16(); 284 } 285 286 // Helper for |InferLabelForElement()| that infers a label, if possible, from 287 // surrounding table structure, 288 // e.g. <tr><td>Some Text</td><td><input ...></td></tr> 289 // or <tr><th>Some Text</th><td><input ...></td></tr> 290 // or <tr><td><b>Some Text</b></td><td><b><input ...></b></td></tr> 291 // or <tr><th><b>Some Text</b></th><td><b><input ...></b></td></tr> 292 base::string16 InferLabelFromTableColumn(const WebFormControlElement& element) { 293 CR_DEFINE_STATIC_LOCAL(WebString, kTableCell, ("td")); 294 WebNode parent = element.parentNode(); 295 while (!parent.isNull() && parent.isElementNode() && 296 !parent.to<WebElement>().hasTagName(kTableCell)) { 297 parent = parent.parentNode(); 298 } 299 300 if (parent.isNull()) 301 return base::string16(); 302 303 // Check all previous siblings, skipping non-element nodes, until we find a 304 // non-empty text block. 305 base::string16 inferred_label; 306 WebNode previous = parent.previousSibling(); 307 CR_DEFINE_STATIC_LOCAL(WebString, kTableHeader, ("th")); 308 while (inferred_label.empty() && !previous.isNull()) { 309 if (HasTagName(previous, kTableCell) || HasTagName(previous, kTableHeader)) 310 inferred_label = FindChildText(previous); 311 312 previous = previous.previousSibling(); 313 } 314 315 return inferred_label; 316 } 317 318 // Helper for |InferLabelForElement()| that infers a label, if possible, from 319 // surrounding table structure, 320 // e.g. 
<tr><td>Some Text</td></tr><tr><td><input ...></td></tr> 321 base::string16 InferLabelFromTableRow(const WebFormControlElement& element) { 322 CR_DEFINE_STATIC_LOCAL(WebString, kTableRow, ("tr")); 323 WebNode parent = element.parentNode(); 324 while (!parent.isNull() && parent.isElementNode() && 325 !parent.to<WebElement>().hasTagName(kTableRow)) { 326 parent = parent.parentNode(); 327 } 328 329 if (parent.isNull()) 330 return base::string16(); 331 332 // Check all previous siblings, skipping non-element nodes, until we find a 333 // non-empty text block. 334 base::string16 inferred_label; 335 WebNode previous = parent.previousSibling(); 336 while (inferred_label.empty() && !previous.isNull()) { 337 if (HasTagName(previous, kTableRow)) 338 inferred_label = FindChildText(previous); 339 340 previous = previous.previousSibling(); 341 } 342 343 return inferred_label; 344 } 345 346 // Helper for |InferLabelForElement()| that infers a label, if possible, from 347 // a surrounding div table, 348 // e.g. <div>Some Text<span><input ...></span></div> 349 // e.g. <div>Some Text</div><div><input ...></div> 350 base::string16 InferLabelFromDivTable(const WebFormControlElement& element) { 351 WebNode node = element.parentNode(); 352 bool looking_for_parent = true; 353 354 // Search the sibling and parent <div>s until we find a candidate label. 355 base::string16 inferred_label; 356 CR_DEFINE_STATIC_LOCAL(WebString, kDiv, ("div")); 357 CR_DEFINE_STATIC_LOCAL(WebString, kTable, ("table")); 358 CR_DEFINE_STATIC_LOCAL(WebString, kFieldSet, ("fieldset")); 359 while (inferred_label.empty() && !node.isNull()) { 360 if (HasTagName(node, kDiv)) { 361 looking_for_parent = false; 362 inferred_label = FindChildText(node); 363 } else if (looking_for_parent && 364 (HasTagName(node, kTable) || HasTagName(node, kFieldSet))) { 365 // If the element is in a table or fieldset, its label most likely is too. 
366 break; 367 } 368 369 if (node.previousSibling().isNull()) { 370 // If there are no more siblings, continue walking up the tree. 371 looking_for_parent = true; 372 } 373 374 if (looking_for_parent) 375 node = node.parentNode(); 376 else 377 node = node.previousSibling(); 378 } 379 380 return inferred_label; 381 } 382 383 // Helper for |InferLabelForElement()| that infers a label, if possible, from 384 // a surrounding definition list, 385 // e.g. <dl><dt>Some Text</dt><dd><input ...></dd></dl> 386 // e.g. <dl><dt><b>Some Text</b></dt><dd><b><input ...></b></dd></dl> 387 base::string16 InferLabelFromDefinitionList( 388 const WebFormControlElement& element) { 389 CR_DEFINE_STATIC_LOCAL(WebString, kDefinitionData, ("dd")); 390 WebNode parent = element.parentNode(); 391 while (!parent.isNull() && parent.isElementNode() && 392 !parent.to<WebElement>().hasTagName(kDefinitionData)) 393 parent = parent.parentNode(); 394 395 if (parent.isNull() || !HasTagName(parent, kDefinitionData)) 396 return base::string16(); 397 398 // Skip by any intervening text nodes. 399 WebNode previous = parent.previousSibling(); 400 while (!previous.isNull() && previous.isTextNode()) 401 previous = previous.previousSibling(); 402 403 CR_DEFINE_STATIC_LOCAL(WebString, kDefinitionTag, ("dt")); 404 if (previous.isNull() || !HasTagName(previous, kDefinitionTag)) 405 return base::string16(); 406 407 return FindChildText(previous); 408 } 409 410 // Infers corresponding label for |element| from surrounding context in the DOM, 411 // e.g. the contents of the preceding <p> tag or text element. 412 base::string16 InferLabelForElement(const WebFormControlElement& element) { 413 base::string16 inferred_label = InferLabelFromPrevious(element); 414 if (!inferred_label.empty()) 415 return inferred_label; 416 417 // If we didn't find a label, check for list item case. 
418 inferred_label = InferLabelFromListItem(element); 419 if (!inferred_label.empty()) 420 return inferred_label; 421 422 // If we didn't find a label, check for table cell case. 423 inferred_label = InferLabelFromTableColumn(element); 424 if (!inferred_label.empty()) 425 return inferred_label; 426 427 // If we didn't find a label, check for table row case. 428 inferred_label = InferLabelFromTableRow(element); 429 if (!inferred_label.empty()) 430 return inferred_label; 431 432 // If we didn't find a label, check for definition list case. 433 inferred_label = InferLabelFromDefinitionList(element); 434 if (!inferred_label.empty()) 435 return inferred_label; 436 437 // If we didn't find a label, check for div table case. 438 return InferLabelFromDivTable(element); 439 } 440 441 // Fills |option_strings| with the values of the <option> elements present in 442 // |select_element|. 443 void GetOptionStringsFromElement(const WebSelectElement& select_element, 444 std::vector<base::string16>* option_values, 445 std::vector<base::string16>* option_contents) { 446 DCHECK(!select_element.isNull()); 447 448 option_values->clear(); 449 option_contents->clear(); 450 WebVector<WebElement> list_items = select_element.listItems(); 451 452 // Constrain the maximum list length to prevent a malicious site from DOS'ing 453 // the browser, without entirely breaking autocomplete for some extreme 454 // legitimate sites: http://crbug.com/49332 and http://crbug.com/363094 455 if (list_items.size() > kMaxListSize) 456 return; 457 458 option_values->reserve(list_items.size()); 459 option_contents->reserve(list_items.size()); 460 for (size_t i = 0; i < list_items.size(); ++i) { 461 if (IsOptionElement(list_items[i])) { 462 const WebOptionElement option = list_items[i].toConst<WebOptionElement>(); 463 option_values->push_back(option.value()); 464 option_contents->push_back(option.text()); 465 } 466 } 467 } 468 469 // The callback type used by |ForEachMatchingFormField()|. 
typedef void (*Callback)(const FormFieldData&,
                         bool, /* is_initiating_element */
                         blink::WebFormControlElement*);

// For each autofillable field in |data| that matches a field in the |form|,
// the |callback| is invoked with the corresponding |form| field data.
//
// |initiating_element| is the element the fill was started from; it is
// passed through to |callback| as a flag and is exempt from the "only fill
// empty fields" rule.  |filters| is a FieldFilterMask bit set selecting
// fields to skip.  |force_override| allows non-empty fields to be visited.
// If the number of extracted controls differs from |data.fields|, nothing is
// invoked at all.
void ForEachMatchingFormField(const WebFormElement& form_element,
                              const WebElement& initiating_element,
                              const FormData& data,
                              FieldFilterMask filters,
                              bool force_override,
                              Callback callback) {
  std::vector<WebFormControlElement> control_elements;
  ExtractAutofillableElements(form_element, REQUIRE_AUTOCOMPLETE,
                              &control_elements);

  if (control_elements.size() != data.fields.size()) {
    // This case should be reachable only for pathological websites and tests,
    // which add or remove form fields while the user is interacting with the
    // Autofill popup.
    return;
  }

  // It's possible that the site has injected fields into the form after the
  // page has loaded, so we can't assert that the size of the cached control
  // elements is equal to the size of the fields in |form|.  Fortunately, the
  // one case in the wild where this happens, paypal.com signup form, the fields
  // are appended to the end of the form and are not visible.
  // NOTE(review): the size check above already returns on any mismatch, so
  // this remark appears to predate it -- confirm and update.
  for (size_t i = 0; i < control_elements.size(); ++i) {
    WebFormControlElement* element = &control_elements[i];

    if (base::string16(element->nameForAutofill()) != data.fields[i].name) {
      // This case should be reachable only for pathological websites, which
      // rename form fields while the user is interacting with the Autofill
      // popup.  I (isherman) am not aware of any such websites, and so am
      // optimistically including a NOTREACHED().  If you ever trip this check,
      // please file a bug against me.
      NOTREACHED();
      continue;
    }

    bool is_initiating_element = (*element == initiating_element);

    // Only autofill empty fields and the field that initiated the filling,
    // i.e. the field the user is currently editing and interacting with.
    // Select elements are not subject to this check.
    const WebInputElement* input_element = toWebInputElement(element);
    if (!force_override && !is_initiating_element &&
        ((IsAutofillableInputElement(input_element) ||
          IsTextAreaElement(*element)) &&
         !element->value().isEmpty()))
      continue;

    // Skip any element excluded by the caller-supplied filter bits.
    if (((filters & FILTER_DISABLED_ELEMENTS) && !element->isEnabled()) ||
        ((filters & FILTER_READONLY_ELEMENTS) && element->isReadOnly()) ||
        ((filters & FILTER_NON_FOCUSABLE_ELEMENTS) && !element->isFocusable()))
      continue;

    callback(data.fields[i], is_initiating_element, element);
  }
}

// Sets the |field|'s value to the value in |data|.
// Also sets the "autofilled" attribute, causing the background to be yellow.
// Does nothing if |data.value| is empty or |data.is_autofilled| is false.
// When |is_initiating_node| is true and the field is a text/month input or a
// textarea, the caret is moved to the end of the new value.
void FillFormField(const FormFieldData& data,
                   bool is_initiating_node,
                   blink::WebFormControlElement* field) {
  // Nothing to fill.
  if (data.value.empty())
    return;

  if (!data.is_autofilled)
    return;

  field->setAutofilled(true);

  // |input_element| is used only through the null-tolerant Is*() helpers
  // before being dereferenced.
  WebInputElement* input_element = toWebInputElement(field);
  if (IsCheckableElement(input_element)) {
    input_element->setChecked(data.is_checked, true);
  } else {
    base::string16 value = data.value;
    if (IsTextInput(input_element) || IsMonthInput(input_element)) {
      // If the maxlength attribute contains a negative value, maxLength()
      // returns the default maxlength value.
      value = value.substr(0, input_element->maxLength());
    }
    field->setValue(value, true);
  }

  if (is_initiating_node &&
      ((IsTextInput(input_element) || IsMonthInput(input_element)) ||
       IsTextAreaElement(*field))) {
    // Collapse the selection to the end of the filled value.
    int length = field->value().length();
    field->setSelectionRange(length, length);
    // Clear the current IME composition (the underline), if there is one.
    // NOTE(review): frame() is dereferenced without a null check here --
    // confirm it cannot be null for a field being filled.
    field->document().frame()->unmarkText();
  }
}

// Sets the |field|'s "suggested" (non JS visible) value to the value in |data|.
// Also sets the "autofilled" attribute, causing the background to be yellow.
// Does nothing if |data.value| is empty or |data.is_autofilled| is false.
void PreviewFormField(const FormFieldData& data,
                      bool is_initiating_node,
                      blink::WebFormControlElement* field) {
  // Nothing to preview.
  if (data.value.empty())
    return;

  if (!data.is_autofilled)
    return;

  // Preview input, textarea and select fields. For input fields, excludes
  // checkboxes and radio buttons, as there is no provision for
  // setSuggestedCheckedValue in WebInputElement.
  WebInputElement* input_element = toWebInputElement(field);
  if (IsTextInput(input_element) || IsMonthInput(input_element)) {
    // If the maxlength attribute contains a negative value, maxLength()
    // returns the default maxlength value.
    input_element->setSuggestedValue(
      data.value.substr(0, input_element->maxLength()));
    input_element->setAutofilled(true);
  } else if (IsTextAreaElement(*field) || IsSelectElement(*field)) {
    field->setSuggestedValue(data.value);
    field->setAutofilled(true);
  }

  if (is_initiating_node &&
      (IsTextInput(input_element) || IsTextAreaElement(*field))) {
    // Select the part of the text that the user didn't type.
    int start = field->value().length();
    int end = field->suggestedValue().length();
    field->setSelectionRange(start, end);
  }
}

// Returns the name of |method| as a string, for use in log messages.
// Yields "UNKNOWN" (after NOTREACHED()) for a value outside the enum.
std::string RetrievalMethodToString(
    const WebElementDescriptor::RetrievalMethod& method) {
  switch (method) {
    case WebElementDescriptor::CSS_SELECTOR:
      return "CSS_SELECTOR";
    case WebElementDescriptor::ID:
      return "ID";
    case WebElementDescriptor::NONE:
      return "NONE";
  }
  NOTREACHED();
  return "UNKNOWN";
}

// Recursively checks whether |node| or any of its children have a non-empty
// bounding box. The recursion depth is bounded by |depth|.
// A |depth| of 0 still examines |node| itself; only negative values stop.
bool IsWebNodeVisibleImpl(const blink::WebNode& node, const int depth) {
  if (depth < 0)
    return false;
  if (node.hasNonEmptyBoundingBox())
    return true;

  // The childNodes method is not a const method. Therefore it cannot be called
  // on a const reference. Therefore we need a const cast.
  const blink::WebNodeList& children =
      const_cast<blink::WebNode&>(node).childNodes();
  size_t length = children.length();
  for (size_t i = 0; i < length; ++i) {
    const blink::WebNode& item = children.item(i);
    if (IsWebNodeVisibleImpl(item, depth - 1))
      return true;
  }
  return false;
}

}  // namespace

// Upper bound on the number of extracted fields a form may have; forms with
// more are rejected by WebFormElementToFormData().
const size_t kMaxParseableFields = 200;

// Returns true if |element| is non-null and its form control type is "month".
bool IsMonthInput(const WebInputElement* element) {
  CR_DEFINE_STATIC_LOCAL(WebString, kMonth, ("month"));
  return element && element->formControlType() == kMonth;
}

// All text fields, including password fields, should be extracted.
// Returns false for a null |element|.
bool IsTextInput(const WebInputElement* element) {
  return element && element->isTextField();
}

// Returns true if |element|'s form control type is "select-one".
bool IsSelectElement(const WebFormControlElement& element) {
  // Static for improved performance.
  CR_DEFINE_STATIC_LOCAL(WebString, kSelectOne, ("select-one"));
  return element.formControlType() == kSelectOne;
}

// Returns true if |element|'s form control type is "textarea".
bool IsTextAreaElement(const WebFormControlElement& element) {
  // Static for improved performance.
  CR_DEFINE_STATIC_LOCAL(WebString, kTextArea, ("textarea"));
  return element.formControlType() == kTextArea;
}

// Returns true if |element| is a checkbox or a radio button; false for a
// null |element|.
bool IsCheckableElement(const WebInputElement* element) {
  if (!element)
    return false;

  return element->isCheckbox() || element->isRadioButton();
}

// Returns true if |element| is a text, month, or checkable input.  A null
// |element| yields false because each helper below rejects null.
bool IsAutofillableInputElement(const WebInputElement* element) {
  return IsTextInput(element) ||
         IsMonthInput(element) ||
         IsCheckableElement(element);
}

// Returns the form's name, falling back to its "id" attribute when the name
// is empty.
const base::string16 GetFormIdentifier(const WebFormElement& form) {
  base::string16 identifier = form.name();
  CR_DEFINE_STATIC_LOCAL(WebString, kId, ("id"));
  if (identifier.empty())
    identifier = form.getAttribute(kId);

  return identifier;
}

// Returns true if |node|, or a descendant within a small fixed depth, has a
// non-empty bounding box.
bool IsWebNodeVisible(const blink::WebNode& node) {
  // In the bug http://crbug.com/237216 the form's bounding box is empty
  // however the form has non empty children. Thus we need to look at the
  // form's children.
  int kNodeSearchDepth = 2;
  return IsWebNodeVisibleImpl(node, kNodeSearchDepth);
}

// Looks up the element described by |element_descriptor| in |document| and
// simulates a click on it.  Returns true if a click was simulated or if the
// retrieval method is NONE (nothing to click); returns false if the element
// could not be found.
bool ClickElement(const WebDocument& document,
                  const WebElementDescriptor& element_descriptor) {
  WebString web_descriptor = WebString::fromUTF8(element_descriptor.descriptor);
  blink::WebElement element;

  switch (element_descriptor.retrieval_method) {
    case WebElementDescriptor::CSS_SELECTOR: {
      WebExceptionCode ec = 0;
      element = document.querySelector(web_descriptor, ec);
      // A failed query is only logged; the null check below handles it.
      if (ec)
        DVLOG(1) << "Query selector failed. Error code: " << ec << ".";
      break;
    }
    case WebElementDescriptor::ID:
      element = document.getElementById(web_descriptor);
      break;
    case WebElementDescriptor::NONE:
      return true;
  }

  if (element.isNull()) {
    DVLOG(1) << "Could not find "
             << element_descriptor.descriptor
             << " by "
             << RetrievalMethodToString(element_descriptor.retrieval_method)
             << ".";
    return false;
  }

  element.simulateClick();
  return true;
}

// Fills |autofillable_elements| with all the auto-fillable form control
// elements in |form_element|.
// |autofillable_elements| is cleared first.  When |requirements| includes
// REQUIRE_AUTOCOMPLETE, autofillable <input> elements that fail
// SatisfiesRequireAutocomplete() are left out.
void ExtractAutofillableElements(
    const WebFormElement& form_element,
    RequirementsMask requirements,
    std::vector<WebFormControlElement>* autofillable_elements) {
  WebVector<WebFormControlElement> control_elements;
  form_element.getFormControlElements(control_elements);

  autofillable_elements->clear();
  for (size_t i = 0; i < control_elements.size(); ++i) {
    WebFormControlElement element = control_elements[i];
    if (!IsAutofillableElement(element))
      continue;

    if (requirements & REQUIRE_AUTOCOMPLETE) {
      // TODO(isherman): WebKit currently doesn't handle the autocomplete
      // attribute for select or textarea elements, but it probably should.
      WebInputElement* input_element = toWebInputElement(&control_elements[i]);
      if (IsAutofillableInputElement(input_element) &&
          !SatisfiesRequireAutocomplete(*input_element))
        continue;
    }

    autofillable_elements->push_back(element);
  }
}

// Populates |field| from |element|.  The name, form control type and
// autocomplete attribute are always set; the remaining members are set only
// for autofillable elements, and |field->value| only when |extract_mask|
// includes EXTRACT_VALUE.
void WebFormControlElementToFormField(const WebFormControlElement& element,
                                      ExtractMask extract_mask,
                                      FormFieldData* field) {
  DCHECK(field);
  DCHECK(!element.isNull());
  CR_DEFINE_STATIC_LOCAL(WebString, kAutocomplete, ("autocomplete"));

  // The label is not officially part of a WebFormControlElement; however, the
  // labels for all form control elements are scraped from the DOM and set in
  // WebFormElementToFormData.
  field->name = element.nameForAutofill();
  field->form_control_type = base::UTF16ToUTF8(element.formControlType());
  field->autocomplete_attribute =
      base::UTF16ToUTF8(element.getAttribute(kAutocomplete));
  if (field->autocomplete_attribute.size() > kMaxDataLength) {
    // Discard overly long attribute values to avoid DOS-ing the browser
    // process.  However, send over a default string to indicate that the
    // attribute was present.
    field->autocomplete_attribute = "x-max-data-length-exceeded";
  }

  if (!IsAutofillableElement(element))
    return;

  // State common to autofillable inputs and textareas (not selects).
  const WebInputElement* input_element = toWebInputElement(&element);
  if (IsAutofillableInputElement(input_element) ||
      IsTextAreaElement(element)) {
    field->is_autofilled = element.isAutofilled();
    field->is_focusable = element.isFocusable();
    field->should_autocomplete = element.autoComplete();
    field->text_direction = element.directionForFormData() ==
        "rtl" ? base::i18n::RIGHT_TO_LEFT : base::i18n::LEFT_TO_RIGHT;
  }

  if (IsAutofillableInputElement(input_element)) {
    if (IsTextInput(input_element))
      field->max_length = input_element->maxLength();

    field->is_checkable = IsCheckableElement(input_element);
    field->is_checked = input_element->isChecked();
  } else if (IsTextAreaElement(element)) {
    // Nothing more to do in this case.
  } else if (extract_mask & EXTRACT_OPTIONS) {
    // Set option strings on the field if available.
    DCHECK(IsSelectElement(element));
    const WebSelectElement select_element = element.toConst<WebSelectElement>();
    GetOptionStringsFromElement(select_element,
                                &field->option_values,
                                &field->option_contents);
  }

  if (!(extract_mask & EXTRACT_VALUE))
    return;

  base::string16 value = element.value();

  if (IsSelectElement(element)) {
    const WebSelectElement select_element = element.toConst<WebSelectElement>();
    // Convert the |select_element| value to text if requested.
    if (extract_mask & EXTRACT_OPTION_TEXT) {
      WebVector<WebElement> list_items = select_element.listItems();
      for (size_t i = 0; i < list_items.size(); ++i) {
        if (IsOptionElement(list_items[i])) {
          const WebOptionElement option_element =
              list_items[i].toConst<WebOptionElement>();
          // Use the text of the first option whose value matches.
          if (option_element.value() == value) {
            value = option_element.text();
            break;
          }
        }
      }
    }
  }

  // Constrain the maximum data length to prevent a malicious site from DOS'ing
  // the browser: http://crbug.com/49332
  if (value.size() > kMaxDataLength)
    value = value.substr(0, kMaxDataLength);

  field->value = value;
}

bool WebFormElementToFormData(
    const blink::WebFormElement& form_element,
    const blink::WebFormControlElement& form_control_element,
    RequirementsMask requirements,
    ExtractMask extract_mask,
    FormData* form,
    FormFieldData* field) {
  CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label"));
  CR_DEFINE_STATIC_LOCAL(WebString, kFor, ("for"));
  CR_DEFINE_STATIC_LOCAL(WebString, kHidden, ("hidden"));

  const WebFrame* frame = form_element.document().frame();
  if (!frame)
    return false;

  if (requirements & REQUIRE_AUTOCOMPLETE && !form_element.autoComplete())
    return false;

  form->name = GetFormIdentifier(form_element);
  form->method = form_element.method();
  form->origin = frame->document().url();
  form->action = frame->document().completeURL(form_element.action());
  form->user_submitted = form_element.wasUserSubmitted();

  // If the completed URL is not valid, just use the action we get from
  // WebKit.
  if (!form->action.is_valid())
    form->action = GURL(form_element.action());

  // A map from a FormFieldData's name to the FormFieldData itself.
  std::map<base::string16, FormFieldData*> name_map;

  // The extracted FormFields.  We use pointers so we can store them in
  // |name_map|.
872 ScopedVector<FormFieldData> form_fields; 873 874 WebVector<WebFormControlElement> control_elements; 875 form_element.getFormControlElements(control_elements); 876 877 // A vector of bools that indicate whether each field in the form meets the 878 // requirements and thus will be in the resulting |form|. 879 std::vector<bool> fields_extracted(control_elements.size(), false); 880 881 for (size_t i = 0; i < control_elements.size(); ++i) { 882 const WebFormControlElement& control_element = control_elements[i]; 883 884 if (!IsAutofillableElement(control_element)) 885 continue; 886 887 const WebInputElement* input_element = toWebInputElement(&control_element); 888 if (requirements & REQUIRE_AUTOCOMPLETE && 889 IsAutofillableInputElement(input_element) && 890 !SatisfiesRequireAutocomplete(*input_element)) 891 continue; 892 893 // Create a new FormFieldData, fill it out and map it to the field's name. 894 FormFieldData* form_field = new FormFieldData; 895 WebFormControlElementToFormField(control_element, extract_mask, form_field); 896 form_fields.push_back(form_field); 897 // TODO(jhawkins): A label element is mapped to a form control element's id. 898 // field->name() will contain the id only if the name does not exist. Add 899 // an id() method to WebFormControlElement and use that here. 900 name_map[form_field->name] = form_field; 901 fields_extracted[i] = true; 902 } 903 904 // If we failed to extract any fields, give up. Also, to avoid overly 905 // expensive computation, we impose a maximum number of allowable fields. 906 if (form_fields.empty() || form_fields.size() > kMaxParseableFields) 907 return false; 908 909 // Loop through the label elements inside the form element. 
For each label 910 // element, get the corresponding form control element, use the form control 911 // element's name as a key into the <name, FormFieldData> map to find the 912 // previously created FormFieldData and set the FormFieldData's label to the 913 // label.firstChild().nodeValue() of the label element. 914 WebElementCollection labels = form_element.getElementsByTagName(kLabel); 915 DCHECK(!labels.isNull()); 916 for (WebElement item = labels.firstItem(); !item.isNull(); 917 item = labels.nextItem()) { 918 WebLabelElement label = item.to<WebLabelElement>(); 919 WebFormControlElement field_element = 920 label.correspondingControl().to<WebFormControlElement>(); 921 922 base::string16 element_name; 923 if (field_element.isNull()) { 924 // Sometimes site authors will incorrectly specify the corresponding 925 // field element's name rather than its id, so we compensate here. 926 element_name = label.getAttribute(kFor); 927 } else if ( 928 !field_element.isFormControlElement() || 929 field_element.formControlType() == kHidden) { 930 continue; 931 } else { 932 element_name = field_element.nameForAutofill(); 933 } 934 935 std::map<base::string16, FormFieldData*>::iterator iter = 936 name_map.find(element_name); 937 if (iter != name_map.end()) { 938 base::string16 label_text = FindChildText(label); 939 940 // Concatenate labels because some sites might have multiple label 941 // candidates. 942 if (!iter->second->label.empty() && !label_text.empty()) 943 iter->second->label += base::ASCIIToUTF16(" "); 944 iter->second->label += label_text; 945 } 946 } 947 948 // Loop through the form control elements, extracting the label text from 949 // the DOM. We use the |fields_extracted| vector to make sure we assign the 950 // extracted label to the correct field, as it's possible |form_fields| will 951 // not contain all of the elements in |control_elements|. 
952 for (size_t i = 0, field_idx = 0; 953 i < control_elements.size() && field_idx < form_fields.size(); ++i) { 954 // This field didn't meet the requirements, so don't try to find a label 955 // for it. 956 if (!fields_extracted[i]) 957 continue; 958 959 const WebFormControlElement& control_element = control_elements[i]; 960 if (form_fields[field_idx]->label.empty()) 961 form_fields[field_idx]->label = InferLabelForElement(control_element); 962 963 if (field && form_control_element == control_element) 964 *field = *form_fields[field_idx]; 965 966 ++field_idx; 967 } 968 969 // Copy the created FormFields into the resulting FormData object. 970 for (ScopedVector<FormFieldData>::const_iterator iter = form_fields.begin(); 971 iter != form_fields.end(); ++iter) { 972 form->fields.push_back(**iter); 973 } 974 975 return true; 976 } 977 978 bool FindFormAndFieldForFormControlElement(const WebFormControlElement& element, 979 FormData* form, 980 FormFieldData* field, 981 RequirementsMask requirements) { 982 if (!IsAutofillableElement(element)) 983 return false; 984 985 const WebFormElement form_element = element.form(); 986 if (form_element.isNull()) 987 return false; 988 989 ExtractMask extract_mask = 990 static_cast<ExtractMask>(EXTRACT_VALUE | EXTRACT_OPTIONS); 991 return WebFormElementToFormData(form_element, 992 element, 993 requirements, 994 extract_mask, 995 form, 996 field); 997 } 998 999 void FillForm(const FormData& form, const WebFormControlElement& element) { 1000 WebFormElement form_element = element.form(); 1001 if (form_element.isNull()) 1002 return; 1003 1004 ForEachMatchingFormField(form_element, 1005 element, 1006 form, 1007 FILTER_ALL_NON_EDITIABLE_ELEMENTS, 1008 false, /* dont force override */ 1009 &FillFormField); 1010 } 1011 1012 void FillFormIncludingNonFocusableElements(const FormData& form_data, 1013 const WebFormElement& form_element) { 1014 if (form_element.isNull()) 1015 return; 1016 1017 FieldFilterMask filter_mask = 
static_cast<FieldFilterMask>( 1018 FILTER_DISABLED_ELEMENTS | FILTER_READONLY_ELEMENTS); 1019 ForEachMatchingFormField(form_element, 1020 WebInputElement(), 1021 form_data, 1022 filter_mask, 1023 true, /* force override */ 1024 &FillFormField); 1025 } 1026 1027 void FillFormForAllElements(const FormData& form_data, 1028 const WebFormElement& form_element) { 1029 if (form_element.isNull()) 1030 return; 1031 1032 ForEachMatchingFormField(form_element, 1033 WebInputElement(), 1034 form_data, 1035 FILTER_NONE, 1036 true, /* force override */ 1037 &FillFormField); 1038 } 1039 1040 void PreviewForm(const FormData& form, const WebFormControlElement& element) { 1041 WebFormElement form_element = element.form(); 1042 if (form_element.isNull()) 1043 return; 1044 1045 ForEachMatchingFormField(form_element, 1046 element, 1047 form, 1048 FILTER_ALL_NON_EDITIABLE_ELEMENTS, 1049 false, /* dont force override */ 1050 &PreviewFormField); 1051 } 1052 1053 bool ClearPreviewedFormWithElement(const WebFormControlElement& element, 1054 bool was_autofilled) { 1055 WebFormElement form_element = element.form(); 1056 if (form_element.isNull()) 1057 return false; 1058 1059 std::vector<WebFormControlElement> control_elements; 1060 ExtractAutofillableElements(form_element, REQUIRE_AUTOCOMPLETE, 1061 &control_elements); 1062 for (size_t i = 0; i < control_elements.size(); ++i) { 1063 // There might be unrelated elements in this form which have already been 1064 // auto-filled. For example, the user might have already filled the address 1065 // part of a form and now be dealing with the credit card section. We only 1066 // want to reset the auto-filled status for fields that were previewed. 1067 WebFormControlElement control_element = control_elements[i]; 1068 1069 // Only text input, textarea and select elements can be previewed. 
1070 WebInputElement* input_element = toWebInputElement(&control_element); 1071 if (!IsTextInput(input_element) && 1072 !IsMonthInput(input_element) && 1073 !IsTextAreaElement(control_element) && 1074 !IsSelectElement(control_element)) 1075 continue; 1076 1077 // If the element is not auto-filled, we did not preview it, 1078 // so there is nothing to reset. 1079 if(!control_element.isAutofilled()) 1080 continue; 1081 1082 if ((IsTextInput(input_element) || 1083 IsMonthInput(input_element) || 1084 IsTextAreaElement(control_element) || 1085 IsSelectElement(control_element)) && 1086 control_element.suggestedValue().isEmpty()) 1087 continue; 1088 1089 // Clear the suggested value. For the initiating node, also restore the 1090 // original value. 1091 if (IsTextInput(input_element) || IsMonthInput(input_element) || 1092 IsTextAreaElement(control_element)) { 1093 control_element.setSuggestedValue(WebString()); 1094 bool is_initiating_node = (element == control_element); 1095 if (is_initiating_node) { 1096 control_element.setAutofilled(was_autofilled); 1097 // Clearing the suggested value in the focused node (above) can cause 1098 // selection to be lost. We force selection range to restore the text 1099 // cursor. 
1100 int length = control_element.value().length(); 1101 control_element.setSelectionRange(length, length); 1102 } else { 1103 control_element.setAutofilled(false); 1104 } 1105 } else if (IsSelectElement(control_element)) { 1106 control_element.setSuggestedValue(WebString()); 1107 control_element.setAutofilled(false); 1108 } 1109 } 1110 1111 return true; 1112 } 1113 1114 bool FormWithElementIsAutofilled(const WebInputElement& element) { 1115 WebFormElement form_element = element.form(); 1116 if (form_element.isNull()) 1117 return false; 1118 1119 std::vector<WebFormControlElement> control_elements; 1120 ExtractAutofillableElements(form_element, REQUIRE_AUTOCOMPLETE, 1121 &control_elements); 1122 for (size_t i = 0; i < control_elements.size(); ++i) { 1123 WebInputElement* input_element = toWebInputElement(&control_elements[i]); 1124 if (!IsAutofillableInputElement(input_element)) 1125 continue; 1126 1127 if (input_element->isAutofilled()) 1128 return true; 1129 } 1130 1131 return false; 1132 } 1133 1134 bool IsWebpageEmpty(const blink::WebFrame* frame) { 1135 blink::WebDocument document = frame->document(); 1136 1137 return IsWebElementEmpty(document.head()) && 1138 IsWebElementEmpty(document.body()); 1139 } 1140 1141 bool IsWebElementEmpty(const blink::WebElement& element) { 1142 // This array contains all tags which can be present in an empty page. 1143 const char* const kAllowedValue[] = { 1144 "script", 1145 "meta", 1146 "title", 1147 }; 1148 const size_t kAllowedValueLength = arraysize(kAllowedValue); 1149 1150 if (element.isNull()) 1151 return true; 1152 // The childNodes method is not a const method. Therefore it cannot be called 1153 // on a const reference. Therefore we need a const cast. 
1154 const blink::WebNodeList& children = 1155 const_cast<blink::WebElement&>(element).childNodes(); 1156 for (size_t i = 0; i < children.length(); ++i) { 1157 const blink::WebNode& item = children.item(i); 1158 1159 if (item.isTextNode() && 1160 !base::ContainsOnlyChars(item.nodeValue().utf8(), 1161 base::kWhitespaceASCII)) 1162 return false; 1163 1164 // We ignore all other items with names which begin with 1165 // the character # because they are not html tags. 1166 if (item.nodeName().utf8()[0] == '#') 1167 continue; 1168 1169 bool tag_is_allowed = false; 1170 // Test if the item name is in the kAllowedValue array 1171 for (size_t allowed_value_index = 0; 1172 allowed_value_index < kAllowedValueLength; ++allowed_value_index) { 1173 if (HasTagName(item, 1174 WebString::fromUTF8(kAllowedValue[allowed_value_index]))) { 1175 tag_is_allowed = true; 1176 break; 1177 } 1178 } 1179 if (!tag_is_allowed) 1180 return false; 1181 } 1182 return true; 1183 } 1184 1185 gfx::RectF GetScaledBoundingBox(float scale, WebFormControlElement* element) { 1186 gfx::Rect bounding_box(element->boundsInViewportSpace()); 1187 return gfx::RectF(bounding_box.x() * scale, 1188 bounding_box.y() * scale, 1189 bounding_box.width() * scale, 1190 bounding_box.height() * scale); 1191 } 1192 1193 } // namespace autofill 1194