1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "components/autofill/content/renderer/form_autofill_util.h" 6 7 #include <map> 8 9 #include "base/command_line.h" 10 #include "base/logging.h" 11 #include "base/memory/scoped_vector.h" 12 #include "base/metrics/field_trial.h" 13 #include "base/strings/string_util.h" 14 #include "base/strings/utf_string_conversions.h" 15 #include "components/autofill/core/common/autofill_data_validation.h" 16 #include "components/autofill/core/common/autofill_switches.h" 17 #include "components/autofill/core/common/form_data.h" 18 #include "components/autofill/core/common/form_field_data.h" 19 #include "components/autofill/core/common/web_element_descriptor.h" 20 #include "third_party/WebKit/public/platform/WebString.h" 21 #include "third_party/WebKit/public/platform/WebVector.h" 22 #include "third_party/WebKit/public/web/WebDocument.h" 23 #include "third_party/WebKit/public/web/WebElement.h" 24 #include "third_party/WebKit/public/web/WebElementCollection.h" 25 #include "third_party/WebKit/public/web/WebExceptionCode.h" 26 #include "third_party/WebKit/public/web/WebFormControlElement.h" 27 #include "third_party/WebKit/public/web/WebFormElement.h" 28 #include "third_party/WebKit/public/web/WebInputElement.h" 29 #include "third_party/WebKit/public/web/WebLabelElement.h" 30 #include "third_party/WebKit/public/web/WebLocalFrame.h" 31 #include "third_party/WebKit/public/web/WebNode.h" 32 #include "third_party/WebKit/public/web/WebNodeList.h" 33 #include "third_party/WebKit/public/web/WebOptionElement.h" 34 #include "third_party/WebKit/public/web/WebSelectElement.h" 35 #include "third_party/WebKit/public/web/WebTextAreaElement.h" 36 37 using blink::WebDocument; 38 using blink::WebElement; 39 using blink::WebElementCollection; 40 using blink::WebExceptionCode; 41 using blink::WebFormControlElement; 42 using blink::WebFormElement; 43 using blink::WebFrame; 44 using blink::WebInputElement; 45 using blink::WebLabelElement; 46 using blink::WebNode; 47 using blink::WebNodeList; 48 using blink::WebOptionElement; 49 using blink::WebSelectElement; 50 using blink::WebTextAreaElement; 51 using blink::WebString; 52 using blink::WebVector; 53 54 namespace autofill { 55 namespace { 56 57 bool IsOptionElement(const WebElement& element) { 58 CR_DEFINE_STATIC_LOCAL(WebString, kOption, ("option")); 59 return element.hasHTMLTagName(kOption); 60 } 61 62 bool IsScriptElement(const WebElement& element) { 63 CR_DEFINE_STATIC_LOCAL(WebString, kScript, ("script")); 64 return element.hasHTMLTagName(kScript); 65 } 66 67 bool IsNoScriptElement(const WebElement& element) { 68 CR_DEFINE_STATIC_LOCAL(WebString, kNoScript, ("noscript")); 69 return element.hasHTMLTagName(kNoScript); 70 } 71 72 bool HasTagName(const WebNode& node, const blink::WebString& tag) { 73 return node.isElementNode() && node.toConst<WebElement>().hasHTMLTagName(tag); 74 } 75 76 bool IsAutofillableElement(const WebFormControlElement& element) { 77 const WebInputElement* input_element = toWebInputElement(&element); 78 return IsAutofillableInputElement(input_element) || 79 IsSelectElement(element) || 80 IsTextAreaElement(element); 81 } 82 83 // Check whether the given field satisfies the REQUIRE_AUTOCOMPLETE requirement. 84 bool SatisfiesRequireAutocomplete(const WebInputElement& input_element) { 85 return input_element.autoComplete(); 86 } 87 88 // Appends |suffix| to |prefix| so that any intermediary whitespace is collapsed 89 // to a single space. If |force_whitespace| is true, then the resulting string 90 // is guaranteed to have a space between |prefix| and |suffix|. Otherwise, the 91 // result includes a space only if |prefix| has trailing whitespace or |suffix| 92 // has leading whitespace. 93 // A few examples: 94 // * CombineAndCollapseWhitespace("foo", "bar", false) -> "foobar" 95 // * CombineAndCollapseWhitespace("foo", "bar", true) -> "foo bar" 96 // * CombineAndCollapseWhitespace("foo ", "bar", false) -> "foo bar" 97 // * CombineAndCollapseWhitespace("foo", " bar", false) -> "foo bar" 98 // * CombineAndCollapseWhitespace("foo", " bar", true) -> "foo bar" 99 // * CombineAndCollapseWhitespace("foo ", " bar", false) -> "foo bar" 100 // * CombineAndCollapseWhitespace(" foo", "bar ", false) -> " foobar " 101 // * CombineAndCollapseWhitespace(" foo", "bar ", true) -> " foo bar " 102 const base::string16 CombineAndCollapseWhitespace( 103 const base::string16& prefix, 104 const base::string16& suffix, 105 bool force_whitespace) { 106 base::string16 prefix_trimmed; 107 base::TrimPositions prefix_trailing_whitespace = 108 base::TrimWhitespace(prefix, base::TRIM_TRAILING, &prefix_trimmed); 109 110 // Recursively compute the children's text. 111 base::string16 suffix_trimmed; 112 base::TrimPositions suffix_leading_whitespace = 113 base::TrimWhitespace(suffix, base::TRIM_LEADING, &suffix_trimmed); 114 115 if (prefix_trailing_whitespace || suffix_leading_whitespace || 116 force_whitespace) { 117 return prefix_trimmed + base::ASCIIToUTF16(" ") + suffix_trimmed; 118 } else { 119 return prefix_trimmed + suffix_trimmed; 120 } 121 } 122 123 // This is a helper function for the FindChildText() function (see below). 124 // Search depth is limited with the |depth| parameter. 125 base::string16 FindChildTextInner(const WebNode& node, int depth) { 126 if (depth <= 0 || node.isNull()) 127 return base::string16(); 128 129 // Skip over comments. 130 if (node.nodeType() == WebNode::CommentNode) 131 return FindChildTextInner(node.nextSibling(), depth - 1); 132 133 if (node.nodeType() != WebNode::ElementNode && 134 node.nodeType() != WebNode::TextNode) 135 return base::string16(); 136 137 // Ignore elements known not to contain inferable labels. 138 if (node.isElementNode()) { 139 const WebElement element = node.toConst<WebElement>(); 140 if (IsOptionElement(element) || 141 IsScriptElement(element) || 142 IsNoScriptElement(element) || 143 (element.isFormControlElement() && 144 IsAutofillableElement(element.toConst<WebFormControlElement>()))) { 145 return base::string16(); 146 } 147 } 148 149 // Extract the text exactly at this node. 150 base::string16 node_text = node.nodeValue(); 151 152 // Recursively compute the children's text. 153 // Preserve inter-element whitespace separation. 154 base::string16 child_text = FindChildTextInner(node.firstChild(), depth - 1); 155 bool add_space = node.nodeType() == WebNode::TextNode && node_text.empty(); 156 node_text = CombineAndCollapseWhitespace(node_text, child_text, add_space); 157 158 // Recursively compute the siblings' text. 159 // Again, preserve inter-element whitespace separation. 160 base::string16 sibling_text = 161 FindChildTextInner(node.nextSibling(), depth - 1); 162 add_space = node.nodeType() == WebNode::TextNode && node_text.empty(); 163 node_text = CombineAndCollapseWhitespace(node_text, sibling_text, add_space); 164 165 return node_text; 166 } 167 168 // Returns the aggregated values of the descendants of |element| that are 169 // non-empty text nodes. This is a faster alternative to |innerText()| for 170 // performance critical operations. It does a full depth-first search so can be 171 // used when the structure is not directly known. However, unlike with 172 // |innerText()|, the search depth and breadth are limited to a fixed threshold. 173 // Whitespace is trimmed from text accumulated at descendant nodes. 174 base::string16 FindChildText(const WebNode& node) { 175 if (node.isTextNode()) 176 return node.nodeValue(); 177 178 WebNode child = node.firstChild(); 179 180 const int kChildSearchDepth = 10; 181 base::string16 node_text = FindChildTextInner(child, kChildSearchDepth); 182 base::TrimWhitespace(node_text, base::TRIM_ALL, &node_text); 183 return node_text; 184 } 185 186 // Helper for |InferLabelForElement()| that infers a label, if possible, from 187 // a previous sibling of |element|, 188 // e.g. Some Text <input ...> 189 // or Some <span>Text</span> <input ...> 190 // or <p>Some Text</p><input ...> 191 // or <label>Some Text</label> <input ...> 192 // or Some Text <img><input ...> 193 // or <b>Some Text</b><br/> <input ...>. 194 base::string16 InferLabelFromPrevious(const WebFormControlElement& element) { 195 base::string16 inferred_label; 196 WebNode previous = element; 197 while (true) { 198 previous = previous.previousSibling(); 199 if (previous.isNull()) 200 break; 201 202 // Skip over comments. 203 WebNode::NodeType node_type = previous.nodeType(); 204 if (node_type == WebNode::CommentNode) 205 continue; 206 207 // Otherwise, only consider normal HTML elements and their contents. 208 if (node_type != WebNode::TextNode && 209 node_type != WebNode::ElementNode) 210 break; 211 212 // A label might be split across multiple "lightweight" nodes. 213 // Coalesce any text contained in multiple consecutive 214 // (a) plain text nodes or 215 // (b) inline HTML elements that are essentially equivalent to text nodes. 216 CR_DEFINE_STATIC_LOCAL(WebString, kBold, ("b")); 217 CR_DEFINE_STATIC_LOCAL(WebString, kStrong, ("strong")); 218 CR_DEFINE_STATIC_LOCAL(WebString, kSpan, ("span")); 219 CR_DEFINE_STATIC_LOCAL(WebString, kFont, ("font")); 220 if (previous.isTextNode() || 221 HasTagName(previous, kBold) || HasTagName(previous, kStrong) || 222 HasTagName(previous, kSpan) || HasTagName(previous, kFont)) { 223 base::string16 value = FindChildText(previous); 224 // A text node's value will be empty if it is for a line break. 225 bool add_space = previous.isTextNode() && value.empty(); 226 inferred_label = 227 CombineAndCollapseWhitespace(value, inferred_label, add_space); 228 continue; 229 } 230 231 // If we have identified a partial label and have reached a non-lightweight 232 // element, consider the label to be complete. 233 base::string16 trimmed_label; 234 base::TrimWhitespace(inferred_label, base::TRIM_ALL, &trimmed_label); 235 if (!trimmed_label.empty()) 236 break; 237 238 // <img> and <br> tags often appear between the input element and its 239 // label text, so skip over them. 240 CR_DEFINE_STATIC_LOCAL(WebString, kImage, ("img")); 241 CR_DEFINE_STATIC_LOCAL(WebString, kBreak, ("br")); 242 if (HasTagName(previous, kImage) || HasTagName(previous, kBreak)) 243 continue; 244 245 // We only expect <p> and <label> tags to contain the full label text. 246 CR_DEFINE_STATIC_LOCAL(WebString, kPage, ("p")); 247 CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label")); 248 if (HasTagName(previous, kPage) || HasTagName(previous, kLabel)) 249 inferred_label = FindChildText(previous); 250 251 break; 252 } 253 254 base::TrimWhitespace(inferred_label, base::TRIM_ALL, &inferred_label); 255 return inferred_label; 256 } 257 258 // Helper for |InferLabelForElement()| that infers a label, if possible, from 259 // enclosing list item, 260 // e.g. <li>Some Text<input ...><input ...><input ...></tr> 261 base::string16 InferLabelFromListItem(const WebFormControlElement& element) { 262 WebNode parent = element.parentNode(); 263 CR_DEFINE_STATIC_LOCAL(WebString, kListItem, ("li")); 264 while (!parent.isNull() && parent.isElementNode() && 265 !parent.to<WebElement>().hasHTMLTagName(kListItem)) { 266 parent = parent.parentNode(); 267 } 268 269 if (!parent.isNull() && HasTagName(parent, kListItem)) 270 return FindChildText(parent); 271 272 return base::string16(); 273 } 274 275 // Helper for |InferLabelForElement()| that infers a label, if possible, from 276 // surrounding table structure, 277 // e.g. <tr><td>Some Text</td><td><input ...></td></tr> 278 // or <tr><th>Some Text</th><td><input ...></td></tr> 279 // or <tr><td><b>Some Text</b></td><td><b><input ...></b></td></tr> 280 // or <tr><th><b>Some Text</b></th><td><b><input ...></b></td></tr> 281 base::string16 InferLabelFromTableColumn(const WebFormControlElement& element) { 282 CR_DEFINE_STATIC_LOCAL(WebString, kTableCell, ("td")); 283 WebNode parent = element.parentNode(); 284 while (!parent.isNull() && parent.isElementNode() && 285 !parent.to<WebElement>().hasHTMLTagName(kTableCell)) { 286 parent = parent.parentNode(); 287 } 288 289 if (parent.isNull()) 290 return base::string16(); 291 292 // Check all previous siblings, skipping non-element nodes, until we find a 293 // non-empty text block. 294 base::string16 inferred_label; 295 WebNode previous = parent.previousSibling(); 296 CR_DEFINE_STATIC_LOCAL(WebString, kTableHeader, ("th")); 297 while (inferred_label.empty() && !previous.isNull()) { 298 if (HasTagName(previous, kTableCell) || HasTagName(previous, kTableHeader)) 299 inferred_label = FindChildText(previous); 300 301 previous = previous.previousSibling(); 302 } 303 304 return inferred_label; 305 } 306 307 // Helper for |InferLabelForElement()| that infers a label, if possible, from 308 // surrounding table structure, 309 // e.g. <tr><td>Some Text</td></tr><tr><td><input ...></td></tr> 310 base::string16 InferLabelFromTableRow(const WebFormControlElement& element) { 311 CR_DEFINE_STATIC_LOCAL(WebString, kTableRow, ("tr")); 312 WebNode parent = element.parentNode(); 313 while (!parent.isNull() && parent.isElementNode() && 314 !parent.to<WebElement>().hasHTMLTagName(kTableRow)) { 315 parent = parent.parentNode(); 316 } 317 318 if (parent.isNull()) 319 return base::string16(); 320 321 // Check all previous siblings, skipping non-element nodes, until we find a 322 // non-empty text block. 323 base::string16 inferred_label; 324 WebNode previous = parent.previousSibling(); 325 while (inferred_label.empty() && !previous.isNull()) { 326 if (HasTagName(previous, kTableRow)) 327 inferred_label = FindChildText(previous); 328 329 previous = previous.previousSibling(); 330 } 331 332 return inferred_label; 333 } 334 335 // Helper for |InferLabelForElement()| that infers a label, if possible, from 336 // a surrounding div table, 337 // e.g. <div>Some Text<span><input ...></span></div> 338 // e.g. <div>Some Text</div><div><input ...></div> 339 base::string16 InferLabelFromDivTable(const WebFormControlElement& element) { 340 WebNode node = element.parentNode(); 341 bool looking_for_parent = true; 342 343 // Search the sibling and parent <div>s until we find a candidate label. 344 base::string16 inferred_label; 345 CR_DEFINE_STATIC_LOCAL(WebString, kDiv, ("div")); 346 CR_DEFINE_STATIC_LOCAL(WebString, kTable, ("table")); 347 CR_DEFINE_STATIC_LOCAL(WebString, kFieldSet, ("fieldset")); 348 while (inferred_label.empty() && !node.isNull()) { 349 if (HasTagName(node, kDiv)) { 350 looking_for_parent = false; 351 inferred_label = FindChildText(node); 352 } else if (looking_for_parent && 353 (HasTagName(node, kTable) || HasTagName(node, kFieldSet))) { 354 // If the element is in a table or fieldset, its label most likely is too. 355 break; 356 } 357 358 if (node.previousSibling().isNull()) { 359 // If there are no more siblings, continue walking up the tree. 360 looking_for_parent = true; 361 } 362 363 if (looking_for_parent) 364 node = node.parentNode(); 365 else 366 node = node.previousSibling(); 367 } 368 369 return inferred_label; 370 } 371 372 // Helper for |InferLabelForElement()| that infers a label, if possible, from 373 // a surrounding definition list, 374 // e.g. <dl><dt>Some Text</dt><dd><input ...></dd></dl> 375 // e.g. <dl><dt><b>Some Text</b></dt><dd><b><input ...></b></dd></dl> 376 base::string16 InferLabelFromDefinitionList( 377 const WebFormControlElement& element) { 378 CR_DEFINE_STATIC_LOCAL(WebString, kDefinitionData, ("dd")); 379 WebNode parent = element.parentNode(); 380 while (!parent.isNull() && parent.isElementNode() && 381 !parent.to<WebElement>().hasHTMLTagName(kDefinitionData)) 382 parent = parent.parentNode(); 383 384 if (parent.isNull() || !HasTagName(parent, kDefinitionData)) 385 return base::string16(); 386 387 // Skip by any intervening text nodes. 388 WebNode previous = parent.previousSibling(); 389 while (!previous.isNull() && previous.isTextNode()) 390 previous = previous.previousSibling(); 391 392 CR_DEFINE_STATIC_LOCAL(WebString, kDefinitionTag, ("dt")); 393 if (previous.isNull() || !HasTagName(previous, kDefinitionTag)) 394 return base::string16(); 395 396 return FindChildText(previous); 397 } 398 399 // Infers corresponding label for |element| from surrounding context in the DOM, 400 // e.g. the contents of the preceding <p> tag or text element. 401 base::string16 InferLabelForElement(const WebFormControlElement& element) { 402 base::string16 inferred_label = InferLabelFromPrevious(element); 403 if (!inferred_label.empty()) 404 return inferred_label; 405 406 // If we didn't find a label, check for list item case. 407 inferred_label = InferLabelFromListItem(element); 408 if (!inferred_label.empty()) 409 return inferred_label; 410 411 // If we didn't find a label, check for table cell case. 412 inferred_label = InferLabelFromTableColumn(element); 413 if (!inferred_label.empty()) 414 return inferred_label; 415 416 // If we didn't find a label, check for table row case. 417 inferred_label = InferLabelFromTableRow(element); 418 if (!inferred_label.empty()) 419 return inferred_label; 420 421 // If we didn't find a label, check for definition list case. 422 inferred_label = InferLabelFromDefinitionList(element); 423 if (!inferred_label.empty()) 424 return inferred_label; 425 426 // If we didn't find a label, check for div table case. 427 return InferLabelFromDivTable(element); 428 } 429 430 // Fills |option_strings| with the values of the <option> elements present in 431 // |select_element|. 432 void GetOptionStringsFromElement(const WebSelectElement& select_element, 433 std::vector<base::string16>* option_values, 434 std::vector<base::string16>* option_contents) { 435 DCHECK(!select_element.isNull()); 436 437 option_values->clear(); 438 option_contents->clear(); 439 WebVector<WebElement> list_items = select_element.listItems(); 440 441 // Constrain the maximum list length to prevent a malicious site from DOS'ing 442 // the browser, without entirely breaking autocomplete for some extreme 443 // legitimate sites: http://crbug.com/49332 and http://crbug.com/363094 444 if (list_items.size() > kMaxListSize) 445 return; 446 447 option_values->reserve(list_items.size()); 448 option_contents->reserve(list_items.size()); 449 for (size_t i = 0; i < list_items.size(); ++i) { 450 if (IsOptionElement(list_items[i])) { 451 const WebOptionElement option = list_items[i].toConst<WebOptionElement>(); 452 option_values->push_back(option.value()); 453 option_contents->push_back(option.text()); 454 } 455 } 456 } 457 458 // The callback type used by |ForEachMatchingFormField()|. 459 typedef void (*Callback)(const FormFieldData&, 460 bool, /* is_initiating_element */ 461 blink::WebFormControlElement*); 462 463 // For each autofillable field in |data| that matches a field in the |form|, 464 // the |callback| is invoked with the corresponding |form| field data. 465 void ForEachMatchingFormField(const WebFormElement& form_element, 466 const WebElement& initiating_element, 467 const FormData& data, 468 bool only_focusable_elements, 469 bool force_override, 470 Callback callback) { 471 std::vector<WebFormControlElement> control_elements; 472 ExtractAutofillableElements(form_element, REQUIRE_AUTOCOMPLETE, 473 &control_elements); 474 475 if (control_elements.size() != data.fields.size()) { 476 // This case should be reachable only for pathological websites and tests, 477 // which add or remove form fields while the user is interacting with the 478 // Autofill popup. 479 return; 480 } 481 482 // It's possible that the site has injected fields into the form after the 483 // page has loaded, so we can't assert that the size of the cached control 484 // elements is equal to the size of the fields in |form|. Fortunately, the 485 // one case in the wild where this happens, paypal.com signup form, the fields 486 // are appended to the end of the form and are not visible. 487 for (size_t i = 0; i < control_elements.size(); ++i) { 488 WebFormControlElement* element = &control_elements[i]; 489 490 if (base::string16(element->nameForAutofill()) != data.fields[i].name) { 491 // This case should be reachable only for pathological websites, which 492 // rename form fields while the user is interacting with the Autofill 493 // popup. I (isherman) am not aware of any such websites, and so am 494 // optimistically including a NOTREACHED(). If you ever trip this check, 495 // please file a bug against me. 496 NOTREACHED(); 497 continue; 498 } 499 500 bool is_initiating_element = (*element == initiating_element); 501 502 // Only autofill empty fields and the field that initiated the filling, 503 // i.e. the field the user is currently editing and interacting with. 504 const WebInputElement* input_element = toWebInputElement(element); 505 if (!force_override && !is_initiating_element && 506 ((IsAutofillableInputElement(input_element) || 507 IsTextAreaElement(*element)) && 508 !element->value().isEmpty())) 509 continue; 510 511 if (!element->isEnabled() || element->isReadOnly() || 512 (only_focusable_elements && !element->isFocusable())) 513 continue; 514 515 callback(data.fields[i], is_initiating_element, element); 516 } 517 } 518 519 // Sets the |field|'s value to the value in |data|. 520 // Also sets the "autofilled" attribute, causing the background to be yellow. 521 void FillFormField(const FormFieldData& data, 522 bool is_initiating_node, 523 blink::WebFormControlElement* field) { 524 // Nothing to fill. 525 if (data.value.empty()) 526 return; 527 528 if (!data.is_autofilled) 529 return; 530 531 WebInputElement* input_element = toWebInputElement(field); 532 if (IsCheckableElement(input_element)) { 533 input_element->setChecked(data.is_checked, true); 534 } else { 535 base::string16 value = data.value; 536 if (IsTextInput(input_element) || IsMonthInput(input_element)) { 537 // If the maxlength attribute contains a negative value, maxLength() 538 // returns the default maxlength value. 539 value = value.substr(0, input_element->maxLength()); 540 } 541 field->setValue(value, true); 542 } 543 544 field->setAutofilled(true); 545 546 if (is_initiating_node && 547 ((IsTextInput(input_element) || IsMonthInput(input_element)) || 548 IsTextAreaElement(*field))) { 549 int length = field->value().length(); 550 field->setSelectionRange(length, length); 551 // Clear the current IME composition (the underline), if there is one. 552 field->document().frame()->unmarkText(); 553 } 554 } 555 556 // Sets the |field|'s "suggested" (non JS visible) value to the value in |data|. 557 // Also sets the "autofilled" attribute, causing the background to be yellow. 558 void PreviewFormField(const FormFieldData& data, 559 bool is_initiating_node, 560 blink::WebFormControlElement* field) { 561 // Nothing to preview. 562 if (data.value.empty()) 563 return; 564 565 if (!data.is_autofilled) 566 return; 567 568 // Preview input, textarea and select fields. For input fields, excludes 569 // checkboxes and radio buttons, as there is no provision for 570 // setSuggestedCheckedValue in WebInputElement. 571 WebInputElement* input_element = toWebInputElement(field); 572 if (IsTextInput(input_element) || IsMonthInput(input_element)) { 573 // If the maxlength attribute contains a negative value, maxLength() 574 // returns the default maxlength value. 575 input_element->setSuggestedValue( 576 data.value.substr(0, input_element->maxLength())); 577 input_element->setAutofilled(true); 578 } else if (IsTextAreaElement(*field) || IsSelectElement(*field)) { 579 field->setSuggestedValue(data.value); 580 field->setAutofilled(true); 581 } 582 583 if (is_initiating_node && 584 (IsTextInput(input_element) || IsTextAreaElement(*field))) { 585 // Select the part of the text that the user didn't type. 586 int start = field->value().length(); 587 int end = field->suggestedValue().length(); 588 field->setSelectionRange(start, end); 589 } 590 } 591 592 std::string RetrievalMethodToString( 593 const WebElementDescriptor::RetrievalMethod& method) { 594 switch (method) { 595 case WebElementDescriptor::CSS_SELECTOR: 596 return "CSS_SELECTOR"; 597 case WebElementDescriptor::ID: 598 return "ID"; 599 case WebElementDescriptor::NONE: 600 return "NONE"; 601 } 602 NOTREACHED(); 603 return "UNKNOWN"; 604 } 605 606 // Recursively checks whether |node| or any of its children have a non-empty 607 // bounding box. The recursion depth is bounded by |depth|. 608 bool IsWebNodeVisibleImpl(const blink::WebNode& node, const int depth) { 609 if (depth < 0) 610 return false; 611 if (node.hasNonEmptyBoundingBox()) 612 return true; 613 614 // The childNodes method is not a const method. Therefore it cannot be called 615 // on a const reference. Therefore we need a const cast. 616 const blink::WebNodeList& children = 617 const_cast<blink::WebNode&>(node).childNodes(); 618 size_t length = children.length(); 619 for (size_t i = 0; i < length; ++i) { 620 const blink::WebNode& item = children.item(i); 621 if (IsWebNodeVisibleImpl(item, depth - 1)) 622 return true; 623 } 624 return false; 625 } 626 627 } // namespace 628 629 const size_t kMaxParseableFields = 200; 630 631 bool IsMonthInput(const WebInputElement* element) { 632 CR_DEFINE_STATIC_LOCAL(WebString, kMonth, ("month")); 633 return element && !element->isNull() && element->formControlType() == kMonth; 634 } 635 636 // All text fields, including password fields, should be extracted. 637 bool IsTextInput(const WebInputElement* element) { 638 return element && !element->isNull() && element->isTextField(); 639 } 640 641 bool IsSelectElement(const WebFormControlElement& element) { 642 // Static for improved performance. 643 CR_DEFINE_STATIC_LOCAL(WebString, kSelectOne, ("select-one")); 644 return !element.isNull() && element.formControlType() == kSelectOne; 645 } 646 647 bool IsTextAreaElement(const WebFormControlElement& element) { 648 // Static for improved performance. 649 CR_DEFINE_STATIC_LOCAL(WebString, kTextArea, ("textarea")); 650 return !element.isNull() && element.formControlType() == kTextArea; 651 } 652 653 bool IsCheckableElement(const WebInputElement* element) { 654 if (!element || element->isNull()) 655 return false; 656 657 return element->isCheckbox() || element->isRadioButton(); 658 } 659 660 bool IsAutofillableInputElement(const WebInputElement* element) { 661 return IsTextInput(element) || 662 IsMonthInput(element) || 663 IsCheckableElement(element); 664 } 665 666 const base::string16 GetFormIdentifier(const WebFormElement& form) { 667 base::string16 identifier = form.name(); 668 CR_DEFINE_STATIC_LOCAL(WebString, kId, ("id")); 669 if (identifier.empty()) 670 identifier = form.getAttribute(kId); 671 672 return identifier; 673 } 674 675 bool IsWebNodeVisible(const blink::WebNode& node) { 676 // In the bug http://crbug.com/237216 the form's bounding box is empty 677 // however the form has non empty children. Thus we need to look at the 678 // form's children. 679 int kNodeSearchDepth = 2; 680 return IsWebNodeVisibleImpl(node, kNodeSearchDepth); 681 } 682 683 bool ClickElement(const WebDocument& document, 684 const WebElementDescriptor& element_descriptor) { 685 WebString web_descriptor = WebString::fromUTF8(element_descriptor.descriptor); 686 blink::WebElement element; 687 688 switch (element_descriptor.retrieval_method) { 689 case WebElementDescriptor::CSS_SELECTOR: { 690 WebExceptionCode ec = 0; 691 element = document.querySelector(web_descriptor, ec); 692 if (ec) 693 DVLOG(1) << "Query selector failed. Error code: " << ec << "."; 694 break; 695 } 696 case WebElementDescriptor::ID: 697 element = document.getElementById(web_descriptor); 698 break; 699 case WebElementDescriptor::NONE: 700 return true; 701 } 702 703 if (element.isNull()) { 704 DVLOG(1) << "Could not find " 705 << element_descriptor.descriptor 706 << " by " 707 << RetrievalMethodToString(element_descriptor.retrieval_method) 708 << "."; 709 return false; 710 } 711 712 element.simulateClick(); 713 return true; 714 } 715 716 // Fills |autofillable_elements| with all the auto-fillable form control 717 // elements in |form_element|. 718 void ExtractAutofillableElements( 719 const WebFormElement& form_element, 720 RequirementsMask requirements, 721 std::vector<WebFormControlElement>* autofillable_elements) { 722 WebVector<WebFormControlElement> control_elements; 723 form_element.getFormControlElements(control_elements); 724 725 autofillable_elements->clear(); 726 for (size_t i = 0; i < control_elements.size(); ++i) { 727 WebFormControlElement element = control_elements[i]; 728 if (!IsAutofillableElement(element)) 729 continue; 730 731 if (requirements & REQUIRE_AUTOCOMPLETE) { 732 // TODO(isherman): WebKit currently doesn't handle the autocomplete 733 // attribute for select or textarea elements, but it probably should. 734 WebInputElement* input_element = toWebInputElement(&control_elements[i]); 735 if (IsAutofillableInputElement(input_element) && 736 !SatisfiesRequireAutocomplete(*input_element)) 737 continue; 738 } 739 740 autofillable_elements->push_back(element); 741 } 742 } 743 744 void WebFormControlElementToFormField(const WebFormControlElement& element, 745 ExtractMask extract_mask, 746 FormFieldData* field) { 747 DCHECK(field); 748 DCHECK(!element.isNull()); 749 CR_DEFINE_STATIC_LOCAL(WebString, kAutocomplete, ("autocomplete")); 750 751 // The label is not officially part of a WebFormControlElement; however, the 752 // labels for all form control elements are scraped from the DOM and set in 753 // WebFormElementToFormData. 754 field->name = element.nameForAutofill(); 755 field->form_control_type = base::UTF16ToUTF8(element.formControlType()); 756 field->autocomplete_attribute = 757 base::UTF16ToUTF8(element.getAttribute(kAutocomplete)); 758 if (field->autocomplete_attribute.size() > kMaxDataLength) { 759 // Discard overly long attribute values to avoid DOS-ing the browser 760 // process. However, send over a default string to indicate that the 761 // attribute was present. 762 field->autocomplete_attribute = "x-max-data-length-exceeded"; 763 } 764 765 if (!IsAutofillableElement(element)) 766 return; 767 768 const WebInputElement* input_element = toWebInputElement(&element); 769 if (IsAutofillableInputElement(input_element) || 770 IsTextAreaElement(element)) { 771 field->is_autofilled = element.isAutofilled(); 772 field->is_focusable = element.isFocusable(); 773 field->should_autocomplete = element.autoComplete(); 774 field->text_direction = element.directionForFormData() == 775 "rtl" ? base::i18n::RIGHT_TO_LEFT : base::i18n::LEFT_TO_RIGHT; 776 } 777 778 if (IsAutofillableInputElement(input_element)) { 779 if (IsTextInput(input_element)) 780 field->max_length = input_element->maxLength(); 781 782 field->is_checkable = IsCheckableElement(input_element); 783 field->is_checked = input_element->isChecked(); 784 } else if (IsTextAreaElement(element)) { 785 // Nothing more to do in this case. 786 } else if (extract_mask & EXTRACT_OPTIONS) { 787 // Set option strings on the field if available. 788 DCHECK(IsSelectElement(element)); 789 const WebSelectElement select_element = element.toConst<WebSelectElement>(); 790 GetOptionStringsFromElement(select_element, 791 &field->option_values, 792 &field->option_contents); 793 } 794 795 if (!(extract_mask & EXTRACT_VALUE)) 796 return; 797 798 base::string16 value = element.value(); 799 800 if (IsSelectElement(element) && (extract_mask & EXTRACT_OPTION_TEXT)) { 801 const WebSelectElement select_element = element.toConst<WebSelectElement>(); 802 // Convert the |select_element| value to text if requested. 803 WebVector<WebElement> list_items = select_element.listItems(); 804 for (size_t i = 0; i < list_items.size(); ++i) { 805 if (IsOptionElement(list_items[i])) { 806 const WebOptionElement option_element = 807 list_items[i].toConst<WebOptionElement>(); 808 if (option_element.value() == value) { 809 value = option_element.text(); 810 break; 811 } 812 } 813 } 814 } 815 816 // Constrain the maximum data length to prevent a malicious site from DOS'ing 817 // the browser: http://crbug.com/49332 818 if (value.size() > kMaxDataLength) 819 value = value.substr(0, kMaxDataLength); 820 821 field->value = value; 822 } 823 824 bool WebFormElementToFormData( 825 const blink::WebFormElement& form_element, 826 const blink::WebFormControlElement& form_control_element, 827 RequirementsMask requirements, 828 ExtractMask extract_mask, 829 FormData* form, 830 FormFieldData* field) { 831 CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label")); 832 CR_DEFINE_STATIC_LOCAL(WebString, kFor, ("for")); 833 CR_DEFINE_STATIC_LOCAL(WebString, kHidden, ("hidden")); 834 835 const WebFrame* frame = form_element.document().frame(); 836 if (!frame) 837 return false; 838 839 if (requirements & REQUIRE_AUTOCOMPLETE && !form_element.autoComplete()) 840 return false; 841 842 form->name = GetFormIdentifier(form_element); 843 form->origin = frame->document().url(); 844 form->action = frame->document().completeURL(form_element.action()); 845 form->user_submitted = form_element.wasUserSubmitted(); 846 847 // If the completed URL is not valid, just use the action we get from 848 // WebKit. 849 if (!form->action.is_valid()) 850 form->action = GURL(form_element.action()); 851 852 // A map from a FormFieldData's name to the FormFieldData itself. 853 std::map<base::string16, FormFieldData*> name_map; 854 855 // The extracted FormFields. We use pointers so we can store them in 856 // |name_map|. 857 ScopedVector<FormFieldData> form_fields; 858 859 WebVector<WebFormControlElement> control_elements; 860 form_element.getFormControlElements(control_elements); 861 862 // A vector of bools that indicate whether each field in the form meets the 863 // requirements and thus will be in the resulting |form|. 864 std::vector<bool> fields_extracted(control_elements.size(), false); 865 866 for (size_t i = 0; i < control_elements.size(); ++i) { 867 const WebFormControlElement& control_element = control_elements[i]; 868 869 if (!IsAutofillableElement(control_element)) 870 continue; 871 872 const WebInputElement* input_element = toWebInputElement(&control_element); 873 if (requirements & REQUIRE_AUTOCOMPLETE && 874 IsAutofillableInputElement(input_element) && 875 !SatisfiesRequireAutocomplete(*input_element)) 876 continue; 877 878 // Create a new FormFieldData, fill it out and map it to the field's name. 879 FormFieldData* form_field = new FormFieldData; 880 WebFormControlElementToFormField(control_element, extract_mask, form_field); 881 form_fields.push_back(form_field); 882 // TODO(jhawkins): A label element is mapped to a form control element's id. 883 // field->name() will contain the id only if the name does not exist. Add 884 // an id() method to WebFormControlElement and use that here. 885 name_map[form_field->name] = form_field; 886 fields_extracted[i] = true; 887 } 888 889 // If we failed to extract any fields, give up. Also, to avoid overly 890 // expensive computation, we impose a maximum number of allowable fields. 891 if (form_fields.empty() || form_fields.size() > kMaxParseableFields) 892 return false; 893 894 // Loop through the label elements inside the form element. For each label 895 // element, get the corresponding form control element, use the form control 896 // element's name as a key into the <name, FormFieldData> map to find the 897 // previously created FormFieldData and set the FormFieldData's label to the 898 // label.firstChild().nodeValue() of the label element. 899 WebElementCollection labels = form_element.getElementsByHTMLTagName(kLabel); 900 DCHECK(!labels.isNull()); 901 for (WebElement item = labels.firstItem(); !item.isNull(); 902 item = labels.nextItem()) { 903 WebLabelElement label = item.to<WebLabelElement>(); 904 WebFormControlElement field_element = 905 label.correspondingControl().to<WebFormControlElement>(); 906 907 base::string16 element_name; 908 if (field_element.isNull()) { 909 // Sometimes site authors will incorrectly specify the corresponding 910 // field element's name rather than its id, so we compensate here. 911 element_name = label.getAttribute(kFor); 912 } else if ( 913 !field_element.isFormControlElement() || 914 field_element.formControlType() == kHidden) { 915 continue; 916 } else { 917 element_name = field_element.nameForAutofill(); 918 } 919 920 std::map<base::string16, FormFieldData*>::iterator iter = 921 name_map.find(element_name); 922 if (iter != name_map.end()) { 923 base::string16 label_text = FindChildText(label); 924 925 // Concatenate labels because some sites might have multiple label 926 // candidates. 927 if (!iter->second->label.empty() && !label_text.empty()) 928 iter->second->label += base::ASCIIToUTF16(" "); 929 iter->second->label += label_text; 930 } 931 } 932 933 // Loop through the form control elements, extracting the label text from 934 // the DOM. We use the |fields_extracted| vector to make sure we assign the 935 // extracted label to the correct field, as it's possible |form_fields| will 936 // not contain all of the elements in |control_elements|. 937 for (size_t i = 0, field_idx = 0; 938 i < control_elements.size() && field_idx < form_fields.size(); ++i) { 939 // This field didn't meet the requirements, so don't try to find a label 940 // for it. 941 if (!fields_extracted[i]) 942 continue; 943 944 const WebFormControlElement& control_element = control_elements[i]; 945 if (form_fields[field_idx]->label.empty()) 946 form_fields[field_idx]->label = InferLabelForElement(control_element); 947 948 if (field && form_control_element == control_element) 949 *field = *form_fields[field_idx]; 950 951 ++field_idx; 952 } 953 954 // Copy the created FormFields into the resulting FormData object. 955 for (ScopedVector<FormFieldData>::const_iterator iter = form_fields.begin(); 956 iter != form_fields.end(); ++iter) { 957 form->fields.push_back(**iter); 958 } 959 960 return true; 961 } 962 963 bool FindFormAndFieldForFormControlElement(const WebFormControlElement& element, 964 FormData* form, 965 FormFieldData* field, 966 RequirementsMask requirements) { 967 if (!IsAutofillableElement(element)) 968 return false; 969 970 const WebFormElement form_element = element.form(); 971 if (form_element.isNull()) 972 return false; 973 974 ExtractMask extract_mask = 975 static_cast<ExtractMask>(EXTRACT_VALUE | EXTRACT_OPTIONS); 976 return WebFormElementToFormData(form_element, 977 element, 978 requirements, 979 extract_mask, 980 form, 981 field); 982 } 983 984 void FillForm(const FormData& form, const WebFormControlElement& element) { 985 WebFormElement form_element = element.form(); 986 if (form_element.isNull()) 987 return; 988 989 ForEachMatchingFormField(form_element, 990 element, 991 form, 992 true, /* only_focusable_elements */ 993 false, /* don't force override */ 994 &FillFormField); 995 } 996 997 void FillFormIncludingNonFocusableElements(const FormData& form_data, 998 const WebFormElement& form_element) { 999 if (form_element.isNull()) 1000 return; 1001 1002 ForEachMatchingFormField(form_element, 1003 WebInputElement(), 1004 form_data, 1005 false, /* only_focusable_elements */ 1006 true, /* force override */ 1007 &FillFormField); 1008 } 1009 1010 void PreviewForm(const FormData& form, const WebFormControlElement& element) { 1011 WebFormElement form_element = element.form(); 1012 if (form_element.isNull()) 1013 return; 1014 1015 ForEachMatchingFormField(form_element, 1016 element, 1017 form, 1018 true, /* only_focusable_elements */ 1019 false, /* dont force override */ 1020 &PreviewFormField); 1021 } 1022 1023 bool ClearPreviewedFormWithElement(const WebFormControlElement& element, 1024 bool was_autofilled) { 1025 WebFormElement form_element = element.form(); 1026 if (form_element.isNull()) 1027 return false; 1028 1029 std::vector<WebFormControlElement> control_elements; 1030 ExtractAutofillableElements(form_element, REQUIRE_AUTOCOMPLETE, 1031 &control_elements); 1032 for (size_t i = 0; i < control_elements.size(); ++i) { 1033 // There might be unrelated elements in this form which have already been 1034 // auto-filled. For example, the user might have already filled the address 1035 // part of a form and now be dealing with the credit card section. We only 1036 // want to reset the auto-filled status for fields that were previewed. 1037 WebFormControlElement control_element = control_elements[i]; 1038 1039 // Only text input, textarea and select elements can be previewed. 1040 WebInputElement* input_element = toWebInputElement(&control_element); 1041 if (!IsTextInput(input_element) && 1042 !IsMonthInput(input_element) && 1043 !IsTextAreaElement(control_element) && 1044 !IsSelectElement(control_element)) 1045 continue; 1046 1047 // If the element is not auto-filled, we did not preview it, 1048 // so there is nothing to reset. 1049 if(!control_element.isAutofilled()) 1050 continue; 1051 1052 if ((IsTextInput(input_element) || 1053 IsMonthInput(input_element) || 1054 IsTextAreaElement(control_element) || 1055 IsSelectElement(control_element)) && 1056 control_element.suggestedValue().isEmpty()) 1057 continue; 1058 1059 // Clear the suggested value. For the initiating node, also restore the 1060 // original value. 1061 if (IsTextInput(input_element) || IsMonthInput(input_element) || 1062 IsTextAreaElement(control_element)) { 1063 control_element.setSuggestedValue(WebString()); 1064 bool is_initiating_node = (element == control_element); 1065 if (is_initiating_node) { 1066 control_element.setAutofilled(was_autofilled); 1067 // Clearing the suggested value in the focused node (above) can cause 1068 // selection to be lost. We force selection range to restore the text 1069 // cursor. 1070 int length = control_element.value().length(); 1071 control_element.setSelectionRange(length, length); 1072 } else { 1073 control_element.setAutofilled(false); 1074 } 1075 } else if (IsSelectElement(control_element)) { 1076 control_element.setSuggestedValue(WebString()); 1077 control_element.setAutofilled(false); 1078 } 1079 } 1080 1081 return true; 1082 } 1083 1084 bool FormWithElementIsAutofilled(const WebInputElement& element) { 1085 WebFormElement form_element = element.form(); 1086 if (form_element.isNull()) 1087 return false; 1088 1089 std::vector<WebFormControlElement> control_elements; 1090 ExtractAutofillableElements(form_element, REQUIRE_AUTOCOMPLETE, 1091 &control_elements); 1092 for (size_t i = 0; i < control_elements.size(); ++i) { 1093 WebInputElement* input_element = toWebInputElement(&control_elements[i]); 1094 if (!IsAutofillableInputElement(input_element)) 1095 continue; 1096 1097 if (input_element->isAutofilled()) 1098 return true; 1099 } 1100 1101 return false; 1102 } 1103 1104 bool IsWebpageEmpty(const blink::WebFrame* frame) { 1105 blink::WebDocument document = frame->document(); 1106 1107 return IsWebElementEmpty(document.head()) && 1108 IsWebElementEmpty(document.body()); 1109 } 1110 1111 bool IsWebElementEmpty(const blink::WebElement& element) { 1112 // This array contains all tags which can be present in an empty page. 1113 const char* const kAllowedValue[] = { 1114 "script", 1115 "meta", 1116 "title", 1117 }; 1118 const size_t kAllowedValueLength = arraysize(kAllowedValue); 1119 1120 if (element.isNull()) 1121 return true; 1122 // The childNodes method is not a const method. Therefore it cannot be called 1123 // on a const reference. Therefore we need a const cast. 1124 const blink::WebNodeList& children = 1125 const_cast<blink::WebElement&>(element).childNodes(); 1126 for (size_t i = 0; i < children.length(); ++i) { 1127 const blink::WebNode& item = children.item(i); 1128 1129 if (item.isTextNode() && 1130 !base::ContainsOnlyChars(item.nodeValue().utf8(), 1131 base::kWhitespaceASCII)) 1132 return false; 1133 1134 // We ignore all other items with names which begin with 1135 // the character # because they are not html tags. 1136 if (item.nodeName().utf8()[0] == '#') 1137 continue; 1138 1139 bool tag_is_allowed = false; 1140 // Test if the item name is in the kAllowedValue array 1141 for (size_t allowed_value_index = 0; 1142 allowed_value_index < kAllowedValueLength; ++allowed_value_index) { 1143 if (HasTagName(item, 1144 WebString::fromUTF8(kAllowedValue[allowed_value_index]))) { 1145 tag_is_allowed = true; 1146 break; 1147 } 1148 } 1149 if (!tag_is_allowed) 1150 return false; 1151 } 1152 return true; 1153 } 1154 1155 gfx::RectF GetScaledBoundingBox(float scale, WebFormControlElement* element) { 1156 gfx::Rect bounding_box(element->boundsInViewportSpace()); 1157 return gfx::RectF(bounding_box.x() * scale, 1158 bounding_box.y() * scale, 1159 bounding_box.width() * scale, 1160 bounding_box.height() * scale); 1161 } 1162 1163 } // namespace autofill 1164