1 // Copyright 2014 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "components/omnibox/search_suggestion_parser.h" 6 7 #include "base/i18n/icu_string_conversions.h" 8 #include "base/json/json_string_value_serializer.h" 9 #include "base/json/json_writer.h" 10 #include "base/logging.h" 11 #include "base/strings/string_util.h" 12 #include "base/strings/utf_string_conversions.h" 13 #include "base/values.h" 14 #include "components/omnibox/autocomplete_input.h" 15 #include "components/omnibox/url_prefix.h" 16 #include "components/url_fixer/url_fixer.h" 17 #include "net/base/net_util.h" 18 #include "net/http/http_response_headers.h" 19 #include "net/url_request/url_fetcher.h" 20 #include "url/url_constants.h" 21 22 namespace { 23 24 AutocompleteMatchType::Type GetAutocompleteMatchType(const std::string& type) { 25 if (type == "ENTITY") 26 return AutocompleteMatchType::SEARCH_SUGGEST_ENTITY; 27 if (type == "INFINITE") 28 return AutocompleteMatchType::SEARCH_SUGGEST_INFINITE; 29 if (type == "PERSONALIZED_QUERY") 30 return AutocompleteMatchType::SEARCH_SUGGEST_PERSONALIZED; 31 if (type == "PROFILE") 32 return AutocompleteMatchType::SEARCH_SUGGEST_PROFILE; 33 if (type == "NAVIGATION") 34 return AutocompleteMatchType::NAVSUGGEST; 35 if (type == "PERSONALIZED_NAVIGATION") 36 return AutocompleteMatchType::NAVSUGGEST_PERSONALIZED; 37 return AutocompleteMatchType::SEARCH_SUGGEST; 38 } 39 40 } // namespace 41 42 // SearchSuggestionParser::Result ---------------------------------------------- 43 44 SearchSuggestionParser::Result::Result(bool from_keyword_provider, 45 int relevance, 46 bool relevance_from_server, 47 AutocompleteMatchType::Type type, 48 const std::string& deletion_url) 49 : from_keyword_provider_(from_keyword_provider), 50 type_(type), 51 relevance_(relevance), 52 relevance_from_server_(relevance_from_server), 53 received_after_last_keystroke_(true), 54 deletion_url_(deletion_url) {} 55 56 SearchSuggestionParser::Result::~Result() {} 57 58 // SearchSuggestionParser::SuggestResult --------------------------------------- 59 60 SearchSuggestionParser::SuggestResult::SuggestResult( 61 const base::string16& suggestion, 62 AutocompleteMatchType::Type type, 63 const base::string16& match_contents, 64 const base::string16& match_contents_prefix, 65 const base::string16& annotation, 66 const base::string16& answer_contents, 67 const base::string16& answer_type, 68 const std::string& suggest_query_params, 69 const std::string& deletion_url, 70 bool from_keyword_provider, 71 int relevance, 72 bool relevance_from_server, 73 bool should_prefetch, 74 const base::string16& input_text) 75 : Result(from_keyword_provider, 76 relevance, 77 relevance_from_server, 78 type, 79 deletion_url), 80 suggestion_(suggestion), 81 match_contents_prefix_(match_contents_prefix), 82 annotation_(annotation), 83 suggest_query_params_(suggest_query_params), 84 answer_contents_(answer_contents), 85 answer_type_(answer_type), 86 should_prefetch_(should_prefetch) { 87 match_contents_ = match_contents; 88 DCHECK(!match_contents_.empty()); 89 ClassifyMatchContents(true, input_text); 90 } 91 92 SearchSuggestionParser::SuggestResult::~SuggestResult() {} 93 94 void SearchSuggestionParser::SuggestResult::ClassifyMatchContents( 95 const bool allow_bolding_all, 96 const base::string16& input_text) { 97 if (input_text.empty()) { 98 // In case of zero-suggest results, do not highlight matches. 99 match_contents_class_.push_back( 100 ACMatchClassification(0, ACMatchClassification::NONE)); 101 return; 102 } 103 104 base::string16 lookup_text = input_text; 105 if (type_ == AutocompleteMatchType::SEARCH_SUGGEST_INFINITE) { 106 const size_t contents_index = 107 suggestion_.length() - match_contents_.length(); 108 // Ensure the query starts with the input text, and ends with the match 109 // contents, and the input text has an overlap with contents. 110 if (StartsWith(suggestion_, input_text, true) && 111 EndsWith(suggestion_, match_contents_, true) && 112 (input_text.length() > contents_index)) { 113 lookup_text = input_text.substr(contents_index); 114 } 115 } 116 size_t lookup_position = match_contents_.find(lookup_text); 117 if (!allow_bolding_all && (lookup_position == base::string16::npos)) { 118 // Bail if the code below to update the bolding would bold the whole 119 // string. Note that the string may already be entirely bolded; if 120 // so, leave it as is. 121 return; 122 } 123 match_contents_class_.clear(); 124 // We do intra-string highlighting for suggestions - the suggested segment 125 // will be highlighted, e.g. for input_text = "you" the suggestion may be 126 // "youtube", so we'll bold the "tube" section: you*tube*. 127 if (input_text != match_contents_) { 128 if (lookup_position == base::string16::npos) { 129 // The input text is not a substring of the query string, e.g. input 130 // text is "slasdot" and the query string is "slashdot", so we bold the 131 // whole thing. 132 match_contents_class_.push_back( 133 ACMatchClassification(0, ACMatchClassification::MATCH)); 134 } else { 135 // We don't iterate over the string here annotating all matches because 136 // it looks odd to have every occurrence of a substring that may be as 137 // short as a single character highlighted in a query suggestion result, 138 // e.g. for input text "s" and query string "southwest airlines", it 139 // looks odd if both the first and last s are highlighted. 140 if (lookup_position != 0) { 141 match_contents_class_.push_back( 142 ACMatchClassification(0, ACMatchClassification::MATCH)); 143 } 144 match_contents_class_.push_back( 145 ACMatchClassification(lookup_position, ACMatchClassification::NONE)); 146 size_t next_fragment_position = lookup_position + lookup_text.length(); 147 if (next_fragment_position < match_contents_.length()) { 148 match_contents_class_.push_back(ACMatchClassification( 149 next_fragment_position, ACMatchClassification::MATCH)); 150 } 151 } 152 } else { 153 // Otherwise, match_contents_ is a verbatim (what-you-typed) match, either 154 // for the default provider or a keyword search provider. 155 match_contents_class_.push_back( 156 ACMatchClassification(0, ACMatchClassification::NONE)); 157 } 158 } 159 160 int SearchSuggestionParser::SuggestResult::CalculateRelevance( 161 const AutocompleteInput& input, 162 bool keyword_provider_requested) const { 163 if (!from_keyword_provider_ && keyword_provider_requested) 164 return 100; 165 return ((input.type() == metrics::OmniboxInputType::URL) ? 300 : 600); 166 } 167 168 // SearchSuggestionParser::NavigationResult ------------------------------------ 169 170 SearchSuggestionParser::NavigationResult::NavigationResult( 171 const AutocompleteSchemeClassifier& scheme_classifier, 172 const GURL& url, 173 AutocompleteMatchType::Type type, 174 const base::string16& description, 175 const std::string& deletion_url, 176 bool from_keyword_provider, 177 int relevance, 178 bool relevance_from_server, 179 const base::string16& input_text, 180 const std::string& languages) 181 : Result(from_keyword_provider, relevance, relevance_from_server, type, 182 deletion_url), 183 url_(url), 184 formatted_url_(AutocompleteInput::FormattedStringWithEquivalentMeaning( 185 url, net::FormatUrl(url, languages, 186 net::kFormatUrlOmitAll & ~net::kFormatUrlOmitHTTP, 187 net::UnescapeRule::SPACES, NULL, NULL, NULL), 188 scheme_classifier)), 189 description_(description) { 190 DCHECK(url_.is_valid()); 191 CalculateAndClassifyMatchContents(true, input_text, languages); 192 } 193 194 SearchSuggestionParser::NavigationResult::~NavigationResult() {} 195 196 void 197 SearchSuggestionParser::NavigationResult::CalculateAndClassifyMatchContents( 198 const bool allow_bolding_nothing, 199 const base::string16& input_text, 200 const std::string& languages) { 201 if (input_text.empty()) { 202 // In case of zero-suggest results, do not highlight matches. 203 match_contents_class_.push_back( 204 ACMatchClassification(0, ACMatchClassification::NONE)); 205 return; 206 } 207 208 // First look for the user's input inside the formatted url as it would be 209 // without trimming the scheme, so we can find matches at the beginning of the 210 // scheme. 211 const URLPrefix* prefix = 212 URLPrefix::BestURLPrefix(formatted_url_, input_text); 213 size_t match_start = (prefix == NULL) ? 214 formatted_url_.find(input_text) : prefix->prefix.length(); 215 bool trim_http = !AutocompleteInput::HasHTTPScheme(input_text) && 216 (!prefix || (match_start != 0)); 217 const net::FormatUrlTypes format_types = 218 net::kFormatUrlOmitAll & ~(trim_http ? 0 : net::kFormatUrlOmitHTTP); 219 220 base::string16 match_contents = net::FormatUrl(url_, languages, format_types, 221 net::UnescapeRule::SPACES, NULL, NULL, &match_start); 222 // If the first match in the untrimmed string was inside a scheme that we 223 // trimmed, look for a subsequent match. 224 if (match_start == base::string16::npos) 225 match_start = match_contents.find(input_text); 226 // Update |match_contents_| and |match_contents_class_| if it's allowed. 227 if (allow_bolding_nothing || (match_start != base::string16::npos)) { 228 match_contents_ = match_contents; 229 // Safe if |match_start| is npos; also safe if the input is longer than the 230 // remaining contents after |match_start|. 231 AutocompleteMatch::ClassifyLocationInString(match_start, 232 input_text.length(), match_contents_.length(), 233 ACMatchClassification::URL, &match_contents_class_); 234 } 235 } 236 237 int SearchSuggestionParser::NavigationResult::CalculateRelevance( 238 const AutocompleteInput& input, 239 bool keyword_provider_requested) const { 240 return (from_keyword_provider_ || !keyword_provider_requested) ? 800 : 150; 241 } 242 243 // SearchSuggestionParser::Results --------------------------------------------- 244 245 SearchSuggestionParser::Results::Results() 246 : verbatim_relevance(-1), 247 field_trial_triggered(false), 248 relevances_from_server(false) {} 249 250 SearchSuggestionParser::Results::~Results() {} 251 252 void SearchSuggestionParser::Results::Clear() { 253 suggest_results.clear(); 254 navigation_results.clear(); 255 verbatim_relevance = -1; 256 metadata.clear(); 257 } 258 259 bool SearchSuggestionParser::Results::HasServerProvidedScores() const { 260 if (verbatim_relevance >= 0) 261 return true; 262 263 // Right now either all results of one type will be server-scored or they will 264 // all be locally scored, but in case we change this later, we'll just check 265 // them all. 266 for (SuggestResults::const_iterator i(suggest_results.begin()); 267 i != suggest_results.end(); ++i) { 268 if (i->relevance_from_server()) 269 return true; 270 } 271 for (NavigationResults::const_iterator i(navigation_results.begin()); 272 i != navigation_results.end(); ++i) { 273 if (i->relevance_from_server()) 274 return true; 275 } 276 277 return false; 278 } 279 280 // SearchSuggestionParser ------------------------------------------------------ 281 282 // static 283 std::string SearchSuggestionParser::ExtractJsonData( 284 const net::URLFetcher* source) { 285 const net::HttpResponseHeaders* const response_headers = 286 source->GetResponseHeaders(); 287 std::string json_data; 288 source->GetResponseAsString(&json_data); 289 290 // JSON is supposed to be UTF-8, but some suggest service providers send 291 // JSON files in non-UTF-8 encodings. The actual encoding is usually 292 // specified in the Content-Type header field. 293 if (response_headers) { 294 std::string charset; 295 if (response_headers->GetCharset(&charset)) { 296 base::string16 data_16; 297 // TODO(jungshik): Switch to CodePageToUTF8 after it's added. 298 if (base::CodepageToUTF16(json_data, charset.c_str(), 299 base::OnStringConversionError::FAIL, 300 &data_16)) 301 json_data = base::UTF16ToUTF8(data_16); 302 } 303 } 304 return json_data; 305 } 306 307 // static 308 scoped_ptr<base::Value> SearchSuggestionParser::DeserializeJsonData( 309 std::string json_data) { 310 // The JSON response should be an array. 311 for (size_t response_start_index = json_data.find("["), i = 0; 312 response_start_index != std::string::npos && i < 5; 313 response_start_index = json_data.find("[", 1), i++) { 314 // Remove any XSSI guards to allow for JSON parsing. 315 if (response_start_index > 0) 316 json_data.erase(0, response_start_index); 317 318 JSONStringValueSerializer deserializer(json_data); 319 deserializer.set_allow_trailing_comma(true); 320 int error_code = 0; 321 scoped_ptr<base::Value> data(deserializer.Deserialize(&error_code, NULL)); 322 if (error_code == 0) 323 return data.Pass(); 324 } 325 return scoped_ptr<base::Value>(); 326 } 327 328 // static 329 bool SearchSuggestionParser::ParseSuggestResults( 330 const base::Value& root_val, 331 const AutocompleteInput& input, 332 const AutocompleteSchemeClassifier& scheme_classifier, 333 int default_result_relevance, 334 const std::string& languages, 335 bool is_keyword_result, 336 Results* results) { 337 base::string16 query; 338 const base::ListValue* root_list = NULL; 339 const base::ListValue* results_list = NULL; 340 341 if (!root_val.GetAsList(&root_list) || !root_list->GetString(0, &query) || 342 query != input.text() || !root_list->GetList(1, &results_list)) 343 return false; 344 345 // 3rd element: Description list. 346 const base::ListValue* descriptions = NULL; 347 root_list->GetList(2, &descriptions); 348 349 // 4th element: Disregard the query URL list for now. 350 351 // Reset suggested relevance information. 352 results->verbatim_relevance = -1; 353 354 // 5th element: Optional key-value pairs from the Suggest server. 355 const base::ListValue* types = NULL; 356 const base::ListValue* relevances = NULL; 357 const base::ListValue* suggestion_details = NULL; 358 const base::DictionaryValue* extras = NULL; 359 int prefetch_index = -1; 360 if (root_list->GetDictionary(4, &extras)) { 361 extras->GetList("google:suggesttype", &types); 362 363 // Discard this list if its size does not match that of the suggestions. 364 if (extras->GetList("google:suggestrelevance", &relevances) && 365 (relevances->GetSize() != results_list->GetSize())) 366 relevances = NULL; 367 extras->GetInteger("google:verbatimrelevance", 368 &results->verbatim_relevance); 369 370 // Check if the active suggest field trial (if any) has triggered either 371 // for the default provider or keyword provider. 372 results->field_trial_triggered = false; 373 extras->GetBoolean("google:fieldtrialtriggered", 374 &results->field_trial_triggered); 375 376 const base::DictionaryValue* client_data = NULL; 377 if (extras->GetDictionary("google:clientdata", &client_data) && client_data) 378 client_data->GetInteger("phi", &prefetch_index); 379 380 if (extras->GetList("google:suggestdetail", &suggestion_details) && 381 suggestion_details->GetSize() != results_list->GetSize()) 382 suggestion_details = NULL; 383 384 // Store the metadata that came with the response in case we need to pass it 385 // along with the prefetch query to Instant. 386 JSONStringValueSerializer json_serializer(&results->metadata); 387 json_serializer.Serialize(*extras); 388 } 389 390 // Clear the previous results now that new results are available. 391 results->suggest_results.clear(); 392 results->navigation_results.clear(); 393 results->answers_image_urls.clear(); 394 395 base::string16 suggestion; 396 std::string type; 397 int relevance = default_result_relevance; 398 // Prohibit navsuggest in FORCED_QUERY mode. Users wants queries, not URLs. 399 const bool allow_navsuggest = 400 input.type() != metrics::OmniboxInputType::FORCED_QUERY; 401 const base::string16& trimmed_input = 402 base::CollapseWhitespace(input.text(), false); 403 for (size_t index = 0; results_list->GetString(index, &suggestion); ++index) { 404 // Google search may return empty suggestions for weird input characters, 405 // they make no sense at all and can cause problems in our code. 406 if (suggestion.empty()) 407 continue; 408 409 // Apply valid suggested relevance scores; discard invalid lists. 410 if (relevances != NULL && !relevances->GetInteger(index, &relevance)) 411 relevances = NULL; 412 AutocompleteMatchType::Type match_type = 413 AutocompleteMatchType::SEARCH_SUGGEST; 414 if (types && types->GetString(index, &type)) 415 match_type = GetAutocompleteMatchType(type); 416 const base::DictionaryValue* suggestion_detail = NULL; 417 std::string deletion_url; 418 419 if (suggestion_details && 420 suggestion_details->GetDictionary(index, &suggestion_detail)) 421 suggestion_detail->GetString("du", &deletion_url); 422 423 if ((match_type == AutocompleteMatchType::NAVSUGGEST) || 424 (match_type == AutocompleteMatchType::NAVSUGGEST_PERSONALIZED)) { 425 // Do not blindly trust the URL coming from the server to be valid. 426 GURL url( 427 url_fixer::FixupURL(base::UTF16ToUTF8(suggestion), std::string())); 428 if (url.is_valid() && allow_navsuggest) { 429 base::string16 title; 430 if (descriptions != NULL) 431 descriptions->GetString(index, &title); 432 results->navigation_results.push_back(NavigationResult( 433 scheme_classifier, url, match_type, title, deletion_url, 434 is_keyword_result, relevance, relevances != NULL, input.text(), 435 languages)); 436 } 437 } else { 438 base::string16 match_contents = suggestion; 439 base::string16 match_contents_prefix; 440 base::string16 annotation; 441 base::string16 answer_contents; 442 base::string16 answer_type; 443 std::string suggest_query_params; 444 445 if (suggestion_details) { 446 suggestion_details->GetDictionary(index, &suggestion_detail); 447 if (suggestion_detail) { 448 suggestion_detail->GetString("t", &match_contents); 449 suggestion_detail->GetString("mp", &match_contents_prefix); 450 // Error correction for bad data from server. 451 if (match_contents.empty()) 452 match_contents = suggestion; 453 suggestion_detail->GetString("a", &annotation); 454 suggestion_detail->GetString("q", &suggest_query_params); 455 456 // Extract Answers, if provided. 457 const base::DictionaryValue* answer_json = NULL; 458 if (suggestion_detail->GetDictionary("ansa", &answer_json)) { 459 match_type = AutocompleteMatchType::SEARCH_SUGGEST_ANSWER; 460 GetAnswersImageURLs(answer_json, &results->answers_image_urls); 461 std::string contents; 462 base::JSONWriter::Write(answer_json, &contents); 463 answer_contents = base::UTF8ToUTF16(contents); 464 suggestion_detail->GetString("ansb", &answer_type); 465 } 466 } 467 } 468 469 bool should_prefetch = static_cast<int>(index) == prefetch_index; 470 // TODO(kochi): Improve calculator suggestion presentation. 471 results->suggest_results.push_back(SuggestResult( 472 base::CollapseWhitespace(suggestion, false), match_type, 473 base::CollapseWhitespace(match_contents, false), 474 match_contents_prefix, annotation, answer_contents, answer_type, 475 suggest_query_params, deletion_url, is_keyword_result, relevance, 476 relevances != NULL, should_prefetch, trimmed_input)); 477 } 478 } 479 results->relevances_from_server = relevances != NULL; 480 return true; 481 } 482 483 // static 484 void SearchSuggestionParser::GetAnswersImageURLs( 485 const base::DictionaryValue* answer_json, 486 std::vector<GURL>* urls) { 487 DCHECK(answer_json); 488 489 const base::ListValue* lines = NULL; 490 if (!answer_json->GetList("l", &lines) || !lines || lines->GetSize() == 0) 491 return; 492 493 for (base::ListValue::const_iterator iter = lines->begin(); 494 iter != lines->end(); 495 ++iter) { 496 const base::DictionaryValue* line = NULL; 497 if (!(*iter)->GetAsDictionary(&line) || !line) 498 continue; 499 500 std::string image_host_and_path; 501 if (!line->GetString("il.i.d", &image_host_and_path) || 502 image_host_and_path.empty()) 503 continue; 504 // Concatenate scheme and host/path using only ':' as separator. This is 505 // due to the results delivering strings of the form '//host/path', which 506 // is web-speak for "use the enclosing page's scheme", but not a valid path 507 // of an URL. 508 GURL image_url( 509 GURL(std::string(url::kHttpsScheme) + ":" + image_host_and_path)); 510 if (image_url.is_valid()) 511 urls->push_back(image_url); 512 } 513 } 514