Home | History | Annotate | Download | only in omnibox
      1 // Copyright 2014 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "components/omnibox/search_suggestion_parser.h"
      6 
      7 #include "base/i18n/icu_string_conversions.h"
      8 #include "base/json/json_string_value_serializer.h"
      9 #include "base/json/json_writer.h"
     10 #include "base/logging.h"
     11 #include "base/strings/string_util.h"
     12 #include "base/strings/utf_string_conversions.h"
     13 #include "base/values.h"
     14 #include "components/omnibox/autocomplete_input.h"
     15 #include "components/omnibox/url_prefix.h"
     16 #include "components/url_fixer/url_fixer.h"
     17 #include "net/base/net_util.h"
     18 #include "net/http/http_response_headers.h"
     19 #include "net/url_request/url_fetcher.h"
     20 #include "url/url_constants.h"
     21 
     22 namespace {
     23 
     24 AutocompleteMatchType::Type GetAutocompleteMatchType(const std::string& type) {
     25   if (type == "ENTITY")
     26     return AutocompleteMatchType::SEARCH_SUGGEST_ENTITY;
     27   if (type == "INFINITE")
     28     return AutocompleteMatchType::SEARCH_SUGGEST_INFINITE;
     29   if (type == "PERSONALIZED_QUERY")
     30     return AutocompleteMatchType::SEARCH_SUGGEST_PERSONALIZED;
     31   if (type == "PROFILE")
     32     return AutocompleteMatchType::SEARCH_SUGGEST_PROFILE;
     33   if (type == "NAVIGATION")
     34     return AutocompleteMatchType::NAVSUGGEST;
     35   if (type == "PERSONALIZED_NAVIGATION")
     36     return AutocompleteMatchType::NAVSUGGEST_PERSONALIZED;
     37   return AutocompleteMatchType::SEARCH_SUGGEST;
     38 }
     39 
     40 }  // namespace
     41 
     42 // SearchSuggestionParser::Result ----------------------------------------------
     43 
     44 SearchSuggestionParser::Result::Result(bool from_keyword_provider,
     45                                        int relevance,
     46                                        bool relevance_from_server,
     47                                        AutocompleteMatchType::Type type,
     48                                        const std::string& deletion_url)
     49     : from_keyword_provider_(from_keyword_provider),
     50       type_(type),
     51       relevance_(relevance),
     52       relevance_from_server_(relevance_from_server),
     53       received_after_last_keystroke_(true),
     54       deletion_url_(deletion_url) {}
     55 
     56 SearchSuggestionParser::Result::~Result() {}
     57 
     58 // SearchSuggestionParser::SuggestResult ---------------------------------------
     59 
     60 SearchSuggestionParser::SuggestResult::SuggestResult(
     61     const base::string16& suggestion,
     62     AutocompleteMatchType::Type type,
     63     const base::string16& match_contents,
     64     const base::string16& match_contents_prefix,
     65     const base::string16& annotation,
     66     const base::string16& answer_contents,
     67     const base::string16& answer_type,
     68     const std::string& suggest_query_params,
     69     const std::string& deletion_url,
     70     bool from_keyword_provider,
     71     int relevance,
     72     bool relevance_from_server,
     73     bool should_prefetch,
     74     const base::string16& input_text)
     75     : Result(from_keyword_provider,
     76              relevance,
     77              relevance_from_server,
     78              type,
     79              deletion_url),
     80       suggestion_(suggestion),
     81       match_contents_prefix_(match_contents_prefix),
     82       annotation_(annotation),
     83       suggest_query_params_(suggest_query_params),
     84       answer_contents_(answer_contents),
     85       answer_type_(answer_type),
     86       should_prefetch_(should_prefetch) {
     87   match_contents_ = match_contents;
     88   DCHECK(!match_contents_.empty());
     89   ClassifyMatchContents(true, input_text);
     90 }
     91 
     92 SearchSuggestionParser::SuggestResult::~SuggestResult() {}
     93 
     94 void SearchSuggestionParser::SuggestResult::ClassifyMatchContents(
     95     const bool allow_bolding_all,
     96     const base::string16& input_text) {
     97   if (input_text.empty()) {
     98     // In case of zero-suggest results, do not highlight matches.
     99     match_contents_class_.push_back(
    100         ACMatchClassification(0, ACMatchClassification::NONE));
    101     return;
    102   }
    103 
    104   base::string16 lookup_text = input_text;
    105   if (type_ == AutocompleteMatchType::SEARCH_SUGGEST_INFINITE) {
    106     const size_t contents_index =
    107         suggestion_.length() - match_contents_.length();
    108     // Ensure the query starts with the input text, and ends with the match
    109     // contents, and the input text has an overlap with contents.
    110     if (StartsWith(suggestion_, input_text, true) &&
    111         EndsWith(suggestion_, match_contents_, true) &&
    112         (input_text.length() > contents_index)) {
    113       lookup_text = input_text.substr(contents_index);
    114     }
    115   }
    116   size_t lookup_position = match_contents_.find(lookup_text);
    117   if (!allow_bolding_all && (lookup_position == base::string16::npos)) {
    118     // Bail if the code below to update the bolding would bold the whole
    119     // string.  Note that the string may already be entirely bolded; if
    120     // so, leave it as is.
    121     return;
    122   }
    123   match_contents_class_.clear();
    124   // We do intra-string highlighting for suggestions - the suggested segment
    125   // will be highlighted, e.g. for input_text = "you" the suggestion may be
    126   // "youtube", so we'll bold the "tube" section: you*tube*.
    127   if (input_text != match_contents_) {
    128     if (lookup_position == base::string16::npos) {
    129       // The input text is not a substring of the query string, e.g. input
    130       // text is "slasdot" and the query string is "slashdot", so we bold the
    131       // whole thing.
    132       match_contents_class_.push_back(
    133           ACMatchClassification(0, ACMatchClassification::MATCH));
    134     } else {
    135       // We don't iterate over the string here annotating all matches because
    136       // it looks odd to have every occurrence of a substring that may be as
    137       // short as a single character highlighted in a query suggestion result,
    138       // e.g. for input text "s" and query string "southwest airlines", it
    139       // looks odd if both the first and last s are highlighted.
    140       if (lookup_position != 0) {
    141         match_contents_class_.push_back(
    142             ACMatchClassification(0, ACMatchClassification::MATCH));
    143       }
    144       match_contents_class_.push_back(
    145           ACMatchClassification(lookup_position, ACMatchClassification::NONE));
    146       size_t next_fragment_position = lookup_position + lookup_text.length();
    147       if (next_fragment_position < match_contents_.length()) {
    148         match_contents_class_.push_back(ACMatchClassification(
    149             next_fragment_position, ACMatchClassification::MATCH));
    150       }
    151     }
    152   } else {
    153     // Otherwise, match_contents_ is a verbatim (what-you-typed) match, either
    154     // for the default provider or a keyword search provider.
    155     match_contents_class_.push_back(
    156         ACMatchClassification(0, ACMatchClassification::NONE));
    157   }
    158 }
    159 
    160 int SearchSuggestionParser::SuggestResult::CalculateRelevance(
    161     const AutocompleteInput& input,
    162     bool keyword_provider_requested) const {
    163   if (!from_keyword_provider_ && keyword_provider_requested)
    164     return 100;
    165   return ((input.type() == metrics::OmniboxInputType::URL) ? 300 : 600);
    166 }
    167 
    168 // SearchSuggestionParser::NavigationResult ------------------------------------
    169 
    170 SearchSuggestionParser::NavigationResult::NavigationResult(
    171     const AutocompleteSchemeClassifier& scheme_classifier,
    172     const GURL& url,
    173     AutocompleteMatchType::Type type,
    174     const base::string16& description,
    175     const std::string& deletion_url,
    176     bool from_keyword_provider,
    177     int relevance,
    178     bool relevance_from_server,
    179     const base::string16& input_text,
    180     const std::string& languages)
    181     : Result(from_keyword_provider, relevance, relevance_from_server, type,
    182              deletion_url),
    183       url_(url),
    184       formatted_url_(AutocompleteInput::FormattedStringWithEquivalentMeaning(
    185           url, net::FormatUrl(url, languages,
    186                               net::kFormatUrlOmitAll & ~net::kFormatUrlOmitHTTP,
    187                               net::UnescapeRule::SPACES, NULL, NULL, NULL),
    188           scheme_classifier)),
    189       description_(description) {
    190   DCHECK(url_.is_valid());
    191   CalculateAndClassifyMatchContents(true, input_text, languages);
    192 }
    193 
    194 SearchSuggestionParser::NavigationResult::~NavigationResult() {}
    195 
    196 void
    197 SearchSuggestionParser::NavigationResult::CalculateAndClassifyMatchContents(
    198     const bool allow_bolding_nothing,
    199     const base::string16& input_text,
    200     const std::string& languages) {
    201   if (input_text.empty()) {
    202     // In case of zero-suggest results, do not highlight matches.
    203     match_contents_class_.push_back(
    204         ACMatchClassification(0, ACMatchClassification::NONE));
    205     return;
    206   }
    207 
    208   // First look for the user's input inside the formatted url as it would be
    209   // without trimming the scheme, so we can find matches at the beginning of the
    210   // scheme.
    211   const URLPrefix* prefix =
    212       URLPrefix::BestURLPrefix(formatted_url_, input_text);
    213   size_t match_start = (prefix == NULL) ?
    214       formatted_url_.find(input_text) : prefix->prefix.length();
    215   bool trim_http = !AutocompleteInput::HasHTTPScheme(input_text) &&
    216                    (!prefix || (match_start != 0));
    217   const net::FormatUrlTypes format_types =
    218       net::kFormatUrlOmitAll & ~(trim_http ? 0 : net::kFormatUrlOmitHTTP);
    219 
    220   base::string16 match_contents = net::FormatUrl(url_, languages, format_types,
    221       net::UnescapeRule::SPACES, NULL, NULL, &match_start);
    222   // If the first match in the untrimmed string was inside a scheme that we
    223   // trimmed, look for a subsequent match.
    224   if (match_start == base::string16::npos)
    225     match_start = match_contents.find(input_text);
    226   // Update |match_contents_| and |match_contents_class_| if it's allowed.
    227   if (allow_bolding_nothing || (match_start != base::string16::npos)) {
    228     match_contents_ = match_contents;
    229     // Safe if |match_start| is npos; also safe if the input is longer than the
    230     // remaining contents after |match_start|.
    231     AutocompleteMatch::ClassifyLocationInString(match_start,
    232         input_text.length(), match_contents_.length(),
    233         ACMatchClassification::URL, &match_contents_class_);
    234   }
    235 }
    236 
    237 int SearchSuggestionParser::NavigationResult::CalculateRelevance(
    238     const AutocompleteInput& input,
    239     bool keyword_provider_requested) const {
    240   return (from_keyword_provider_ || !keyword_provider_requested) ? 800 : 150;
    241 }
    242 
    243 // SearchSuggestionParser::Results ---------------------------------------------
    244 
    245 SearchSuggestionParser::Results::Results()
    246     : verbatim_relevance(-1),
    247       field_trial_triggered(false),
    248       relevances_from_server(false) {}
    249 
    250 SearchSuggestionParser::Results::~Results() {}
    251 
    252 void SearchSuggestionParser::Results::Clear() {
    253   suggest_results.clear();
    254   navigation_results.clear();
    255   verbatim_relevance = -1;
    256   metadata.clear();
    257 }
    258 
    259 bool SearchSuggestionParser::Results::HasServerProvidedScores() const {
    260   if (verbatim_relevance >= 0)
    261     return true;
    262 
    263   // Right now either all results of one type will be server-scored or they will
    264   // all be locally scored, but in case we change this later, we'll just check
    265   // them all.
    266   for (SuggestResults::const_iterator i(suggest_results.begin());
    267        i != suggest_results.end(); ++i) {
    268     if (i->relevance_from_server())
    269       return true;
    270   }
    271   for (NavigationResults::const_iterator i(navigation_results.begin());
    272        i != navigation_results.end(); ++i) {
    273     if (i->relevance_from_server())
    274       return true;
    275   }
    276 
    277   return false;
    278 }
    279 
    280 // SearchSuggestionParser ------------------------------------------------------
    281 
    282 // static
    283 std::string SearchSuggestionParser::ExtractJsonData(
    284     const net::URLFetcher* source) {
    285   const net::HttpResponseHeaders* const response_headers =
    286       source->GetResponseHeaders();
    287   std::string json_data;
    288   source->GetResponseAsString(&json_data);
    289 
    290   // JSON is supposed to be UTF-8, but some suggest service providers send
    291   // JSON files in non-UTF-8 encodings.  The actual encoding is usually
    292   // specified in the Content-Type header field.
    293   if (response_headers) {
    294     std::string charset;
    295     if (response_headers->GetCharset(&charset)) {
    296       base::string16 data_16;
    297       // TODO(jungshik): Switch to CodePageToUTF8 after it's added.
    298       if (base::CodepageToUTF16(json_data, charset.c_str(),
    299                                 base::OnStringConversionError::FAIL,
    300                                 &data_16))
    301         json_data = base::UTF16ToUTF8(data_16);
    302     }
    303   }
    304   return json_data;
    305 }
    306 
    307 // static
    308 scoped_ptr<base::Value> SearchSuggestionParser::DeserializeJsonData(
    309     std::string json_data) {
    310   // The JSON response should be an array.
    311   for (size_t response_start_index = json_data.find("["), i = 0;
    312        response_start_index != std::string::npos && i < 5;
    313        response_start_index = json_data.find("[", 1), i++) {
    314     // Remove any XSSI guards to allow for JSON parsing.
    315     if (response_start_index > 0)
    316       json_data.erase(0, response_start_index);
    317 
    318     JSONStringValueSerializer deserializer(json_data);
    319     deserializer.set_allow_trailing_comma(true);
    320     int error_code = 0;
    321     scoped_ptr<base::Value> data(deserializer.Deserialize(&error_code, NULL));
    322     if (error_code == 0)
    323       return data.Pass();
    324   }
    325   return scoped_ptr<base::Value>();
    326 }
    327 
    328 // static
    329 bool SearchSuggestionParser::ParseSuggestResults(
    330     const base::Value& root_val,
    331     const AutocompleteInput& input,
    332     const AutocompleteSchemeClassifier& scheme_classifier,
    333     int default_result_relevance,
    334     const std::string& languages,
    335     bool is_keyword_result,
    336     Results* results) {
    337   base::string16 query;
    338   const base::ListValue* root_list = NULL;
    339   const base::ListValue* results_list = NULL;
    340 
    341   if (!root_val.GetAsList(&root_list) || !root_list->GetString(0, &query) ||
    342       query != input.text() || !root_list->GetList(1, &results_list))
    343     return false;
    344 
    345   // 3rd element: Description list.
    346   const base::ListValue* descriptions = NULL;
    347   root_list->GetList(2, &descriptions);
    348 
    349   // 4th element: Disregard the query URL list for now.
    350 
    351   // Reset suggested relevance information.
    352   results->verbatim_relevance = -1;
    353 
    354   // 5th element: Optional key-value pairs from the Suggest server.
    355   const base::ListValue* types = NULL;
    356   const base::ListValue* relevances = NULL;
    357   const base::ListValue* suggestion_details = NULL;
    358   const base::DictionaryValue* extras = NULL;
    359   int prefetch_index = -1;
    360   if (root_list->GetDictionary(4, &extras)) {
    361     extras->GetList("google:suggesttype", &types);
    362 
    363     // Discard this list if its size does not match that of the suggestions.
    364     if (extras->GetList("google:suggestrelevance", &relevances) &&
    365         (relevances->GetSize() != results_list->GetSize()))
    366       relevances = NULL;
    367     extras->GetInteger("google:verbatimrelevance",
    368                        &results->verbatim_relevance);
    369 
    370     // Check if the active suggest field trial (if any) has triggered either
    371     // for the default provider or keyword provider.
    372     results->field_trial_triggered = false;
    373     extras->GetBoolean("google:fieldtrialtriggered",
    374                        &results->field_trial_triggered);
    375 
    376     const base::DictionaryValue* client_data = NULL;
    377     if (extras->GetDictionary("google:clientdata", &client_data) && client_data)
    378       client_data->GetInteger("phi", &prefetch_index);
    379 
    380     if (extras->GetList("google:suggestdetail", &suggestion_details) &&
    381         suggestion_details->GetSize() != results_list->GetSize())
    382       suggestion_details = NULL;
    383 
    384     // Store the metadata that came with the response in case we need to pass it
    385     // along with the prefetch query to Instant.
    386     JSONStringValueSerializer json_serializer(&results->metadata);
    387     json_serializer.Serialize(*extras);
    388   }
    389 
    390   // Clear the previous results now that new results are available.
    391   results->suggest_results.clear();
    392   results->navigation_results.clear();
    393   results->answers_image_urls.clear();
    394 
    395   base::string16 suggestion;
    396   std::string type;
    397   int relevance = default_result_relevance;
    398   // Prohibit navsuggest in FORCED_QUERY mode.  Users wants queries, not URLs.
    399   const bool allow_navsuggest =
    400       input.type() != metrics::OmniboxInputType::FORCED_QUERY;
    401   const base::string16& trimmed_input =
    402       base::CollapseWhitespace(input.text(), false);
    403   for (size_t index = 0; results_list->GetString(index, &suggestion); ++index) {
    404     // Google search may return empty suggestions for weird input characters,
    405     // they make no sense at all and can cause problems in our code.
    406     if (suggestion.empty())
    407       continue;
    408 
    409     // Apply valid suggested relevance scores; discard invalid lists.
    410     if (relevances != NULL && !relevances->GetInteger(index, &relevance))
    411       relevances = NULL;
    412     AutocompleteMatchType::Type match_type =
    413         AutocompleteMatchType::SEARCH_SUGGEST;
    414     if (types && types->GetString(index, &type))
    415       match_type = GetAutocompleteMatchType(type);
    416     const base::DictionaryValue* suggestion_detail = NULL;
    417     std::string deletion_url;
    418 
    419     if (suggestion_details &&
    420         suggestion_details->GetDictionary(index, &suggestion_detail))
    421       suggestion_detail->GetString("du", &deletion_url);
    422 
    423     if ((match_type == AutocompleteMatchType::NAVSUGGEST) ||
    424         (match_type == AutocompleteMatchType::NAVSUGGEST_PERSONALIZED)) {
    425       // Do not blindly trust the URL coming from the server to be valid.
    426       GURL url(
    427           url_fixer::FixupURL(base::UTF16ToUTF8(suggestion), std::string()));
    428       if (url.is_valid() && allow_navsuggest) {
    429         base::string16 title;
    430         if (descriptions != NULL)
    431           descriptions->GetString(index, &title);
    432         results->navigation_results.push_back(NavigationResult(
    433             scheme_classifier, url, match_type, title, deletion_url,
    434             is_keyword_result, relevance, relevances != NULL, input.text(),
    435             languages));
    436       }
    437     } else {
    438       base::string16 match_contents = suggestion;
    439       base::string16 match_contents_prefix;
    440       base::string16 annotation;
    441       base::string16 answer_contents;
    442       base::string16 answer_type;
    443       std::string suggest_query_params;
    444 
    445       if (suggestion_details) {
    446         suggestion_details->GetDictionary(index, &suggestion_detail);
    447         if (suggestion_detail) {
    448           suggestion_detail->GetString("t", &match_contents);
    449           suggestion_detail->GetString("mp", &match_contents_prefix);
    450           // Error correction for bad data from server.
    451           if (match_contents.empty())
    452             match_contents = suggestion;
    453           suggestion_detail->GetString("a", &annotation);
    454           suggestion_detail->GetString("q", &suggest_query_params);
    455 
    456           // Extract Answers, if provided.
    457           const base::DictionaryValue* answer_json = NULL;
    458           if (suggestion_detail->GetDictionary("ansa", &answer_json)) {
    459             match_type = AutocompleteMatchType::SEARCH_SUGGEST_ANSWER;
    460             GetAnswersImageURLs(answer_json, &results->answers_image_urls);
    461             std::string contents;
    462             base::JSONWriter::Write(answer_json, &contents);
    463             answer_contents = base::UTF8ToUTF16(contents);
    464             suggestion_detail->GetString("ansb", &answer_type);
    465           }
    466         }
    467       }
    468 
    469       bool should_prefetch = static_cast<int>(index) == prefetch_index;
    470       // TODO(kochi): Improve calculator suggestion presentation.
    471       results->suggest_results.push_back(SuggestResult(
    472           base::CollapseWhitespace(suggestion, false), match_type,
    473           base::CollapseWhitespace(match_contents, false),
    474           match_contents_prefix, annotation, answer_contents, answer_type,
    475           suggest_query_params, deletion_url, is_keyword_result, relevance,
    476           relevances != NULL, should_prefetch, trimmed_input));
    477     }
    478   }
    479   results->relevances_from_server = relevances != NULL;
    480   return true;
    481 }
    482 
    483 // static
    484 void SearchSuggestionParser::GetAnswersImageURLs(
    485     const base::DictionaryValue* answer_json,
    486     std::vector<GURL>* urls) {
    487   DCHECK(answer_json);
    488 
    489   const base::ListValue* lines = NULL;
    490   if (!answer_json->GetList("l", &lines) || !lines || lines->GetSize() == 0)
    491     return;
    492 
    493   for (base::ListValue::const_iterator iter = lines->begin();
    494        iter != lines->end();
    495        ++iter) {
    496     const base::DictionaryValue* line = NULL;
    497     if (!(*iter)->GetAsDictionary(&line) || !line)
    498       continue;
    499 
    500     std::string image_host_and_path;
    501     if (!line->GetString("il.i.d", &image_host_and_path) ||
    502         image_host_and_path.empty())
    503       continue;
    504     // Concatenate scheme and host/path using only ':' as separator. This is
    505     // due to the results delivering strings of the form '//host/path', which
    506     // is web-speak for "use the enclosing page's scheme", but not a valid path
    507     // of an URL.
    508     GURL image_url(
    509         GURL(std::string(url::kHttpsScheme) + ":" + image_host_and_path));
    510     if (image_url.is_valid())
    511       urls->push_back(image_url);
    512   }
    513 }
    514