Home | History | Annotate | Download | only in spellchecker
      1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
      5 // The |FeedbackSender| object stores the user feedback to spellcheck
      6 // suggestions in a |Feedback| object.
      7 //
      8 // When spelling service returns spellcheck results, these results first arrive
      9 // in |FeedbackSender| to assign hash identifiers for each
     10 // misspelling-suggestion pair. If the spelling service identifies the same
     11 // misspelling as already displayed to the user, then |FeedbackSender| reuses
     12 // the same hash identifiers to avoid duplication. It detects the duplicates by
     13 // comparing misspelling offsets in text. Spelling service can return duplicates
     14 // because we request spellcheck for whole paragraphs, as context around a
     15 // misspelled word is important to the spellcheck algorithm.
     16 //
     17 // All feedback is initially pending. When a user acts upon a misspelling such
     18 // that the misspelling is no longer displayed (red squiggly line goes away),
     19 // then the feedback for this misspelling is finalized. All finalized feedback
     20 // is erased after being sent to the spelling service. Pending feedback is kept
     21 // around for |kSessionHours| hours and then finalized even if user did not act
     22 // on the misspellings.
     23 //
     24 // |FeedbackSender| periodically requests a list of hashes of all remaining
     25 // misspellings in renderers. When a renderer responds with a list of hashes,
     26 // |FeedbackSender| uses the list to determine which misspellings are no longer
     27 // displayed to the user and sends the current state of user feedback to the
     28 // spelling service.
     29 
     30 #include "chrome/browser/spellchecker/feedback_sender.h"
     31 
     32 #include <algorithm>
     33 #include <iterator>
     34 
     35 #include "base/command_line.h"
     36 #include "base/hash.h"
     37 #include "base/json/json_writer.h"
     38 #include "base/metrics/field_trial.h"
     39 #include "base/stl_util.h"
     40 #include "base/strings/string_number_conversions.h"
     41 #include "base/strings/stringprintf.h"
     42 #include "base/values.h"
     43 #include "chrome/browser/spellchecker/word_trimmer.h"
     44 #include "chrome/common/chrome_switches.h"
     45 #include "chrome/common/spellcheck_common.h"
     46 #include "chrome/common/spellcheck_marker.h"
     47 #include "chrome/common/spellcheck_messages.h"
     48 #include "content/public/browser/render_process_host.h"
     49 #include "google_apis/google_api_keys.h"
     50 #include "net/base/load_flags.h"
     51 #include "net/url_request/url_fetcher.h"
     52 #include "net/url_request/url_request_context_getter.h"
     53 
     54 namespace spellcheck {
     55 
     56 namespace {
     57 
     // Endpoint that receives feedback batches unless the constructor of
     // FeedbackSender finds an overriding URL on the command line.
     const char kFeedbackServiceURL[] = "https://www.googleapis.com/rpc";

     // Lower bound, in seconds, applied to a feedback interval that was
     // requested through the command line.
     const int kMinIntervalSeconds = 5;
     63 
     64 // Returns a hash of |session_start|, the current timestamp, and
     65 // |suggestion_index|.
     66 uint32 BuildHash(const base::Time& session_start, size_t suggestion_index) {
     67   return base::Hash(
     68       base::StringPrintf("%" PRId64 "%" PRId64 "%" PRIuS,
     69                          session_start.ToInternalValue(),
     70                          base::Time::Now().ToInternalValue(),
     71                          suggestion_index));
     72 }
     73 
     74 // Returns a pending feedback data structure for the spellcheck |result| and
     75 // |text|.
     76 Misspelling BuildFeedback(const SpellCheckResult& result,
     77                           const base::string16& text) {
     78   size_t start = result.location;
     79   base::string16 context = TrimWords(&start,
     80                                result.length,
     81                                text,
     82                                chrome::spellcheck_common::kContextWordCount);
     83   return Misspelling(context,
     84                      start,
     85                      result.length,
     86                      std::vector<base::string16>(1, result.replacement),
     87                      result.hash);
     88 }
     89 
     90 // Builds suggestion info from |suggestions|. The caller owns the result.
     91 base::ListValue* BuildSuggestionInfo(
     92     const std::vector<Misspelling>& suggestions,
     93     bool is_first_feedback_batch) {
     94   base::ListValue* list = new base::ListValue;
     95   for (std::vector<Misspelling>::const_iterator suggestion_it =
     96            suggestions.begin();
     97        suggestion_it != suggestions.end();
     98        ++suggestion_it) {
     99     base::DictionaryValue* suggestion = suggestion_it->Serialize();
    100     suggestion->SetBoolean("isFirstInSession", is_first_feedback_batch);
    101     suggestion->SetBoolean("isAutoCorrection", false);
    102     list->Append(suggestion);
    103   }
    104   return list;
    105 }
    106 
    107 // Builds feedback parameters from |suggestion_info|, |language|, and |country|.
    108 // Takes ownership of |suggestion_list|. The caller owns the result.
    109 base::DictionaryValue* BuildParams(base::ListValue* suggestion_info,
    110                                    const std::string& language,
    111                                    const std::string& country) {
    112   base::DictionaryValue* params = new base::DictionaryValue;
    113   params->Set("suggestionInfo", suggestion_info);
    114   params->SetString("key", google_apis::GetAPIKey());
    115   params->SetString("language", language);
    116   params->SetString("originCountry", country);
    117   params->SetString("clientName", "Chrome");
    118   return params;
    119 }
    120 
    121 // Builds feedback data from |params|. Takes ownership of |params|. The caller
    122 // owns the result.
    123 base::Value* BuildFeedbackValue(base::DictionaryValue* params,
    124                                 const std::string& api_version) {
    125   base::DictionaryValue* result = new base::DictionaryValue;
    126   result->Set("params", params);
    127   result->SetString("method", "spelling.feedback");
    128   result->SetString("apiVersion", api_version);
    129   return result;
    130 }
    131 
    // Returns true if a misspelling of |misspelling_length| characters that
    // starts at |misspelling_location| lies entirely within a text of
    // |text_length| characters. Negative locations and non-positive lengths are
    // rejected.
    bool IsInBounds(int misspelling_location,
                    int misspelling_length,
                    size_t text_length) {
      if (misspelling_location < 0 || misspelling_length <= 0)
        return false;
      const size_t location = static_cast<size_t>(misspelling_location);
      const size_t length = static_cast<size_t>(misspelling_length);
      if (location >= text_length)
        return false;
      // Compare the length against the room left after |location| instead of
      // adding the two |int| values: that sum overflows |int| (undefined
      // behavior) when the inputs are large.
      return length <= text_length - location;
    }
    141 
    142 // Returns the feedback API version.
    143 std::string GetApiVersion() {
    144   // This guard is temporary.
    145   // TODO(rouslan): Remove the guard. http://crbug.com/247726
    146   if (base::FieldTrialList::FindFullName(kFeedbackFieldTrialName) ==
    147           kFeedbackFieldTrialEnabledGroupName &&
    148       CommandLine::ForCurrentProcess()->HasSwitch(
    149           switches::kEnableSpellingFeedbackFieldTrial)) {
    150     return "v2-internal";
    151   }
    152   return "v2";
    153 }
    154 
    155 }  // namespace
    156 
    157 FeedbackSender::FeedbackSender(net::URLRequestContextGetter* request_context,
    158                                const std::string& language,
    159                                const std::string& country)
    160     : request_context_(request_context),
    161       api_version_(GetApiVersion()),
    162       language_(language),
    163       country_(country),
    164       misspelling_counter_(0),
    165       session_start_(base::Time::Now()),
    166       feedback_service_url_(kFeedbackServiceURL) {
    167   // The command-line switch is for testing and temporary.
    168   // TODO(rouslan): Remove the command-line switch when testing is complete.
    169   // http://crbug.com/247726
    170   if (CommandLine::ForCurrentProcess()->HasSwitch(
    171           switches::kSpellingServiceFeedbackUrl)) {
    172     feedback_service_url_ =
    173         GURL(CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
    174             switches::kSpellingServiceFeedbackUrl));
    175   }
    176 }
    177 
    178 FeedbackSender::~FeedbackSender() {
    179 }
    180 
    181 void FeedbackSender::SelectedSuggestion(uint32 hash, int suggestion_index) {
    182   Misspelling* misspelling = feedback_.GetMisspelling(hash);
    183   // GetMisspelling() returns null for flushed feedback. Feedback is flushed
    184   // when the session expires every |kSessionHours| hours.
    185   if (!misspelling)
    186     return;
    187   misspelling->action.type = SpellcheckAction::TYPE_SELECT;
    188   misspelling->action.index = suggestion_index;
    189   misspelling->timestamp = base::Time::Now();
    190 }
    191 
    192 void FeedbackSender::AddedToDictionary(uint32 hash) {
    193   Misspelling* misspelling = feedback_.GetMisspelling(hash);
    194   // GetMisspelling() returns null for flushed feedback. Feedback is flushed
    195   // when the session expires every |kSessionHours| hours.
    196   if (!misspelling)
    197     return;
    198   misspelling->action.type = SpellcheckAction::TYPE_ADD_TO_DICT;
    199   misspelling->timestamp = base::Time::Now();
    200   const std::set<uint32>& hashes =
    201       feedback_.FindMisspellings(misspelling->GetMisspelledString());
    202   for (std::set<uint32>::const_iterator hash_it = hashes.begin();
    203        hash_it != hashes.end();
    204        ++hash_it) {
    205     Misspelling* duplicate_misspelling = feedback_.GetMisspelling(*hash_it);
    206     if (!duplicate_misspelling || duplicate_misspelling->action.IsFinal())
    207       continue;
    208     duplicate_misspelling->action.type = SpellcheckAction::TYPE_ADD_TO_DICT;
    209     duplicate_misspelling->timestamp = misspelling->timestamp;
    210   }
    211 }
    212 
    213 void FeedbackSender::RecordInDictionary(uint32 hash) {
    214   Misspelling* misspelling = feedback_.GetMisspelling(hash);
    215   // GetMisspelling() returns null for flushed feedback. Feedback is flushed
    216   // when the session expires every |kSessionHours| hours.
    217   if (!misspelling)
    218     return;
    219   misspelling->action.type = SpellcheckAction::TYPE_IN_DICTIONARY;
    220 }
    221 
    222 void FeedbackSender::IgnoredSuggestions(uint32 hash) {
    223   Misspelling* misspelling = feedback_.GetMisspelling(hash);
    224   // GetMisspelling() returns null for flushed feedback. Feedback is flushed
    225   // when the session expires every |kSessionHours| hours.
    226   if (!misspelling)
    227     return;
    228   misspelling->action.type = SpellcheckAction::TYPE_PENDING_IGNORE;
    229   misspelling->timestamp = base::Time::Now();
    230 }
    231 
    232 void FeedbackSender::ManuallyCorrected(uint32 hash,
    233                                        const base::string16& correction) {
    234   Misspelling* misspelling = feedback_.GetMisspelling(hash);
    235   // GetMisspelling() returns null for flushed feedback. Feedback is flushed
    236   // when the session expires every |kSessionHours| hours.
    237   if (!misspelling)
    238     return;
    239   misspelling->action.type = SpellcheckAction::TYPE_MANUALLY_CORRECTED;
    240   misspelling->action.value = correction;
    241   misspelling->timestamp = base::Time::Now();
    242 }
    243 
    244 void FeedbackSender::OnReceiveDocumentMarkers(
    245     int renderer_process_id,
    246     const std::vector<uint32>& markers) {
    247   if ((base::Time::Now() - session_start_).InHours() >=
    248       chrome::spellcheck_common::kSessionHours) {
    249     FlushFeedback();
    250     return;
    251   }
    252 
    253   if (!feedback_.RendererHasMisspellings(renderer_process_id))
    254     return;
    255 
    256   feedback_.FinalizeRemovedMisspellings(renderer_process_id, markers);
    257   SendFeedback(feedback_.GetMisspellingsInRenderer(renderer_process_id),
    258                !renderers_sent_feedback_.count(renderer_process_id));
    259   renderers_sent_feedback_.insert(renderer_process_id);
    260   feedback_.EraseFinalizedMisspellings(renderer_process_id);
    261 }
    262 
    263 void FeedbackSender::OnSpellcheckResults(
    264     int renderer_process_id,
    265     const base::string16& text,
    266     const std::vector<SpellCheckMarker>& markers,
    267     std::vector<SpellCheckResult>* results) {
    268   // Don't collect feedback if not going to send it.
    269   if (!timer_.IsRunning())
    270     return;
    271 
    272   // Generate a map of marker offsets to marker hashes. This map helps to
    273   // efficiently lookup feedback data based on the position of the misspelling
    274   // in text.
    275   typedef std::map<size_t, uint32> MarkerMap;
    276   MarkerMap marker_map;
    277   for (size_t i = 0; i < markers.size(); ++i)
    278     marker_map[markers[i].offset] = markers[i].hash;
    279 
    280   for (std::vector<SpellCheckResult>::iterator result_it = results->begin();
    281        result_it != results->end();
    282        ++result_it) {
    283     if (!IsInBounds(result_it->location, result_it->length, text.length()))
    284       continue;
    285     MarkerMap::const_iterator marker_it = marker_map.find(result_it->location);
    286     if (marker_it != marker_map.end() &&
    287         feedback_.HasMisspelling(marker_it->second)) {
    288       // If the renderer already has a marker for this spellcheck result, then
    289       // set the hash of the spellcheck result to be the same as the marker.
    290       result_it->hash = marker_it->second;
    291     } else {
    292       // If the renderer does not yet have a marker for this spellcheck result,
    293       // then generate a new hash for the spellcheck result.
    294       result_it->hash = BuildHash(session_start_, ++misspelling_counter_);
    295     }
    296     // Save the feedback data for the spellcheck result.
    297     feedback_.AddMisspelling(renderer_process_id,
    298                              BuildFeedback(*result_it, text));
    299   }
    300 }
    301 
    302 void FeedbackSender::OnLanguageCountryChange(const std::string& language,
    303                                              const std::string& country) {
    304   FlushFeedback();
    305   language_ = language;
    306   country_ = country;
    307 }
    308 
    309 void FeedbackSender::StartFeedbackCollection() {
    310   if (timer_.IsRunning())
    311     return;
    312 
    313   int interval_seconds = chrome::spellcheck_common::kFeedbackIntervalSeconds;
    314   // This command-line switch is for testing and temporary.
    315   // TODO(rouslan): Remove the command-line switch when testing is complete.
    316   // http://crbug.com/247726
    317   if (CommandLine::ForCurrentProcess()->HasSwitch(
    318           switches::kSpellingServiceFeedbackIntervalSeconds)) {
    319     base::StringToInt(CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
    320                           switches::kSpellingServiceFeedbackIntervalSeconds),
    321                       &interval_seconds);
    322     if (interval_seconds < kMinIntervalSeconds)
    323       interval_seconds = kMinIntervalSeconds;
    324     static const int kSessionSeconds =
    325         chrome::spellcheck_common::kSessionHours * 60 * 60;
    326     if (interval_seconds >  kSessionSeconds)
    327       interval_seconds = kSessionSeconds;
    328   }
    329   timer_.Start(FROM_HERE,
    330                base::TimeDelta::FromSeconds(interval_seconds),
    331                this,
    332                &FeedbackSender::RequestDocumentMarkers);
    333 }
    334 
    335 void FeedbackSender::StopFeedbackCollection() {
    336   if (!timer_.IsRunning())
    337     return;
    338 
    339   FlushFeedback();
    340   timer_.Stop();
    341 }
    342 
    343 void FeedbackSender::OnURLFetchComplete(const net::URLFetcher* source) {
    344   for (ScopedVector<net::URLFetcher>::iterator sender_it = senders_.begin();
    345        sender_it != senders_.end();
    346        ++sender_it) {
    347     if (*sender_it == source) {
    348       senders_.erase(sender_it);
    349       return;
    350     }
    351   }
    352   delete source;
    353 }
    354 
    355 void FeedbackSender::RequestDocumentMarkers() {
    356   // Request document markers from all the renderers that are still alive.
    357   std::set<int> alive_renderers;
    358   for (content::RenderProcessHost::iterator it(
    359            content::RenderProcessHost::AllHostsIterator());
    360        !it.IsAtEnd();
    361        it.Advance()) {
    362     alive_renderers.insert(it.GetCurrentValue()->GetID());
    363     it.GetCurrentValue()->Send(new SpellCheckMsg_RequestDocumentMarkers());
    364   }
    365 
    366   // Asynchronously send out the feedback for all the renderers that are no
    367   // longer alive.
    368   std::vector<int> known_renderers = feedback_.GetRendersWithMisspellings();
    369   std::sort(known_renderers.begin(), known_renderers.end());
    370   std::vector<int> dead_renderers =
    371       base::STLSetDifference<std::vector<int> >(known_renderers,
    372                                                 alive_renderers);
    373   for (std::vector<int>::const_iterator it = dead_renderers.begin();
    374        it != dead_renderers.end();
    375        ++it) {
    376     base::MessageLoop::current()->PostTask(
    377         FROM_HERE,
    378         base::Bind(&FeedbackSender::OnReceiveDocumentMarkers,
    379                    AsWeakPtr(),
    380                    *it,
    381                    std::vector<uint32>()));
    382   }
    383 }
    384 
    385 void FeedbackSender::FlushFeedback() {
    386   if (feedback_.Empty())
    387     return;
    388   feedback_.FinalizeAllMisspellings();
    389   SendFeedback(feedback_.GetAllMisspellings(),
    390                renderers_sent_feedback_.empty());
    391   feedback_.Clear();
    392   renderers_sent_feedback_.clear();
    393   session_start_ = base::Time::Now();
    394   timer_.Reset();
    395 }
    396 
    397 void FeedbackSender::SendFeedback(const std::vector<Misspelling>& feedback_data,
    398                                   bool is_first_feedback_batch) {
    399   scoped_ptr<base::Value> feedback_value(BuildFeedbackValue(
    400       BuildParams(BuildSuggestionInfo(feedback_data, is_first_feedback_batch),
    401                   language_,
    402                   country_),
    403       api_version_));
    404   std::string feedback;
    405   base::JSONWriter::Write(feedback_value.get(), &feedback);
    406 
    407   // The tests use this identifier to mock the URL fetcher.
    408   static const int kUrlFetcherId = 0;
    409   net::URLFetcher* sender = net::URLFetcher::Create(
    410       kUrlFetcherId, feedback_service_url_, net::URLFetcher::POST, this);
    411   sender->SetLoadFlags(net::LOAD_DO_NOT_SEND_COOKIES |
    412                        net::LOAD_DO_NOT_SAVE_COOKIES);
    413   sender->SetUploadData("application/json", feedback);
    414   senders_.push_back(sender);
    415 
    416   // Request context is NULL in testing.
    417   if (request_context_.get()) {
    418     sender->SetRequestContext(request_context_.get());
    419     sender->Start();
    420   }
    421 }
    422 
    423 }  // namespace spellcheck
    424