Home | History | Annotate | Download | only in spellchecker
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/spellchecker/spellcheck_custom_dictionary.h"
      6 
      7 #include <functional>
      8 
      9 #include "base/file_util.h"
     10 #include "base/files/important_file_writer.h"
     11 #include "base/md5.h"
     12 #include "base/strings/string_number_conversions.h"
     13 #include "base/strings/string_split.h"
     14 #include "chrome/browser/spellchecker/spellcheck_host_metrics.h"
     15 #include "chrome/common/chrome_constants.h"
     16 #include "chrome/common/spellcheck_messages.h"
     17 #include "content/public/browser/browser_thread.h"
     18 #include "sync/api/sync_change.h"
     19 #include "sync/api/sync_data.h"
     20 #include "sync/api/sync_error_factory.h"
     21 #include "sync/protocol/sync.pb.h"
     22 
     23 using content::BrowserThread;
     24 using chrome::spellcheck_common::WordList;
     25 using chrome::spellcheck_common::WordSet;
     26 
     27 namespace {
     28 
     29 // Filename extension for backup dictionary file.
     30 const base::FilePath::CharType BACKUP_EXTENSION[] = FILE_PATH_LITERAL("backup");
     31 
     32 // Prefix for the checksum in the dictionary file.
     33 const char CHECKSUM_PREFIX[] = "checksum_v1 = ";
     34 
     35 // The status of the checksum in a custom spellcheck dictionary.
     36 enum ChecksumStatus {
     37   VALID_CHECKSUM,
     38   INVALID_CHECKSUM,
     39 };
     40 
     41 // The result of a dictionary sanitation. Can be used as a bitmap.
     42 enum ChangeSanitationResult {
     43   // The change is valid and can be applied as-is.
     44   VALID_CHANGE = 0,
     45 
     46   // The change contained words to be added that are not valid.
     47   DETECTED_INVALID_WORDS = 1,
     48 
     49   // The change contained words to be added that are already in the dictionary.
     50   DETECTED_DUPLICATE_WORDS = 2,
     51 
     52   // The change contained words to be removed that are not in the dictionary.
     53   DETECTED_MISSING_WORDS = 4,
     54 };
     55 
     56 // Loads the file at |file_path| into the |words| container. If the file has a
     57 // valid checksum, then returns ChecksumStatus::VALID. If the file has an
     58 // invalid checksum, then returns ChecksumStatus::INVALID and clears |words|.
     59 ChecksumStatus LoadFile(const base::FilePath& file_path, WordList& words) {
     60   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
     61   words.clear();
     62   std::string contents;
     63   file_util::ReadFileToString(file_path, &contents);
     64   size_t pos = contents.rfind(CHECKSUM_PREFIX);
     65   if (pos != std::string::npos) {
     66     std::string checksum = contents.substr(pos + strlen(CHECKSUM_PREFIX));
     67     contents = contents.substr(0, pos);
     68     if (checksum != base::MD5String(contents))
     69       return INVALID_CHECKSUM;
     70   }
     71   TrimWhitespaceASCII(contents, TRIM_ALL, &contents);
     72   base::SplitString(contents, '\n', &words);
     73   return VALID_CHECKSUM;
     74 }
     75 
     76 // Returns true for invalid words and false for valid words.
     77 bool IsInvalidWord(const std::string& word) {
     78   std::string tmp;
     79   return !IsStringUTF8(word) ||
     80       word.length() >
     81           chrome::spellcheck_common::MAX_CUSTOM_DICTIONARY_WORD_BYTES ||
     82       word.empty() ||
     83       TRIM_NONE != TrimWhitespaceASCII(word, TRIM_ALL, &tmp);
     84 }
     85 
     86 // Loads the custom spellcheck dictionary from |path| into |custom_words|. If
     87 // the dictionary checksum is not valid, but backup checksum is valid, then
     88 // restores the backup and loads that into |custom_words| instead. If the backup
     89 // is invalid too, then clears |custom_words|. Must be called on the file
     90 // thread.
     91 void LoadDictionaryFileReliably(WordList& custom_words,
     92                                 const base::FilePath& path) {
     93   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
     94   // Load the contents and verify the checksum.
     95   if (LoadFile(path, custom_words) == VALID_CHECKSUM)
     96     return;
     97   // Checksum is not valid. See if there's a backup.
     98   base::FilePath backup = path.AddExtension(BACKUP_EXTENSION);
     99   if (!base::PathExists(backup))
    100     return;
    101   // Load the backup and verify its checksum.
    102   if (LoadFile(backup, custom_words) != VALID_CHECKSUM)
    103     return;
    104   // Backup checksum is valid. Restore the backup.
    105   base::CopyFile(backup, path);
    106 }
    107 
    108 // Backs up the original dictionary, saves |custom_words| and its checksum into
    109 // the custom spellcheck dictionary at |path|.
    110 void SaveDictionaryFileReliably(
    111     const WordList& custom_words,
    112     const base::FilePath& path) {
    113   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
    114   std::stringstream content;
    115   for (WordList::const_iterator it = custom_words.begin();
    116        it != custom_words.end();
    117        ++it) {
    118     content << *it << '\n';
    119   }
    120   std::string checksum = base::MD5String(content.str());
    121   content << CHECKSUM_PREFIX << checksum;
    122   base::CopyFile(path, path.AddExtension(BACKUP_EXTENSION));
    123   base::ImportantFileWriter::WriteFileAtomically(path, content.str());
    124 }
    125 
    126 // Removes duplicate and invalid words from |to_add| word list and sorts it.
    127 // Looks for duplicates in both |to_add| and |existing| word lists. Returns a
    128 // bitmap of |ChangeSanitationResult| values.
    129 int SanitizeWordsToAdd(const WordSet& existing, WordList& to_add) {
    130   // Do not add duplicate words.
    131   std::sort(to_add.begin(), to_add.end());
    132   WordList new_words;
    133   std::set_difference(to_add.begin(),
    134                       to_add.end(),
    135                       existing.begin(),
    136                       existing.end(),
    137                       std::back_inserter(new_words));
    138   new_words.erase(std::unique(new_words.begin(), new_words.end()),
    139                   new_words.end());
    140   int result = VALID_CHANGE;
    141   if (to_add.size() != new_words.size())
    142     result |= DETECTED_DUPLICATE_WORDS;
    143   // Do not add invalid words.
    144   size_t size = new_words.size();
    145   new_words.erase(std::remove_if(new_words.begin(),
    146                                  new_words.end(),
    147                                  IsInvalidWord),
    148                   new_words.end());
    149   if (size != new_words.size())
    150     result |= DETECTED_INVALID_WORDS;
    151   // Save the sanitized words to be added.
    152   std::swap(to_add, new_words);
    153   return result;
    154 }
    155 
    156 // Removes word from |to_remove| that are missing from |existing| word list and
    157 // sorts |to_remove|. Returns a bitmap of |ChangeSanitationResult| values.
    158 int SanitizeWordsToRemove(const WordSet& existing, WordList& to_remove) {
    159   // Do not remove words that are missing from the dictionary.
    160   std::sort(to_remove.begin(), to_remove.end());
    161   WordList found_words;
    162   std::set_intersection(existing.begin(),
    163                         existing.end(),
    164                         to_remove.begin(),
    165                         to_remove.end(),
    166                         std::back_inserter(found_words));
    167   int result = VALID_CHANGE;
    168   if (to_remove.size() > found_words.size())
    169     result |= DETECTED_MISSING_WORDS;
    170   // Save the sanitized words to be removed.
    171   std::swap(to_remove, found_words);
    172   return result;
    173 }
    174 
    175 }  // namespace
    176 
    177 
    178 SpellcheckCustomDictionary::Change::Change() {
    179 }
    180 
    181 SpellcheckCustomDictionary::Change::Change(
    182     const SpellcheckCustomDictionary::Change& other)
    183     : to_add_(other.to_add()),
    184       to_remove_(other.to_remove()) {
    185 }
    186 
    187 SpellcheckCustomDictionary::Change::Change(const WordList& to_add)
    188     : to_add_(to_add) {
    189 }
    190 
    191 SpellcheckCustomDictionary::Change::~Change() {
    192 }
    193 
    194 void SpellcheckCustomDictionary::Change::AddWord(const std::string& word) {
    195   to_add_.push_back(word);
    196 }
    197 
    198 void SpellcheckCustomDictionary::Change::RemoveWord(const std::string& word) {
    199   to_remove_.push_back(word);
    200 }
    201 
    202 int SpellcheckCustomDictionary::Change::Sanitize(const WordSet& words) {
    203   int result = VALID_CHANGE;
    204   if (!to_add_.empty())
    205     result |= SanitizeWordsToAdd(words, to_add_);
    206   if (!to_remove_.empty())
    207     result |= SanitizeWordsToRemove(words, to_remove_);
    208   return result;
    209 }
    210 
    211 const WordList& SpellcheckCustomDictionary::Change::to_add() const {
    212   return to_add_;
    213 }
    214 
    215 const WordList& SpellcheckCustomDictionary::Change::to_remove() const {
    216   return to_remove_;
    217 }
    218 
    219 bool SpellcheckCustomDictionary::Change::empty() const {
    220   return to_add_.empty() && to_remove_.empty();
    221 }
    222 
    223 SpellcheckCustomDictionary::SpellcheckCustomDictionary(
    224     const base::FilePath& path)
    225     : custom_dictionary_path_(),
    226       weak_ptr_factory_(this),
    227       is_loaded_(false) {
    228   custom_dictionary_path_ =
    229       path.Append(chrome::kCustomDictionaryFileName);
    230 }
    231 
    232 SpellcheckCustomDictionary::~SpellcheckCustomDictionary() {
    233 }
    234 
    235 const WordSet& SpellcheckCustomDictionary::GetWords() const {
    236   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    237   return words_;
    238 }
    239 
    240 bool SpellcheckCustomDictionary::AddWord(const std::string& word) {
    241   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    242   Change dictionary_change;
    243   dictionary_change.AddWord(word);
    244   int result = dictionary_change.Sanitize(GetWords());
    245   Apply(dictionary_change);
    246   Notify(dictionary_change);
    247   Sync(dictionary_change);
    248   Save(dictionary_change);
    249   return result == VALID_CHANGE;
    250 }
    251 
    252 bool SpellcheckCustomDictionary::RemoveWord(const std::string& word) {
    253   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    254   Change dictionary_change;
    255   dictionary_change.RemoveWord(word);
    256   int result = dictionary_change.Sanitize(GetWords());
    257   Apply(dictionary_change);
    258   Notify(dictionary_change);
    259   Sync(dictionary_change);
    260   Save(dictionary_change);
    261   return result == VALID_CHANGE;
    262 }
    263 
    264 bool SpellcheckCustomDictionary::HasWord(const std::string& word) const {
    265   return !!words_.count(word);
    266 }
    267 
    268 void SpellcheckCustomDictionary::AddObserver(Observer* observer) {
    269   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    270   observers_.AddObserver(observer);
    271 }
    272 
    273 void SpellcheckCustomDictionary::RemoveObserver(Observer* observer) {
    274   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    275   observers_.RemoveObserver(observer);
    276 }
    277 
    278 bool SpellcheckCustomDictionary::IsLoaded() {
    279   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    280   return is_loaded_;
    281 }
    282 
    283 bool SpellcheckCustomDictionary::IsSyncing() {
    284   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    285   return !!sync_processor_.get();
    286 }
    287 
    288 void SpellcheckCustomDictionary::Load() {
    289   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    290   BrowserThread::PostTaskAndReplyWithResult(
    291       BrowserThread::FILE,
    292       FROM_HERE,
    293       base::Bind(&SpellcheckCustomDictionary::LoadDictionaryFile,
    294                  custom_dictionary_path_),
    295       base::Bind(&SpellcheckCustomDictionary::OnLoaded,
    296                  weak_ptr_factory_.GetWeakPtr()));
    297 }
    298 
    299 syncer::SyncMergeResult SpellcheckCustomDictionary::MergeDataAndStartSyncing(
    300     syncer::ModelType type,
    301     const syncer::SyncDataList& initial_sync_data,
    302     scoped_ptr<syncer::SyncChangeProcessor> sync_processor,
    303     scoped_ptr<syncer::SyncErrorFactory> sync_error_handler) {
    304   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    305   DCHECK(!sync_processor_.get());
    306   DCHECK(!sync_error_handler_.get());
    307   DCHECK(sync_processor.get());
    308   DCHECK(sync_error_handler.get());
    309   DCHECK_EQ(syncer::DICTIONARY, type);
    310   sync_processor_ = sync_processor.Pass();
    311   sync_error_handler_ = sync_error_handler.Pass();
    312 
    313   // Build a list of words to add locally.
    314   WordList to_add_locally;
    315   for (syncer::SyncDataList::const_iterator it = initial_sync_data.begin();
    316        it != initial_sync_data.end();
    317        ++it) {
    318     DCHECK_EQ(syncer::DICTIONARY, it->GetDataType());
    319     to_add_locally.push_back(it->GetSpecifics().dictionary().word());
    320   }
    321 
    322   // Add remote words locally.
    323   Change to_change_locally(to_add_locally);
    324   to_change_locally.Sanitize(GetWords());
    325   Apply(to_change_locally);
    326   Notify(to_change_locally);
    327   Save(to_change_locally);
    328 
    329   // Add as many as possible local words remotely.
    330   std::sort(to_add_locally.begin(), to_add_locally.end());
    331   WordList to_add_remotely;
    332   std::set_difference(words_.begin(),
    333                       words_.end(),
    334                       to_add_locally.begin(),
    335                       to_add_locally.end(),
    336                       std::back_inserter(to_add_remotely));
    337 
    338   // Send local changes to the sync server.
    339   Change to_change_remotely(to_add_remotely);
    340   syncer::SyncMergeResult result(type);
    341   result.set_error(Sync(to_change_remotely));
    342   return result;
    343 }
    344 
    345 void SpellcheckCustomDictionary::StopSyncing(syncer::ModelType type) {
    346   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    347   DCHECK_EQ(syncer::DICTIONARY, type);
    348   sync_processor_.reset();
    349   sync_error_handler_.reset();
    350 }
    351 
    352 syncer::SyncDataList SpellcheckCustomDictionary::GetAllSyncData(
    353     syncer::ModelType type) const {
    354   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    355   DCHECK_EQ(syncer::DICTIONARY, type);
    356   syncer::SyncDataList data;
    357   std::string word;
    358   size_t i = 0;
    359   for (WordSet::const_iterator it = words_.begin();
    360        it != words_.end() &&
    361            i < chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS;
    362        ++it, ++i) {
    363     word = *it;
    364     sync_pb::EntitySpecifics specifics;
    365     specifics.mutable_dictionary()->set_word(word);
    366     data.push_back(syncer::SyncData::CreateLocalData(word, word, specifics));
    367   }
    368   return data;
    369 }
    370 
    371 syncer::SyncError SpellcheckCustomDictionary::ProcessSyncChanges(
    372     const tracked_objects::Location& from_here,
    373     const syncer::SyncChangeList& change_list) {
    374   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    375   Change dictionary_change;
    376   for (syncer::SyncChangeList::const_iterator it = change_list.begin();
    377        it != change_list.end();
    378        ++it) {
    379     DCHECK(it->IsValid());
    380     std::string word = it->sync_data().GetSpecifics().dictionary().word();
    381     switch (it->change_type()) {
    382       case syncer::SyncChange::ACTION_ADD:
    383         dictionary_change.AddWord(word);
    384         break;
    385       case syncer::SyncChange::ACTION_DELETE:
    386         dictionary_change.RemoveWord(word);
    387         break;
    388       default:
    389         return sync_error_handler_->CreateAndUploadError(
    390             FROM_HERE,
    391             "Processing sync changes failed on change type " +
    392                 syncer::SyncChange::ChangeTypeToString(it->change_type()));
    393     }
    394   }
    395 
    396   dictionary_change.Sanitize(GetWords());
    397   Apply(dictionary_change);
    398   Notify(dictionary_change);
    399   Save(dictionary_change);
    400 
    401   return syncer::SyncError();
    402 }
    403 
    404 // static
    405 WordList SpellcheckCustomDictionary::LoadDictionaryFile(
    406     const base::FilePath& path) {
    407   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
    408   WordList words;
    409   LoadDictionaryFileReliably(words, path);
    410   if (!words.empty() && VALID_CHANGE != SanitizeWordsToAdd(WordSet(), words))
    411     SaveDictionaryFileReliably(words, path);
    412   SpellCheckHostMetrics::RecordCustomWordCountStats(words.size());
    413   return words;
    414 }
    415 
    416 // static
    417 void SpellcheckCustomDictionary::UpdateDictionaryFile(
    418     const SpellcheckCustomDictionary::Change& dictionary_change,
    419     const base::FilePath& path) {
    420   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
    421   if (dictionary_change.empty())
    422     return;
    423 
    424   WordList custom_words;
    425   LoadDictionaryFileReliably(custom_words, path);
    426 
    427   // Add words.
    428   custom_words.insert(custom_words.end(),
    429                       dictionary_change.to_add().begin(),
    430                       dictionary_change.to_add().end());
    431 
    432   // Remove words.
    433   std::sort(custom_words.begin(), custom_words.end());
    434   WordList remaining;
    435   std::set_difference(custom_words.begin(),
    436                       custom_words.end(),
    437                       dictionary_change.to_remove().begin(),
    438                       dictionary_change.to_remove().end(),
    439                       std::back_inserter(remaining));
    440   std::swap(custom_words, remaining);
    441 
    442   SaveDictionaryFileReliably(custom_words, path);
    443 }
    444 
    445 void SpellcheckCustomDictionary::OnLoaded(WordList custom_words) {
    446   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    447   Change dictionary_change(custom_words);
    448   dictionary_change.Sanitize(GetWords());
    449   Apply(dictionary_change);
    450   Sync(dictionary_change);
    451   is_loaded_ = true;
    452   FOR_EACH_OBSERVER(Observer, observers_, OnCustomDictionaryLoaded());
    453 }
    454 
    455 void SpellcheckCustomDictionary::Apply(
    456     const SpellcheckCustomDictionary::Change& dictionary_change) {
    457   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    458   if (!dictionary_change.to_add().empty()) {
    459     words_.insert(dictionary_change.to_add().begin(),
    460                   dictionary_change.to_add().end());
    461   }
    462   if (!dictionary_change.to_remove().empty()) {
    463     WordSet updated_words;
    464     std::set_difference(words_.begin(),
    465                         words_.end(),
    466                         dictionary_change.to_remove().begin(),
    467                         dictionary_change.to_remove().end(),
    468                         std::inserter(updated_words, updated_words.end()));
    469     std::swap(words_, updated_words);
    470   }
    471 }
    472 
    473 void SpellcheckCustomDictionary::Save(
    474     const SpellcheckCustomDictionary::Change& dictionary_change) {
    475   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    476   BrowserThread::PostTask(
    477       BrowserThread::FILE,
    478       FROM_HERE,
    479       base::Bind(&SpellcheckCustomDictionary::UpdateDictionaryFile,
    480                  dictionary_change,
    481                  custom_dictionary_path_));
    482 }
    483 
    484 syncer::SyncError SpellcheckCustomDictionary::Sync(
    485     const SpellcheckCustomDictionary::Change& dictionary_change) {
    486   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    487   syncer::SyncError error;
    488   if (!IsSyncing() || dictionary_change.empty())
    489     return error;
    490 
    491   // The number of words on the sync server should not exceed the limits.
    492   int server_size = static_cast<int>(words_.size()) -
    493       static_cast<int>(dictionary_change.to_add().size());
    494   int max_upload_size = std::max(
    495       0,
    496       static_cast<int>(
    497           chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS) -
    498           server_size);
    499   int upload_size = std::min(
    500       static_cast<int>(dictionary_change.to_add().size()),
    501       max_upload_size);
    502 
    503   syncer::SyncChangeList sync_change_list;
    504   int i = 0;
    505 
    506   for (WordList::const_iterator it = dictionary_change.to_add().begin();
    507        it != dictionary_change.to_add().end() && i < upload_size;
    508        ++it, ++i) {
    509     std::string word = *it;
    510     sync_pb::EntitySpecifics specifics;
    511     specifics.mutable_dictionary()->set_word(word);
    512     sync_change_list.push_back(syncer::SyncChange(
    513         FROM_HERE,
    514         syncer::SyncChange::ACTION_ADD,
    515         syncer::SyncData::CreateLocalData(word, word, specifics)));
    516   }
    517 
    518   for (WordList::const_iterator it = dictionary_change.to_remove().begin();
    519        it != dictionary_change.to_remove().end();
    520        ++it) {
    521     std::string word = *it;
    522     sync_pb::EntitySpecifics specifics;
    523     specifics.mutable_dictionary()->set_word(word);
    524     sync_change_list.push_back(syncer::SyncChange(
    525         FROM_HERE,
    526         syncer::SyncChange::ACTION_DELETE,
    527         syncer::SyncData::CreateLocalData(word, word, specifics)));
    528   }
    529 
    530   // Send the changes to the sync processor.
    531   error = sync_processor_->ProcessSyncChanges(FROM_HERE, sync_change_list);
    532   if (error.IsSet())
    533     return error;
    534 
    535   // Turn off syncing of this dictionary if the server already has the maximum
    536   // number of words.
    537   if (words_.size() > chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS)
    538     StopSyncing(syncer::DICTIONARY);
    539 
    540   return error;
    541 }
    542 
    543 void SpellcheckCustomDictionary::Notify(
    544     const SpellcheckCustomDictionary::Change& dictionary_change) {
    545   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    546   if (!IsLoaded() || dictionary_change.empty())
    547     return;
    548   FOR_EACH_OBSERVER(Observer,
    549                     observers_,
    550                     OnCustomDictionaryChanged(dictionary_change));
    551 }
    552