Home | History | Annotate | Download | only in spellchecker
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/spellchecker/spellcheck_custom_dictionary.h"
      6 
      7 #include <functional>
      8 
      9 #include "base/files/file_util.h"
     10 #include "base/files/important_file_writer.h"
     11 #include "base/md5.h"
     12 #include "base/strings/string_number_conversions.h"
     13 #include "base/strings/string_split.h"
     14 #include "chrome/browser/spellchecker/spellcheck_host_metrics.h"
     15 #include "chrome/common/chrome_constants.h"
     16 #include "chrome/common/spellcheck_messages.h"
     17 #include "content/public/browser/browser_thread.h"
     18 #include "sync/api/sync_change.h"
     19 #include "sync/api/sync_data.h"
     20 #include "sync/api/sync_error_factory.h"
     21 #include "sync/protocol/sync.pb.h"
     22 
     23 using content::BrowserThread;
     24 using chrome::spellcheck_common::WordList;
     25 using chrome::spellcheck_common::WordSet;
     26 
     27 namespace {
     28 
// Filename extension for backup dictionary file. It is appended to the
// dictionary path (via AddExtension) to form the path of the backup copy.
const base::FilePath::CharType BACKUP_EXTENSION[] = FILE_PATH_LITERAL("backup");

// Prefix for the checksum in the dictionary file. When loading, everything
// after the last occurrence of this prefix is treated as the stored checksum.
const char CHECKSUM_PREFIX[] = "checksum_v1 = ";
     34 
// The status of the checksum in a custom spellcheck dictionary, as reported by
// LoadFile().
enum ChecksumStatus {
  // The stored checksum matched the file contents, or the file contained no
  // checksum line at all.
  VALID_CHECKSUM,
  // A checksum line was present but did not match the file contents.
  INVALID_CHECKSUM,
};
     40 
     41 // The result of a dictionary sanitation. Can be used as a bitmap.
     42 enum ChangeSanitationResult {
     43   // The change is valid and can be applied as-is.
     44   VALID_CHANGE = 0,
     45 
     46   // The change contained words to be added that are not valid.
     47   DETECTED_INVALID_WORDS = 1,
     48 
     49   // The change contained words to be added that are already in the dictionary.
     50   DETECTED_DUPLICATE_WORDS = 2,
     51 
     52   // The change contained words to be removed that are not in the dictionary.
     53   DETECTED_MISSING_WORDS = 4,
     54 };
     55 
     56 // Loads the file at |file_path| into the |words| container. If the file has a
     57 // valid checksum, then returns ChecksumStatus::VALID. If the file has an
     58 // invalid checksum, then returns ChecksumStatus::INVALID and clears |words|.
     59 ChecksumStatus LoadFile(const base::FilePath& file_path, WordList& words) {
     60   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
     61   words.clear();
     62   std::string contents;
     63   base::ReadFileToString(file_path, &contents);
     64   size_t pos = contents.rfind(CHECKSUM_PREFIX);
     65   if (pos != std::string::npos) {
     66     std::string checksum = contents.substr(pos + strlen(CHECKSUM_PREFIX));
     67     contents = contents.substr(0, pos);
     68     if (checksum != base::MD5String(contents))
     69       return INVALID_CHECKSUM;
     70   }
     71   base::TrimWhitespaceASCII(contents, base::TRIM_ALL, &contents);
     72   base::SplitString(contents, '\n', &words);
     73   return VALID_CHECKSUM;
     74 }
     75 
     76 // Returns true for invalid words and false for valid words.
     77 bool IsInvalidWord(const std::string& word) {
     78   std::string tmp;
     79   return !base::IsStringUTF8(word) ||
     80       word.length() >
     81           chrome::spellcheck_common::MAX_CUSTOM_DICTIONARY_WORD_BYTES ||
     82       word.empty() ||
     83       base::TRIM_NONE != base::TrimWhitespaceASCII(word, base::TRIM_ALL, &tmp);
     84 }
     85 
     86 // Loads the custom spellcheck dictionary from |path| into |custom_words|. If
     87 // the dictionary checksum is not valid, but backup checksum is valid, then
     88 // restores the backup and loads that into |custom_words| instead. If the backup
     89 // is invalid too, then clears |custom_words|. Must be called on the file
     90 // thread.
     91 void LoadDictionaryFileReliably(WordList& custom_words,
     92                                 const base::FilePath& path) {
     93   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
     94   // Load the contents and verify the checksum.
     95   if (LoadFile(path, custom_words) == VALID_CHECKSUM)
     96     return;
     97   // Checksum is not valid. See if there's a backup.
     98   base::FilePath backup = path.AddExtension(BACKUP_EXTENSION);
     99   if (!base::PathExists(backup))
    100     return;
    101   // Load the backup and verify its checksum.
    102   if (LoadFile(backup, custom_words) != VALID_CHECKSUM)
    103     return;
    104   // Backup checksum is valid. Restore the backup.
    105   base::CopyFile(backup, path);
    106 }
    107 
    108 // Backs up the original dictionary, saves |custom_words| and its checksum into
    109 // the custom spellcheck dictionary at |path|.
    110 void SaveDictionaryFileReliably(
    111     const WordList& custom_words,
    112     const base::FilePath& path) {
    113   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
    114   std::stringstream content;
    115   for (WordList::const_iterator it = custom_words.begin();
    116        it != custom_words.end();
    117        ++it) {
    118     content << *it << '\n';
    119   }
    120   std::string checksum = base::MD5String(content.str());
    121   content << CHECKSUM_PREFIX << checksum;
    122   base::CopyFile(path, path.AddExtension(BACKUP_EXTENSION));
    123   base::ImportantFileWriter::WriteFileAtomically(path, content.str());
    124 }
    125 
    126 // Removes duplicate and invalid words from |to_add| word list and sorts it.
    127 // Looks for duplicates in both |to_add| and |existing| word lists. Returns a
    128 // bitmap of |ChangeSanitationResult| values.
    129 int SanitizeWordsToAdd(const WordSet& existing, WordList& to_add) {
    130   // Do not add duplicate words.
    131   std::sort(to_add.begin(), to_add.end());
    132   WordList new_words = base::STLSetDifference<WordList>(to_add, existing);
    133   new_words.erase(std::unique(new_words.begin(), new_words.end()),
    134                   new_words.end());
    135   int result = VALID_CHANGE;
    136   if (to_add.size() != new_words.size())
    137     result |= DETECTED_DUPLICATE_WORDS;
    138   // Do not add invalid words.
    139   size_t size = new_words.size();
    140   new_words.erase(std::remove_if(new_words.begin(),
    141                                  new_words.end(),
    142                                  IsInvalidWord),
    143                   new_words.end());
    144   if (size != new_words.size())
    145     result |= DETECTED_INVALID_WORDS;
    146   // Save the sanitized words to be added.
    147   std::swap(to_add, new_words);
    148   return result;
    149 }
    150 
    151 // Removes word from |to_remove| that are missing from |existing| word list and
    152 // sorts |to_remove|. Returns a bitmap of |ChangeSanitationResult| values.
    153 int SanitizeWordsToRemove(const WordSet& existing, WordList& to_remove) {
    154   // Do not remove words that are missing from the dictionary.
    155   std::sort(to_remove.begin(), to_remove.end());
    156   WordList found_words;
    157   std::set_intersection(existing.begin(),
    158                         existing.end(),
    159                         to_remove.begin(),
    160                         to_remove.end(),
    161                         std::back_inserter(found_words));
    162   int result = VALID_CHANGE;
    163   if (to_remove.size() > found_words.size())
    164     result |= DETECTED_MISSING_WORDS;
    165   // Save the sanitized words to be removed.
    166   std::swap(to_remove, found_words);
    167   return result;
    168 }
    169 
    170 }  // namespace
    171 
    172 
// Creates an empty change: no words to add and no words to remove.
SpellcheckCustomDictionary::Change::Change() {
}
    175 
    176 SpellcheckCustomDictionary::Change::Change(
    177     const SpellcheckCustomDictionary::Change& other)
    178     : to_add_(other.to_add()),
    179       to_remove_(other.to_remove()) {
    180 }
    181 
// Creates a change that adds every word in |to_add| and removes nothing.
SpellcheckCustomDictionary::Change::Change(const WordList& to_add)
    : to_add_(to_add) {
}
    185 
// Nothing to release explicitly; the destructor body is empty.
SpellcheckCustomDictionary::Change::~Change() {
}
    188 
// Queues |word| for addition. No validation happens here; call Sanitize() to
// drop invalid or duplicate words.
void SpellcheckCustomDictionary::Change::AddWord(const std::string& word) {
  to_add_.push_back(word);
}
    192 
// Queues |word| for removal. No validation happens here; call Sanitize() to
// drop words that are not in the dictionary.
void SpellcheckCustomDictionary::Change::RemoveWord(const std::string& word) {
  to_remove_.push_back(word);
}
    196 
    197 int SpellcheckCustomDictionary::Change::Sanitize(const WordSet& words) {
    198   int result = VALID_CHANGE;
    199   if (!to_add_.empty())
    200     result |= SanitizeWordsToAdd(words, to_add_);
    201   if (!to_remove_.empty())
    202     result |= SanitizeWordsToRemove(words, to_remove_);
    203   return result;
    204 }
    205 
// Returns the words queued for addition.
const WordList& SpellcheckCustomDictionary::Change::to_add() const {
  return to_add_;
}
    209 
// Returns the words queued for removal.
const WordList& SpellcheckCustomDictionary::Change::to_remove() const {
  return to_remove_;
}
    213 
    214 bool SpellcheckCustomDictionary::Change::empty() const {
    215   return to_add_.empty() && to_remove_.empty();
    216 }
    217 
    218 SpellcheckCustomDictionary::SpellcheckCustomDictionary(
    219     const base::FilePath& path)
    220     : custom_dictionary_path_(),
    221       is_loaded_(false),
    222       weak_ptr_factory_(this) {
    223   custom_dictionary_path_ =
    224       path.Append(chrome::kCustomDictionaryFileName);
    225 }
    226 
// Nothing to release explicitly; the destructor body is empty.
SpellcheckCustomDictionary::~SpellcheckCustomDictionary() {
}
    229 
// Returns the in-memory set of custom words. Must be called on the UI thread.
const WordSet& SpellcheckCustomDictionary::GetWords() const {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
  return words_;
}
    234 
    235 bool SpellcheckCustomDictionary::AddWord(const std::string& word) {
    236   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    237   Change dictionary_change;
    238   dictionary_change.AddWord(word);
    239   int result = dictionary_change.Sanitize(GetWords());
    240   Apply(dictionary_change);
    241   Notify(dictionary_change);
    242   Sync(dictionary_change);
    243   Save(dictionary_change);
    244   return result == VALID_CHANGE;
    245 }
    246 
    247 bool SpellcheckCustomDictionary::RemoveWord(const std::string& word) {
    248   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    249   Change dictionary_change;
    250   dictionary_change.RemoveWord(word);
    251   int result = dictionary_change.Sanitize(GetWords());
    252   Apply(dictionary_change);
    253   Notify(dictionary_change);
    254   Sync(dictionary_change);
    255   Save(dictionary_change);
    256   return result == VALID_CHANGE;
    257 }
    258 
    259 bool SpellcheckCustomDictionary::HasWord(const std::string& word) const {
    260   return !!words_.count(word);
    261 }
    262 
// Registers |observer| in |observers_|. Must be called on the UI thread.
void SpellcheckCustomDictionary::AddObserver(Observer* observer) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
  observers_.AddObserver(observer);
}
    267 
// Unregisters |observer| from |observers_|. Must be called on the UI thread.
void SpellcheckCustomDictionary::RemoveObserver(Observer* observer) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
  observers_.RemoveObserver(observer);
}
    272 
// Returns true once OnLoaded() has run, i.e. after the words read from disk
// have been merged into memory. Must be called on the UI thread.
bool SpellcheckCustomDictionary::IsLoaded() {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
  return is_loaded_;
}
    277 
    278 bool SpellcheckCustomDictionary::IsSyncing() {
    279   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    280   return !!sync_processor_.get();
    281 }
    282 
// Starts an asynchronous load of the custom dictionary. LoadDictionaryFile()
// is posted to the FILE thread with |custom_dictionary_path_|, and the word
// list it returns is passed to OnLoaded() as the reply. The reply is bound to
// a weak pointer from |weak_ptr_factory_| rather than to |this| directly.
// Must be called on the UI thread.
void SpellcheckCustomDictionary::Load() {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
  BrowserThread::PostTaskAndReplyWithResult(
      BrowserThread::FILE,
      FROM_HERE,
      base::Bind(&SpellcheckCustomDictionary::LoadDictionaryFile,
                 custom_dictionary_path_),
      base::Bind(&SpellcheckCustomDictionary::OnLoaded,
                 weak_ptr_factory_.GetWeakPtr()));
}
    293 
    294 syncer::SyncMergeResult SpellcheckCustomDictionary::MergeDataAndStartSyncing(
    295     syncer::ModelType type,
    296     const syncer::SyncDataList& initial_sync_data,
    297     scoped_ptr<syncer::SyncChangeProcessor> sync_processor,
    298     scoped_ptr<syncer::SyncErrorFactory> sync_error_handler) {
    299   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    300   DCHECK(!sync_processor_.get());
    301   DCHECK(!sync_error_handler_.get());
    302   DCHECK(sync_processor.get());
    303   DCHECK(sync_error_handler.get());
    304   DCHECK_EQ(syncer::DICTIONARY, type);
    305   sync_processor_ = sync_processor.Pass();
    306   sync_error_handler_ = sync_error_handler.Pass();
    307 
    308   // Build a list of words to add locally.
    309   WordList to_add_locally;
    310   for (syncer::SyncDataList::const_iterator it = initial_sync_data.begin();
    311        it != initial_sync_data.end();
    312        ++it) {
    313     DCHECK_EQ(syncer::DICTIONARY, it->GetDataType());
    314     to_add_locally.push_back(it->GetSpecifics().dictionary().word());
    315   }
    316 
    317   // Add remote words locally.
    318   Change to_change_locally(to_add_locally);
    319   to_change_locally.Sanitize(GetWords());
    320   Apply(to_change_locally);
    321   Notify(to_change_locally);
    322   Save(to_change_locally);
    323 
    324   // Add as many as possible local words remotely.
    325   std::sort(to_add_locally.begin(), to_add_locally.end());
    326   WordList to_add_remotely = base::STLSetDifference<WordList>(words_,
    327                                                               to_add_locally);
    328 
    329   // Send local changes to the sync server.
    330   Change to_change_remotely(to_add_remotely);
    331   syncer::SyncMergeResult result(type);
    332   result.set_error(Sync(to_change_remotely));
    333   return result;
    334 }
    335 
// Stops syncing by releasing the sync change processor and the sync error
// handler; IsSyncing() returns false afterwards. |type| must be
// syncer::DICTIONARY. Must be called on the UI thread.
void SpellcheckCustomDictionary::StopSyncing(syncer::ModelType type) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
  DCHECK_EQ(syncer::DICTIONARY, type);
  sync_processor_.reset();
  sync_error_handler_.reset();
}
    342 
    343 syncer::SyncDataList SpellcheckCustomDictionary::GetAllSyncData(
    344     syncer::ModelType type) const {
    345   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    346   DCHECK_EQ(syncer::DICTIONARY, type);
    347   syncer::SyncDataList data;
    348   std::string word;
    349   size_t i = 0;
    350   for (WordSet::const_iterator it = words_.begin();
    351        it != words_.end() &&
    352            i < chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS;
    353        ++it, ++i) {
    354     word = *it;
    355     sync_pb::EntitySpecifics specifics;
    356     specifics.mutable_dictionary()->set_word(word);
    357     data.push_back(syncer::SyncData::CreateLocalData(word, word, specifics));
    358   }
    359   return data;
    360 }
    361 
    362 syncer::SyncError SpellcheckCustomDictionary::ProcessSyncChanges(
    363     const tracked_objects::Location& from_here,
    364     const syncer::SyncChangeList& change_list) {
    365   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    366   Change dictionary_change;
    367   for (syncer::SyncChangeList::const_iterator it = change_list.begin();
    368        it != change_list.end();
    369        ++it) {
    370     DCHECK(it->IsValid());
    371     std::string word = it->sync_data().GetSpecifics().dictionary().word();
    372     switch (it->change_type()) {
    373       case syncer::SyncChange::ACTION_ADD:
    374         dictionary_change.AddWord(word);
    375         break;
    376       case syncer::SyncChange::ACTION_DELETE:
    377         dictionary_change.RemoveWord(word);
    378         break;
    379       default:
    380         return sync_error_handler_->CreateAndUploadError(
    381             FROM_HERE,
    382             "Processing sync changes failed on change type " +
    383                 syncer::SyncChange::ChangeTypeToString(it->change_type()));
    384     }
    385   }
    386 
    387   dictionary_change.Sanitize(GetWords());
    388   Apply(dictionary_change);
    389   Notify(dictionary_change);
    390   Save(dictionary_change);
    391 
    392   return syncer::SyncError();
    393 }
    394 
    395 // static
    396 WordList SpellcheckCustomDictionary::LoadDictionaryFile(
    397     const base::FilePath& path) {
    398   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
    399   WordList words;
    400   LoadDictionaryFileReliably(words, path);
    401   if (!words.empty() && VALID_CHANGE != SanitizeWordsToAdd(WordSet(), words))
    402     SaveDictionaryFileReliably(words, path);
    403   SpellCheckHostMetrics::RecordCustomWordCountStats(words.size());
    404   return words;
    405 }
    406 
    407 // static
    408 void SpellcheckCustomDictionary::UpdateDictionaryFile(
    409     const SpellcheckCustomDictionary::Change& dictionary_change,
    410     const base::FilePath& path) {
    411   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
    412   if (dictionary_change.empty())
    413     return;
    414 
    415   WordList custom_words;
    416   LoadDictionaryFileReliably(custom_words, path);
    417 
    418   // Add words.
    419   custom_words.insert(custom_words.end(),
    420                       dictionary_change.to_add().begin(),
    421                       dictionary_change.to_add().end());
    422 
    423   // Remove words.
    424   std::sort(custom_words.begin(), custom_words.end());
    425   WordList remaining =
    426       base::STLSetDifference<WordList>(custom_words,
    427                                        dictionary_change.to_remove());
    428   std::swap(custom_words, remaining);
    429 
    430   SaveDictionaryFileReliably(custom_words, path);
    431 }
    432 
    433 void SpellcheckCustomDictionary::OnLoaded(WordList custom_words) {
    434   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    435   Change dictionary_change(custom_words);
    436   dictionary_change.Sanitize(GetWords());
    437   Apply(dictionary_change);
    438   Sync(dictionary_change);
    439   is_loaded_ = true;
    440   FOR_EACH_OBSERVER(Observer, observers_, OnCustomDictionaryLoaded());
    441 }
    442 
    443 void SpellcheckCustomDictionary::Apply(
    444     const SpellcheckCustomDictionary::Change& dictionary_change) {
    445   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    446   if (!dictionary_change.to_add().empty()) {
    447     words_.insert(dictionary_change.to_add().begin(),
    448                   dictionary_change.to_add().end());
    449   }
    450   if (!dictionary_change.to_remove().empty()) {
    451     WordSet updated_words =
    452         base::STLSetDifference<WordSet>(words_,
    453                                         dictionary_change.to_remove());
    454     std::swap(words_, updated_words);
    455   }
    456 }
    457 
// Schedules |dictionary_change| to be written to disk: posts
// UpdateDictionaryFile() to the FILE thread with the change and
// |custom_dictionary_path_| bound as arguments. Must be called on the UI
// thread.
void SpellcheckCustomDictionary::Save(
    const SpellcheckCustomDictionary::Change& dictionary_change) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
  BrowserThread::PostTask(
      BrowserThread::FILE,
      FROM_HERE,
      base::Bind(&SpellcheckCustomDictionary::UpdateDictionaryFile,
                 dictionary_change,
                 custom_dictionary_path_));
}
    468 
    469 syncer::SyncError SpellcheckCustomDictionary::Sync(
    470     const SpellcheckCustomDictionary::Change& dictionary_change) {
    471   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    472   syncer::SyncError error;
    473   if (!IsSyncing() || dictionary_change.empty())
    474     return error;
    475 
    476   // The number of words on the sync server should not exceed the limits.
    477   int server_size = static_cast<int>(words_.size()) -
    478       static_cast<int>(dictionary_change.to_add().size());
    479   int max_upload_size = std::max(
    480       0,
    481       static_cast<int>(
    482           chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS) -
    483           server_size);
    484   int upload_size = std::min(
    485       static_cast<int>(dictionary_change.to_add().size()),
    486       max_upload_size);
    487 
    488   syncer::SyncChangeList sync_change_list;
    489   int i = 0;
    490 
    491   for (WordList::const_iterator it = dictionary_change.to_add().begin();
    492        it != dictionary_change.to_add().end() && i < upload_size;
    493        ++it, ++i) {
    494     std::string word = *it;
    495     sync_pb::EntitySpecifics specifics;
    496     specifics.mutable_dictionary()->set_word(word);
    497     sync_change_list.push_back(syncer::SyncChange(
    498         FROM_HERE,
    499         syncer::SyncChange::ACTION_ADD,
    500         syncer::SyncData::CreateLocalData(word, word, specifics)));
    501   }
    502 
    503   for (WordList::const_iterator it = dictionary_change.to_remove().begin();
    504        it != dictionary_change.to_remove().end();
    505        ++it) {
    506     std::string word = *it;
    507     sync_pb::EntitySpecifics specifics;
    508     specifics.mutable_dictionary()->set_word(word);
    509     sync_change_list.push_back(syncer::SyncChange(
    510         FROM_HERE,
    511         syncer::SyncChange::ACTION_DELETE,
    512         syncer::SyncData::CreateLocalData(word, word, specifics)));
    513   }
    514 
    515   // Send the changes to the sync processor.
    516   error = sync_processor_->ProcessSyncChanges(FROM_HERE, sync_change_list);
    517   if (error.IsSet())
    518     return error;
    519 
    520   // Turn off syncing of this dictionary if the server already has the maximum
    521   // number of words.
    522   if (words_.size() > chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS)
    523     StopSyncing(syncer::DICTIONARY);
    524 
    525   return error;
    526 }
    527 
    528 void SpellcheckCustomDictionary::Notify(
    529     const SpellcheckCustomDictionary::Change& dictionary_change) {
    530   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    531   if (!IsLoaded() || dictionary_change.empty())
    532     return;
    533   FOR_EACH_OBSERVER(Observer,
    534                     observers_,
    535                     OnCustomDictionaryChanged(dictionary_change));
    536 }
    537