1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/spellchecker/spellcheck_custom_dictionary.h" 6 7 #include <functional> 8 9 #include "base/file_util.h" 10 #include "base/files/important_file_writer.h" 11 #include "base/md5.h" 12 #include "base/strings/string_number_conversions.h" 13 #include "base/strings/string_split.h" 14 #include "chrome/browser/spellchecker/spellcheck_host_metrics.h" 15 #include "chrome/common/chrome_constants.h" 16 #include "chrome/common/spellcheck_messages.h" 17 #include "content/public/browser/browser_thread.h" 18 #include "sync/api/sync_change.h" 19 #include "sync/api/sync_data.h" 20 #include "sync/api/sync_error_factory.h" 21 #include "sync/protocol/sync.pb.h" 22 23 using content::BrowserThread; 24 using chrome::spellcheck_common::WordList; 25 using chrome::spellcheck_common::WordSet; 26 27 namespace { 28 29 // Filename extension for backup dictionary file. 30 const base::FilePath::CharType BACKUP_EXTENSION[] = FILE_PATH_LITERAL("backup"); 31 32 // Prefix for the checksum in the dictionary file. 33 const char CHECKSUM_PREFIX[] = "checksum_v1 = "; 34 35 // The status of the checksum in a custom spellcheck dictionary. 36 enum ChecksumStatus { 37 VALID_CHECKSUM, 38 INVALID_CHECKSUM, 39 }; 40 41 // The result of a dictionary sanitation. Can be used as a bitmap. 42 enum ChangeSanitationResult { 43 // The change is valid and can be applied as-is. 44 VALID_CHANGE = 0, 45 46 // The change contained words to be added that are not valid. 47 DETECTED_INVALID_WORDS = 1, 48 49 // The change contained words to be added that are already in the dictionary. 50 DETECTED_DUPLICATE_WORDS = 2, 51 52 // The change contained words to be removed that are not in the dictionary. 53 DETECTED_MISSING_WORDS = 4, 54 }; 55 56 // Loads the file at |file_path| into the |words| container. If the file has a 57 // valid checksum, then returns ChecksumStatus::VALID. If the file has an 58 // invalid checksum, then returns ChecksumStatus::INVALID and clears |words|. 59 ChecksumStatus LoadFile(const base::FilePath& file_path, WordList& words) { 60 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE)); 61 words.clear(); 62 std::string contents; 63 file_util::ReadFileToString(file_path, &contents); 64 size_t pos = contents.rfind(CHECKSUM_PREFIX); 65 if (pos != std::string::npos) { 66 std::string checksum = contents.substr(pos + strlen(CHECKSUM_PREFIX)); 67 contents = contents.substr(0, pos); 68 if (checksum != base::MD5String(contents)) 69 return INVALID_CHECKSUM; 70 } 71 TrimWhitespaceASCII(contents, TRIM_ALL, &contents); 72 base::SplitString(contents, '\n', &words); 73 return VALID_CHECKSUM; 74 } 75 76 // Returns true for invalid words and false for valid words. 77 bool IsInvalidWord(const std::string& word) { 78 std::string tmp; 79 return !IsStringUTF8(word) || 80 word.length() > 81 chrome::spellcheck_common::MAX_CUSTOM_DICTIONARY_WORD_BYTES || 82 word.empty() || 83 TRIM_NONE != TrimWhitespaceASCII(word, TRIM_ALL, &tmp); 84 } 85 86 // Loads the custom spellcheck dictionary from |path| into |custom_words|. If 87 // the dictionary checksum is not valid, but backup checksum is valid, then 88 // restores the backup and loads that into |custom_words| instead. If the backup 89 // is invalid too, then clears |custom_words|. Must be called on the file 90 // thread. 91 void LoadDictionaryFileReliably(WordList& custom_words, 92 const base::FilePath& path) { 93 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE)); 94 // Load the contents and verify the checksum. 95 if (LoadFile(path, custom_words) == VALID_CHECKSUM) 96 return; 97 // Checksum is not valid. See if there's a backup. 98 base::FilePath backup = path.AddExtension(BACKUP_EXTENSION); 99 if (!base::PathExists(backup)) 100 return; 101 // Load the backup and verify its checksum. 102 if (LoadFile(backup, custom_words) != VALID_CHECKSUM) 103 return; 104 // Backup checksum is valid. Restore the backup. 105 base::CopyFile(backup, path); 106 } 107 108 // Backs up the original dictionary, saves |custom_words| and its checksum into 109 // the custom spellcheck dictionary at |path|. 110 void SaveDictionaryFileReliably( 111 const WordList& custom_words, 112 const base::FilePath& path) { 113 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE)); 114 std::stringstream content; 115 for (WordList::const_iterator it = custom_words.begin(); 116 it != custom_words.end(); 117 ++it) { 118 content << *it << '\n'; 119 } 120 std::string checksum = base::MD5String(content.str()); 121 content << CHECKSUM_PREFIX << checksum; 122 base::CopyFile(path, path.AddExtension(BACKUP_EXTENSION)); 123 base::ImportantFileWriter::WriteFileAtomically(path, content.str()); 124 } 125 126 // Removes duplicate and invalid words from |to_add| word list and sorts it. 127 // Looks for duplicates in both |to_add| and |existing| word lists. Returns a 128 // bitmap of |ChangeSanitationResult| values. 129 int SanitizeWordsToAdd(const WordSet& existing, WordList& to_add) { 130 // Do not add duplicate words. 131 std::sort(to_add.begin(), to_add.end()); 132 WordList new_words; 133 std::set_difference(to_add.begin(), 134 to_add.end(), 135 existing.begin(), 136 existing.end(), 137 std::back_inserter(new_words)); 138 new_words.erase(std::unique(new_words.begin(), new_words.end()), 139 new_words.end()); 140 int result = VALID_CHANGE; 141 if (to_add.size() != new_words.size()) 142 result |= DETECTED_DUPLICATE_WORDS; 143 // Do not add invalid words. 144 size_t size = new_words.size(); 145 new_words.erase(std::remove_if(new_words.begin(), 146 new_words.end(), 147 IsInvalidWord), 148 new_words.end()); 149 if (size != new_words.size()) 150 result |= DETECTED_INVALID_WORDS; 151 // Save the sanitized words to be added. 152 std::swap(to_add, new_words); 153 return result; 154 } 155 156 // Removes word from |to_remove| that are missing from |existing| word list and 157 // sorts |to_remove|. Returns a bitmap of |ChangeSanitationResult| values. 158 int SanitizeWordsToRemove(const WordSet& existing, WordList& to_remove) { 159 // Do not remove words that are missing from the dictionary. 160 std::sort(to_remove.begin(), to_remove.end()); 161 WordList found_words; 162 std::set_intersection(existing.begin(), 163 existing.end(), 164 to_remove.begin(), 165 to_remove.end(), 166 std::back_inserter(found_words)); 167 int result = VALID_CHANGE; 168 if (to_remove.size() > found_words.size()) 169 result |= DETECTED_MISSING_WORDS; 170 // Save the sanitized words to be removed. 171 std::swap(to_remove, found_words); 172 return result; 173 } 174 175 } // namespace 176 177 178 SpellcheckCustomDictionary::Change::Change() { 179 } 180 181 SpellcheckCustomDictionary::Change::Change( 182 const SpellcheckCustomDictionary::Change& other) 183 : to_add_(other.to_add()), 184 to_remove_(other.to_remove()) { 185 } 186 187 SpellcheckCustomDictionary::Change::Change(const WordList& to_add) 188 : to_add_(to_add) { 189 } 190 191 SpellcheckCustomDictionary::Change::~Change() { 192 } 193 194 void SpellcheckCustomDictionary::Change::AddWord(const std::string& word) { 195 to_add_.push_back(word); 196 } 197 198 void SpellcheckCustomDictionary::Change::RemoveWord(const std::string& word) { 199 to_remove_.push_back(word); 200 } 201 202 int SpellcheckCustomDictionary::Change::Sanitize(const WordSet& words) { 203 int result = VALID_CHANGE; 204 if (!to_add_.empty()) 205 result |= SanitizeWordsToAdd(words, to_add_); 206 if (!to_remove_.empty()) 207 result |= SanitizeWordsToRemove(words, to_remove_); 208 return result; 209 } 210 211 const WordList& SpellcheckCustomDictionary::Change::to_add() const { 212 return to_add_; 213 } 214 215 const WordList& SpellcheckCustomDictionary::Change::to_remove() const { 216 return to_remove_; 217 } 218 219 bool SpellcheckCustomDictionary::Change::empty() const { 220 return to_add_.empty() && to_remove_.empty(); 221 } 222 223 SpellcheckCustomDictionary::SpellcheckCustomDictionary( 224 const base::FilePath& path) 225 : custom_dictionary_path_(), 226 weak_ptr_factory_(this), 227 is_loaded_(false) { 228 custom_dictionary_path_ = 229 path.Append(chrome::kCustomDictionaryFileName); 230 } 231 232 SpellcheckCustomDictionary::~SpellcheckCustomDictionary() { 233 } 234 235 const WordSet& SpellcheckCustomDictionary::GetWords() const { 236 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 237 return words_; 238 } 239 240 bool SpellcheckCustomDictionary::AddWord(const std::string& word) { 241 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 242 Change dictionary_change; 243 dictionary_change.AddWord(word); 244 int result = dictionary_change.Sanitize(GetWords()); 245 Apply(dictionary_change); 246 Notify(dictionary_change); 247 Sync(dictionary_change); 248 Save(dictionary_change); 249 return result == VALID_CHANGE; 250 } 251 252 bool SpellcheckCustomDictionary::RemoveWord(const std::string& word) { 253 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 254 Change dictionary_change; 255 dictionary_change.RemoveWord(word); 256 int result = dictionary_change.Sanitize(GetWords()); 257 Apply(dictionary_change); 258 Notify(dictionary_change); 259 Sync(dictionary_change); 260 Save(dictionary_change); 261 return result == VALID_CHANGE; 262 } 263 264 bool SpellcheckCustomDictionary::HasWord(const std::string& word) const { 265 return !!words_.count(word); 266 } 267 268 void SpellcheckCustomDictionary::AddObserver(Observer* observer) { 269 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 270 observers_.AddObserver(observer); 271 } 272 273 void SpellcheckCustomDictionary::RemoveObserver(Observer* observer) { 274 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 275 observers_.RemoveObserver(observer); 276 } 277 278 bool SpellcheckCustomDictionary::IsLoaded() { 279 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 280 return is_loaded_; 281 } 282 283 bool SpellcheckCustomDictionary::IsSyncing() { 284 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 285 return !!sync_processor_.get(); 286 } 287 288 void SpellcheckCustomDictionary::Load() { 289 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 290 BrowserThread::PostTaskAndReplyWithResult( 291 BrowserThread::FILE, 292 FROM_HERE, 293 base::Bind(&SpellcheckCustomDictionary::LoadDictionaryFile, 294 custom_dictionary_path_), 295 base::Bind(&SpellcheckCustomDictionary::OnLoaded, 296 weak_ptr_factory_.GetWeakPtr())); 297 } 298 299 syncer::SyncMergeResult SpellcheckCustomDictionary::MergeDataAndStartSyncing( 300 syncer::ModelType type, 301 const syncer::SyncDataList& initial_sync_data, 302 scoped_ptr<syncer::SyncChangeProcessor> sync_processor, 303 scoped_ptr<syncer::SyncErrorFactory> sync_error_handler) { 304 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 305 DCHECK(!sync_processor_.get()); 306 DCHECK(!sync_error_handler_.get()); 307 DCHECK(sync_processor.get()); 308 DCHECK(sync_error_handler.get()); 309 DCHECK_EQ(syncer::DICTIONARY, type); 310 sync_processor_ = sync_processor.Pass(); 311 sync_error_handler_ = sync_error_handler.Pass(); 312 313 // Build a list of words to add locally. 314 WordList to_add_locally; 315 for (syncer::SyncDataList::const_iterator it = initial_sync_data.begin(); 316 it != initial_sync_data.end(); 317 ++it) { 318 DCHECK_EQ(syncer::DICTIONARY, it->GetDataType()); 319 to_add_locally.push_back(it->GetSpecifics().dictionary().word()); 320 } 321 322 // Add remote words locally. 323 Change to_change_locally(to_add_locally); 324 to_change_locally.Sanitize(GetWords()); 325 Apply(to_change_locally); 326 Notify(to_change_locally); 327 Save(to_change_locally); 328 329 // Add as many as possible local words remotely. 330 std::sort(to_add_locally.begin(), to_add_locally.end()); 331 WordList to_add_remotely; 332 std::set_difference(words_.begin(), 333 words_.end(), 334 to_add_locally.begin(), 335 to_add_locally.end(), 336 std::back_inserter(to_add_remotely)); 337 338 // Send local changes to the sync server. 339 Change to_change_remotely(to_add_remotely); 340 syncer::SyncMergeResult result(type); 341 result.set_error(Sync(to_change_remotely)); 342 return result; 343 } 344 345 void SpellcheckCustomDictionary::StopSyncing(syncer::ModelType type) { 346 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 347 DCHECK_EQ(syncer::DICTIONARY, type); 348 sync_processor_.reset(); 349 sync_error_handler_.reset(); 350 } 351 352 syncer::SyncDataList SpellcheckCustomDictionary::GetAllSyncData( 353 syncer::ModelType type) const { 354 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 355 DCHECK_EQ(syncer::DICTIONARY, type); 356 syncer::SyncDataList data; 357 std::string word; 358 size_t i = 0; 359 for (WordSet::const_iterator it = words_.begin(); 360 it != words_.end() && 361 i < chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS; 362 ++it, ++i) { 363 word = *it; 364 sync_pb::EntitySpecifics specifics; 365 specifics.mutable_dictionary()->set_word(word); 366 data.push_back(syncer::SyncData::CreateLocalData(word, word, specifics)); 367 } 368 return data; 369 } 370 371 syncer::SyncError SpellcheckCustomDictionary::ProcessSyncChanges( 372 const tracked_objects::Location& from_here, 373 const syncer::SyncChangeList& change_list) { 374 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 375 Change dictionary_change; 376 for (syncer::SyncChangeList::const_iterator it = change_list.begin(); 377 it != change_list.end(); 378 ++it) { 379 DCHECK(it->IsValid()); 380 std::string word = it->sync_data().GetSpecifics().dictionary().word(); 381 switch (it->change_type()) { 382 case syncer::SyncChange::ACTION_ADD: 383 dictionary_change.AddWord(word); 384 break; 385 case syncer::SyncChange::ACTION_DELETE: 386 dictionary_change.RemoveWord(word); 387 break; 388 default: 389 return sync_error_handler_->CreateAndUploadError( 390 FROM_HERE, 391 "Processing sync changes failed on change type " + 392 syncer::SyncChange::ChangeTypeToString(it->change_type())); 393 } 394 } 395 396 dictionary_change.Sanitize(GetWords()); 397 Apply(dictionary_change); 398 Notify(dictionary_change); 399 Save(dictionary_change); 400 401 return syncer::SyncError(); 402 } 403 404 // static 405 WordList SpellcheckCustomDictionary::LoadDictionaryFile( 406 const base::FilePath& path) { 407 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE)); 408 WordList words; 409 LoadDictionaryFileReliably(words, path); 410 if (!words.empty() && VALID_CHANGE != SanitizeWordsToAdd(WordSet(), words)) 411 SaveDictionaryFileReliably(words, path); 412 SpellCheckHostMetrics::RecordCustomWordCountStats(words.size()); 413 return words; 414 } 415 416 // static 417 void SpellcheckCustomDictionary::UpdateDictionaryFile( 418 const SpellcheckCustomDictionary::Change& dictionary_change, 419 const base::FilePath& path) { 420 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE)); 421 if (dictionary_change.empty()) 422 return; 423 424 WordList custom_words; 425 LoadDictionaryFileReliably(custom_words, path); 426 427 // Add words. 428 custom_words.insert(custom_words.end(), 429 dictionary_change.to_add().begin(), 430 dictionary_change.to_add().end()); 431 432 // Remove words. 433 std::sort(custom_words.begin(), custom_words.end()); 434 WordList remaining; 435 std::set_difference(custom_words.begin(), 436 custom_words.end(), 437 dictionary_change.to_remove().begin(), 438 dictionary_change.to_remove().end(), 439 std::back_inserter(remaining)); 440 std::swap(custom_words, remaining); 441 442 SaveDictionaryFileReliably(custom_words, path); 443 } 444 445 void SpellcheckCustomDictionary::OnLoaded(WordList custom_words) { 446 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 447 Change dictionary_change(custom_words); 448 dictionary_change.Sanitize(GetWords()); 449 Apply(dictionary_change); 450 Sync(dictionary_change); 451 is_loaded_ = true; 452 FOR_EACH_OBSERVER(Observer, observers_, OnCustomDictionaryLoaded()); 453 } 454 455 void SpellcheckCustomDictionary::Apply( 456 const SpellcheckCustomDictionary::Change& dictionary_change) { 457 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 458 if (!dictionary_change.to_add().empty()) { 459 words_.insert(dictionary_change.to_add().begin(), 460 dictionary_change.to_add().end()); 461 } 462 if (!dictionary_change.to_remove().empty()) { 463 WordSet updated_words; 464 std::set_difference(words_.begin(), 465 words_.end(), 466 dictionary_change.to_remove().begin(), 467 dictionary_change.to_remove().end(), 468 std::inserter(updated_words, updated_words.end())); 469 std::swap(words_, updated_words); 470 } 471 } 472 473 void SpellcheckCustomDictionary::Save( 474 const SpellcheckCustomDictionary::Change& dictionary_change) { 475 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 476 BrowserThread::PostTask( 477 BrowserThread::FILE, 478 FROM_HERE, 479 base::Bind(&SpellcheckCustomDictionary::UpdateDictionaryFile, 480 dictionary_change, 481 custom_dictionary_path_)); 482 } 483 484 syncer::SyncError SpellcheckCustomDictionary::Sync( 485 const SpellcheckCustomDictionary::Change& dictionary_change) { 486 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 487 syncer::SyncError error; 488 if (!IsSyncing() || dictionary_change.empty()) 489 return error; 490 491 // The number of words on the sync server should not exceed the limits. 492 int server_size = static_cast<int>(words_.size()) - 493 static_cast<int>(dictionary_change.to_add().size()); 494 int max_upload_size = std::max( 495 0, 496 static_cast<int>( 497 chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS) - 498 server_size); 499 int upload_size = std::min( 500 static_cast<int>(dictionary_change.to_add().size()), 501 max_upload_size); 502 503 syncer::SyncChangeList sync_change_list; 504 int i = 0; 505 506 for (WordList::const_iterator it = dictionary_change.to_add().begin(); 507 it != dictionary_change.to_add().end() && i < upload_size; 508 ++it, ++i) { 509 std::string word = *it; 510 sync_pb::EntitySpecifics specifics; 511 specifics.mutable_dictionary()->set_word(word); 512 sync_change_list.push_back(syncer::SyncChange( 513 FROM_HERE, 514 syncer::SyncChange::ACTION_ADD, 515 syncer::SyncData::CreateLocalData(word, word, specifics))); 516 } 517 518 for (WordList::const_iterator it = dictionary_change.to_remove().begin(); 519 it != dictionary_change.to_remove().end(); 520 ++it) { 521 std::string word = *it; 522 sync_pb::EntitySpecifics specifics; 523 specifics.mutable_dictionary()->set_word(word); 524 sync_change_list.push_back(syncer::SyncChange( 525 FROM_HERE, 526 syncer::SyncChange::ACTION_DELETE, 527 syncer::SyncData::CreateLocalData(word, word, specifics))); 528 } 529 530 // Send the changes to the sync processor. 531 error = sync_processor_->ProcessSyncChanges(FROM_HERE, sync_change_list); 532 if (error.IsSet()) 533 return error; 534 535 // Turn off syncing of this dictionary if the server already has the maximum 536 // number of words. 537 if (words_.size() > chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS) 538 StopSyncing(syncer::DICTIONARY); 539 540 return error; 541 } 542 543 void SpellcheckCustomDictionary::Notify( 544 const SpellcheckCustomDictionary::Change& dictionary_change) { 545 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 546 if (!IsLoaded() || dictionary_change.empty()) 547 return; 548 FOR_EACH_OBSERVER(Observer, 549 observers_, 550 OnCustomDictionaryChanged(dictionary_change)); 551 } 552