1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/browser/spellchecker/spellcheck_custom_dictionary.h" 6 7 #include <functional> 8 9 #include "base/file_util.h" 10 #include "base/files/important_file_writer.h" 11 #include "base/md5.h" 12 #include "base/strings/string_number_conversions.h" 13 #include "base/strings/string_split.h" 14 #include "chrome/browser/spellchecker/spellcheck_host_metrics.h" 15 #include "chrome/common/chrome_constants.h" 16 #include "chrome/common/spellcheck_messages.h" 17 #include "content/public/browser/browser_thread.h" 18 #include "sync/api/sync_change.h" 19 #include "sync/api/sync_data.h" 20 #include "sync/api/sync_error_factory.h" 21 #include "sync/protocol/sync.pb.h" 22 23 using content::BrowserThread; 24 using chrome::spellcheck_common::WordList; 25 using chrome::spellcheck_common::WordSet; 26 27 namespace { 28 29 // Filename extension for backup dictionary file. 30 const base::FilePath::CharType BACKUP_EXTENSION[] = FILE_PATH_LITERAL("backup"); 31 32 // Prefix for the checksum in the dictionary file. 33 const char CHECKSUM_PREFIX[] = "checksum_v1 = "; 34 35 // The status of the checksum in a custom spellcheck dictionary. 36 enum ChecksumStatus { 37 VALID_CHECKSUM, 38 INVALID_CHECKSUM, 39 }; 40 41 // The result of a dictionary sanitation. Can be used as a bitmap. 42 enum ChangeSanitationResult { 43 // The change is valid and can be applied as-is. 44 VALID_CHANGE = 0, 45 46 // The change contained words to be added that are not valid. 47 DETECTED_INVALID_WORDS = 1, 48 49 // The change contained words to be added that are already in the dictionary. 50 DETECTED_DUPLICATE_WORDS = 2, 51 52 // The change contained words to be removed that are not in the dictionary. 53 DETECTED_MISSING_WORDS = 4, 54 }; 55 56 // Loads the file at |file_path| into the |words| container. If the file has a 57 // valid checksum, then returns ChecksumStatus::VALID. If the file has an 58 // invalid checksum, then returns ChecksumStatus::INVALID and clears |words|. 59 ChecksumStatus LoadFile(const base::FilePath& file_path, WordList& words) { 60 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE)); 61 words.clear(); 62 std::string contents; 63 base::ReadFileToString(file_path, &contents); 64 size_t pos = contents.rfind(CHECKSUM_PREFIX); 65 if (pos != std::string::npos) { 66 std::string checksum = contents.substr(pos + strlen(CHECKSUM_PREFIX)); 67 contents = contents.substr(0, pos); 68 if (checksum != base::MD5String(contents)) 69 return INVALID_CHECKSUM; 70 } 71 TrimWhitespaceASCII(contents, TRIM_ALL, &contents); 72 base::SplitString(contents, '\n', &words); 73 return VALID_CHECKSUM; 74 } 75 76 // Returns true for invalid words and false for valid words. 77 bool IsInvalidWord(const std::string& word) { 78 std::string tmp; 79 return !IsStringUTF8(word) || 80 word.length() > 81 chrome::spellcheck_common::MAX_CUSTOM_DICTIONARY_WORD_BYTES || 82 word.empty() || 83 TRIM_NONE != TrimWhitespaceASCII(word, TRIM_ALL, &tmp); 84 } 85 86 // Loads the custom spellcheck dictionary from |path| into |custom_words|. If 87 // the dictionary checksum is not valid, but backup checksum is valid, then 88 // restores the backup and loads that into |custom_words| instead. If the backup 89 // is invalid too, then clears |custom_words|. Must be called on the file 90 // thread. 91 void LoadDictionaryFileReliably(WordList& custom_words, 92 const base::FilePath& path) { 93 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE)); 94 // Load the contents and verify the checksum. 95 if (LoadFile(path, custom_words) == VALID_CHECKSUM) 96 return; 97 // Checksum is not valid. See if there's a backup. 98 base::FilePath backup = path.AddExtension(BACKUP_EXTENSION); 99 if (!base::PathExists(backup)) 100 return; 101 // Load the backup and verify its checksum. 102 if (LoadFile(backup, custom_words) != VALID_CHECKSUM) 103 return; 104 // Backup checksum is valid. Restore the backup. 105 base::CopyFile(backup, path); 106 } 107 108 // Backs up the original dictionary, saves |custom_words| and its checksum into 109 // the custom spellcheck dictionary at |path|. 110 void SaveDictionaryFileReliably( 111 const WordList& custom_words, 112 const base::FilePath& path) { 113 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE)); 114 std::stringstream content; 115 for (WordList::const_iterator it = custom_words.begin(); 116 it != custom_words.end(); 117 ++it) { 118 content << *it << '\n'; 119 } 120 std::string checksum = base::MD5String(content.str()); 121 content << CHECKSUM_PREFIX << checksum; 122 base::CopyFile(path, path.AddExtension(BACKUP_EXTENSION)); 123 base::ImportantFileWriter::WriteFileAtomically(path, content.str()); 124 } 125 126 // Removes duplicate and invalid words from |to_add| word list and sorts it. 127 // Looks for duplicates in both |to_add| and |existing| word lists. Returns a 128 // bitmap of |ChangeSanitationResult| values. 129 int SanitizeWordsToAdd(const WordSet& existing, WordList& to_add) { 130 // Do not add duplicate words. 131 std::sort(to_add.begin(), to_add.end()); 132 WordList new_words = base::STLSetDifference<WordList>(to_add, existing); 133 new_words.erase(std::unique(new_words.begin(), new_words.end()), 134 new_words.end()); 135 int result = VALID_CHANGE; 136 if (to_add.size() != new_words.size()) 137 result |= DETECTED_DUPLICATE_WORDS; 138 // Do not add invalid words. 139 size_t size = new_words.size(); 140 new_words.erase(std::remove_if(new_words.begin(), 141 new_words.end(), 142 IsInvalidWord), 143 new_words.end()); 144 if (size != new_words.size()) 145 result |= DETECTED_INVALID_WORDS; 146 // Save the sanitized words to be added. 147 std::swap(to_add, new_words); 148 return result; 149 } 150 151 // Removes word from |to_remove| that are missing from |existing| word list and 152 // sorts |to_remove|. Returns a bitmap of |ChangeSanitationResult| values. 153 int SanitizeWordsToRemove(const WordSet& existing, WordList& to_remove) { 154 // Do not remove words that are missing from the dictionary. 155 std::sort(to_remove.begin(), to_remove.end()); 156 WordList found_words; 157 std::set_intersection(existing.begin(), 158 existing.end(), 159 to_remove.begin(), 160 to_remove.end(), 161 std::back_inserter(found_words)); 162 int result = VALID_CHANGE; 163 if (to_remove.size() > found_words.size()) 164 result |= DETECTED_MISSING_WORDS; 165 // Save the sanitized words to be removed. 166 std::swap(to_remove, found_words); 167 return result; 168 } 169 170 } // namespace 171 172 173 SpellcheckCustomDictionary::Change::Change() { 174 } 175 176 SpellcheckCustomDictionary::Change::Change( 177 const SpellcheckCustomDictionary::Change& other) 178 : to_add_(other.to_add()), 179 to_remove_(other.to_remove()) { 180 } 181 182 SpellcheckCustomDictionary::Change::Change(const WordList& to_add) 183 : to_add_(to_add) { 184 } 185 186 SpellcheckCustomDictionary::Change::~Change() { 187 } 188 189 void SpellcheckCustomDictionary::Change::AddWord(const std::string& word) { 190 to_add_.push_back(word); 191 } 192 193 void SpellcheckCustomDictionary::Change::RemoveWord(const std::string& word) { 194 to_remove_.push_back(word); 195 } 196 197 int SpellcheckCustomDictionary::Change::Sanitize(const WordSet& words) { 198 int result = VALID_CHANGE; 199 if (!to_add_.empty()) 200 result |= SanitizeWordsToAdd(words, to_add_); 201 if (!to_remove_.empty()) 202 result |= SanitizeWordsToRemove(words, to_remove_); 203 return result; 204 } 205 206 const WordList& SpellcheckCustomDictionary::Change::to_add() const { 207 return to_add_; 208 } 209 210 const WordList& SpellcheckCustomDictionary::Change::to_remove() const { 211 return to_remove_; 212 } 213 214 bool SpellcheckCustomDictionary::Change::empty() const { 215 return to_add_.empty() && to_remove_.empty(); 216 } 217 218 SpellcheckCustomDictionary::SpellcheckCustomDictionary( 219 const base::FilePath& path) 220 : custom_dictionary_path_(), 221 is_loaded_(false), 222 weak_ptr_factory_(this) { 223 custom_dictionary_path_ = 224 path.Append(chrome::kCustomDictionaryFileName); 225 } 226 227 SpellcheckCustomDictionary::~SpellcheckCustomDictionary() { 228 } 229 230 const WordSet& SpellcheckCustomDictionary::GetWords() const { 231 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 232 return words_; 233 } 234 235 bool SpellcheckCustomDictionary::AddWord(const std::string& word) { 236 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 237 Change dictionary_change; 238 dictionary_change.AddWord(word); 239 int result = dictionary_change.Sanitize(GetWords()); 240 Apply(dictionary_change); 241 Notify(dictionary_change); 242 Sync(dictionary_change); 243 Save(dictionary_change); 244 return result == VALID_CHANGE; 245 } 246 247 bool SpellcheckCustomDictionary::RemoveWord(const std::string& word) { 248 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 249 Change dictionary_change; 250 dictionary_change.RemoveWord(word); 251 int result = dictionary_change.Sanitize(GetWords()); 252 Apply(dictionary_change); 253 Notify(dictionary_change); 254 Sync(dictionary_change); 255 Save(dictionary_change); 256 return result == VALID_CHANGE; 257 } 258 259 bool SpellcheckCustomDictionary::HasWord(const std::string& word) const { 260 return !!words_.count(word); 261 } 262 263 void SpellcheckCustomDictionary::AddObserver(Observer* observer) { 264 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 265 observers_.AddObserver(observer); 266 } 267 268 void SpellcheckCustomDictionary::RemoveObserver(Observer* observer) { 269 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 270 observers_.RemoveObserver(observer); 271 } 272 273 bool SpellcheckCustomDictionary::IsLoaded() { 274 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 275 return is_loaded_; 276 } 277 278 bool SpellcheckCustomDictionary::IsSyncing() { 279 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 280 return !!sync_processor_.get(); 281 } 282 283 void SpellcheckCustomDictionary::Load() { 284 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 285 BrowserThread::PostTaskAndReplyWithResult( 286 BrowserThread::FILE, 287 FROM_HERE, 288 base::Bind(&SpellcheckCustomDictionary::LoadDictionaryFile, 289 custom_dictionary_path_), 290 base::Bind(&SpellcheckCustomDictionary::OnLoaded, 291 weak_ptr_factory_.GetWeakPtr())); 292 } 293 294 syncer::SyncMergeResult SpellcheckCustomDictionary::MergeDataAndStartSyncing( 295 syncer::ModelType type, 296 const syncer::SyncDataList& initial_sync_data, 297 scoped_ptr<syncer::SyncChangeProcessor> sync_processor, 298 scoped_ptr<syncer::SyncErrorFactory> sync_error_handler) { 299 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 300 DCHECK(!sync_processor_.get()); 301 DCHECK(!sync_error_handler_.get()); 302 DCHECK(sync_processor.get()); 303 DCHECK(sync_error_handler.get()); 304 DCHECK_EQ(syncer::DICTIONARY, type); 305 sync_processor_ = sync_processor.Pass(); 306 sync_error_handler_ = sync_error_handler.Pass(); 307 308 // Build a list of words to add locally. 309 WordList to_add_locally; 310 for (syncer::SyncDataList::const_iterator it = initial_sync_data.begin(); 311 it != initial_sync_data.end(); 312 ++it) { 313 DCHECK_EQ(syncer::DICTIONARY, it->GetDataType()); 314 to_add_locally.push_back(it->GetSpecifics().dictionary().word()); 315 } 316 317 // Add remote words locally. 318 Change to_change_locally(to_add_locally); 319 to_change_locally.Sanitize(GetWords()); 320 Apply(to_change_locally); 321 Notify(to_change_locally); 322 Save(to_change_locally); 323 324 // Add as many as possible local words remotely. 325 std::sort(to_add_locally.begin(), to_add_locally.end()); 326 WordList to_add_remotely = base::STLSetDifference<WordList>(words_, 327 to_add_locally); 328 329 // Send local changes to the sync server. 330 Change to_change_remotely(to_add_remotely); 331 syncer::SyncMergeResult result(type); 332 result.set_error(Sync(to_change_remotely)); 333 return result; 334 } 335 336 void SpellcheckCustomDictionary::StopSyncing(syncer::ModelType type) { 337 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 338 DCHECK_EQ(syncer::DICTIONARY, type); 339 sync_processor_.reset(); 340 sync_error_handler_.reset(); 341 } 342 343 syncer::SyncDataList SpellcheckCustomDictionary::GetAllSyncData( 344 syncer::ModelType type) const { 345 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 346 DCHECK_EQ(syncer::DICTIONARY, type); 347 syncer::SyncDataList data; 348 std::string word; 349 size_t i = 0; 350 for (WordSet::const_iterator it = words_.begin(); 351 it != words_.end() && 352 i < chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS; 353 ++it, ++i) { 354 word = *it; 355 sync_pb::EntitySpecifics specifics; 356 specifics.mutable_dictionary()->set_word(word); 357 data.push_back(syncer::SyncData::CreateLocalData(word, word, specifics)); 358 } 359 return data; 360 } 361 362 syncer::SyncError SpellcheckCustomDictionary::ProcessSyncChanges( 363 const tracked_objects::Location& from_here, 364 const syncer::SyncChangeList& change_list) { 365 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 366 Change dictionary_change; 367 for (syncer::SyncChangeList::const_iterator it = change_list.begin(); 368 it != change_list.end(); 369 ++it) { 370 DCHECK(it->IsValid()); 371 std::string word = it->sync_data().GetSpecifics().dictionary().word(); 372 switch (it->change_type()) { 373 case syncer::SyncChange::ACTION_ADD: 374 dictionary_change.AddWord(word); 375 break; 376 case syncer::SyncChange::ACTION_DELETE: 377 dictionary_change.RemoveWord(word); 378 break; 379 default: 380 return sync_error_handler_->CreateAndUploadError( 381 FROM_HERE, 382 "Processing sync changes failed on change type " + 383 syncer::SyncChange::ChangeTypeToString(it->change_type())); 384 } 385 } 386 387 dictionary_change.Sanitize(GetWords()); 388 Apply(dictionary_change); 389 Notify(dictionary_change); 390 Save(dictionary_change); 391 392 return syncer::SyncError(); 393 } 394 395 // static 396 WordList SpellcheckCustomDictionary::LoadDictionaryFile( 397 const base::FilePath& path) { 398 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE)); 399 WordList words; 400 LoadDictionaryFileReliably(words, path); 401 if (!words.empty() && VALID_CHANGE != SanitizeWordsToAdd(WordSet(), words)) 402 SaveDictionaryFileReliably(words, path); 403 SpellCheckHostMetrics::RecordCustomWordCountStats(words.size()); 404 return words; 405 } 406 407 // static 408 void SpellcheckCustomDictionary::UpdateDictionaryFile( 409 const SpellcheckCustomDictionary::Change& dictionary_change, 410 const base::FilePath& path) { 411 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE)); 412 if (dictionary_change.empty()) 413 return; 414 415 WordList custom_words; 416 LoadDictionaryFileReliably(custom_words, path); 417 418 // Add words. 419 custom_words.insert(custom_words.end(), 420 dictionary_change.to_add().begin(), 421 dictionary_change.to_add().end()); 422 423 // Remove words. 424 std::sort(custom_words.begin(), custom_words.end()); 425 WordList remaining = 426 base::STLSetDifference<WordList>(custom_words, 427 dictionary_change.to_remove()); 428 std::swap(custom_words, remaining); 429 430 SaveDictionaryFileReliably(custom_words, path); 431 } 432 433 void SpellcheckCustomDictionary::OnLoaded(WordList custom_words) { 434 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 435 Change dictionary_change(custom_words); 436 dictionary_change.Sanitize(GetWords()); 437 Apply(dictionary_change); 438 Sync(dictionary_change); 439 is_loaded_ = true; 440 FOR_EACH_OBSERVER(Observer, observers_, OnCustomDictionaryLoaded()); 441 } 442 443 void SpellcheckCustomDictionary::Apply( 444 const SpellcheckCustomDictionary::Change& dictionary_change) { 445 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 446 if (!dictionary_change.to_add().empty()) { 447 words_.insert(dictionary_change.to_add().begin(), 448 dictionary_change.to_add().end()); 449 } 450 if (!dictionary_change.to_remove().empty()) { 451 WordSet updated_words = 452 base::STLSetDifference<WordSet>(words_, 453 dictionary_change.to_remove()); 454 std::swap(words_, updated_words); 455 } 456 } 457 458 void SpellcheckCustomDictionary::Save( 459 const SpellcheckCustomDictionary::Change& dictionary_change) { 460 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 461 BrowserThread::PostTask( 462 BrowserThread::FILE, 463 FROM_HERE, 464 base::Bind(&SpellcheckCustomDictionary::UpdateDictionaryFile, 465 dictionary_change, 466 custom_dictionary_path_)); 467 } 468 469 syncer::SyncError SpellcheckCustomDictionary::Sync( 470 const SpellcheckCustomDictionary::Change& dictionary_change) { 471 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 472 syncer::SyncError error; 473 if (!IsSyncing() || dictionary_change.empty()) 474 return error; 475 476 // The number of words on the sync server should not exceed the limits. 477 int server_size = static_cast<int>(words_.size()) - 478 static_cast<int>(dictionary_change.to_add().size()); 479 int max_upload_size = std::max( 480 0, 481 static_cast<int>( 482 chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS) - 483 server_size); 484 int upload_size = std::min( 485 static_cast<int>(dictionary_change.to_add().size()), 486 max_upload_size); 487 488 syncer::SyncChangeList sync_change_list; 489 int i = 0; 490 491 for (WordList::const_iterator it = dictionary_change.to_add().begin(); 492 it != dictionary_change.to_add().end() && i < upload_size; 493 ++it, ++i) { 494 std::string word = *it; 495 sync_pb::EntitySpecifics specifics; 496 specifics.mutable_dictionary()->set_word(word); 497 sync_change_list.push_back(syncer::SyncChange( 498 FROM_HERE, 499 syncer::SyncChange::ACTION_ADD, 500 syncer::SyncData::CreateLocalData(word, word, specifics))); 501 } 502 503 for (WordList::const_iterator it = dictionary_change.to_remove().begin(); 504 it != dictionary_change.to_remove().end(); 505 ++it) { 506 std::string word = *it; 507 sync_pb::EntitySpecifics specifics; 508 specifics.mutable_dictionary()->set_word(word); 509 sync_change_list.push_back(syncer::SyncChange( 510 FROM_HERE, 511 syncer::SyncChange::ACTION_DELETE, 512 syncer::SyncData::CreateLocalData(word, word, specifics))); 513 } 514 515 // Send the changes to the sync processor. 516 error = sync_processor_->ProcessSyncChanges(FROM_HERE, sync_change_list); 517 if (error.IsSet()) 518 return error; 519 520 // Turn off syncing of this dictionary if the server already has the maximum 521 // number of words. 522 if (words_.size() > chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS) 523 StopSyncing(syncer::DICTIONARY); 524 525 return error; 526 } 527 528 void SpellcheckCustomDictionary::Notify( 529 const SpellcheckCustomDictionary::Change& dictionary_change) { 530 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 531 if (!IsLoaded() || dictionary_change.empty()) 532 return; 533 FOR_EACH_OBSERVER(Observer, 534 observers_, 535 OnCustomDictionaryChanged(dictionary_change)); 536 } 537