1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include <map> 6 #include <string> 7 8 #include "base/file_util.h" 9 #include "base/format_macros.h" 10 #include "base/i18n/icu_string_conversions.h" 11 #include "base/strings/stringprintf.h" 12 #include "base/strings/utf_string_conversions.h" 13 #include "chrome/tools/convert_dict/aff_reader.h" 14 #include "chrome/tools/convert_dict/dic_reader.h" 15 #include "testing/gtest/include/gtest/gtest.h" 16 #include "third_party/hunspell/google/bdict_reader.h" 17 #include "third_party/hunspell/google/bdict_writer.h" 18 19 namespace { 20 21 // Compares the given word list with the serialized trie to make sure they 22 // are the same. 23 // (This function is copied from "chrome/tools/convert_dict/convert_dict.cc"). 24 bool VerifyWords(const convert_dict::DicReader::WordList& org_words, 25 const std::string& serialized) { 26 hunspell::BDictReader reader; 27 EXPECT_TRUE( 28 reader.Init(reinterpret_cast<const unsigned char*>(serialized.data()), 29 serialized.size())); 30 31 hunspell::WordIterator iter = reader.GetAllWordIterator(); 32 33 int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD]; 34 35 static const int kBufSize = 128; 36 char buf[kBufSize]; 37 for (size_t i = 0; i < org_words.size(); i++) { 38 SCOPED_TRACE(base::StringPrintf( 39 "org_words[%" PRIuS "]: %s", i, org_words[i].first.c_str())); 40 41 int affix_matches = iter.Advance(buf, kBufSize, affix_ids); 42 EXPECT_NE(0, affix_matches); 43 EXPECT_EQ(org_words[i].first, std::string(buf)); 44 EXPECT_EQ(affix_matches, static_cast<int>(org_words[i].second.size())); 45 46 // Check the individual affix indices. 47 for (size_t affix_index = 0; affix_index < org_words[i].second.size(); 48 affix_index++) { 49 EXPECT_EQ(affix_ids[affix_index], org_words[i].second[affix_index]); 50 } 51 } 52 53 return true; 54 } 55 56 // Implements the test process used by ConvertDictTest. 57 // This function encapsulates all complicated operations used by 58 // ConvertDictTest so we can conceal them from the tests themselves. 59 // This function consists of the following parts: 60 // * Creates a dummy affix file and a dictionary file. 61 // * Reads the dummy files. 62 // * Creates bdict data. 63 // * Verify the bdict data. 64 void RunDictionaryTest(const char* codepage, 65 const std::map<string16, bool>& word_list) { 66 // Create an affix data and a dictionary data. 67 std::string aff_data(base::StringPrintf("SET %s\n", codepage)); 68 69 std::string dic_data(base::StringPrintf("%" PRIuS "\n", word_list.size())); 70 for (std::map<string16, bool>::const_iterator it = word_list.begin(); 71 it != word_list.end(); ++it) { 72 std::string encoded_word; 73 EXPECT_TRUE(UTF16ToCodepage(it->first, 74 codepage, 75 base::OnStringConversionError::FAIL, 76 &encoded_word)); 77 dic_data += encoded_word; 78 dic_data += "\n"; 79 } 80 81 // Create a temporary affix file and a dictionary file from the test data. 82 base::FilePath aff_file; 83 file_util::CreateTemporaryFile(&aff_file); 84 file_util::WriteFile(aff_file, aff_data.c_str(), aff_data.length()); 85 86 base::FilePath dic_file; 87 file_util::CreateTemporaryFile(&dic_file); 88 file_util::WriteFile(dic_file, dic_data.c_str(), dic_data.length()); 89 90 { 91 // Read the above affix file with AffReader and read the dictionary file 92 // with DicReader, respectively. 93 convert_dict::AffReader aff_reader(aff_file); 94 EXPECT_TRUE(aff_reader.Read()); 95 96 convert_dict::DicReader dic_reader(dic_file); 97 EXPECT_TRUE(dic_reader.Read(&aff_reader)); 98 99 // Verify this DicReader includes all the input words. 100 EXPECT_EQ(word_list.size(), dic_reader.words().size()); 101 for (size_t i = 0; i < dic_reader.words().size(); ++i) { 102 SCOPED_TRACE(base::StringPrintf("dic_reader.words()[%" PRIuS "]: %s", 103 i, dic_reader.words()[i].first.c_str())); 104 string16 word(UTF8ToUTF16(dic_reader.words()[i].first)); 105 EXPECT_TRUE(word_list.find(word) != word_list.end()); 106 } 107 108 // Create BDICT data and verify it. 109 hunspell::BDictWriter writer; 110 writer.SetComment(aff_reader.comments()); 111 writer.SetAffixRules(aff_reader.affix_rules()); 112 writer.SetAffixGroups(aff_reader.GetAffixGroups()); 113 writer.SetReplacements(aff_reader.replacements()); 114 writer.SetOtherCommands(aff_reader.other_commands()); 115 writer.SetWords(dic_reader.words()); 116 117 std::string bdict_data = writer.GetBDict(); 118 VerifyWords(dic_reader.words(), bdict_data); 119 EXPECT_TRUE(hunspell::BDict::Verify(bdict_data.data(), bdict_data.size())); 120 121 // Trim the end of this BDICT and verify our verifier tells these trimmed 122 // BDICTs are corrupted. 123 for (size_t i = 1; i < bdict_data.size(); ++i) { 124 SCOPED_TRACE(base::StringPrintf("i = %" PRIuS, i)); 125 EXPECT_FALSE(hunspell::BDict::Verify(bdict_data.data(), 126 bdict_data.size() - i)); 127 } 128 } 129 130 // Deletes the temporary files. 131 // We need to delete them after the above AffReader and DicReader are deleted 132 // since they close the input files in their destructors. 133 base::DeleteFile(aff_file, false); 134 base::DeleteFile(dic_file, false); 135 } 136 137 } // namespace 138 139 // Tests whether or not our DicReader can read all the input English words 140 TEST(ConvertDictTest, English) { 141 const char kCodepage[] = "UTF-8"; 142 const wchar_t* kWords[] = { 143 L"I", 144 L"he", 145 L"she", 146 L"it", 147 L"we", 148 L"you", 149 L"they", 150 }; 151 152 std::map<string16, bool> word_list; 153 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kWords); ++i) 154 word_list.insert(std::make_pair<string16, bool>(WideToUTF16(kWords[i]), 155 true)); 156 157 RunDictionaryTest(kCodepage, word_list); 158 } 159 160 // Tests whether or not our DicReader can read all the input Russian words. 161 TEST(ConvertDictTest, Russian) { 162 const char kCodepage[] = "KOI8-R"; 163 const wchar_t* kWords[] = { 164 L"\x044f", 165 L"\x0442\x044b", 166 L"\x043e\x043d", 167 L"\x043e\x043d\x0430", 168 L"\x043e\x043d\x043e", 169 L"\x043c\x044b", 170 L"\x0432\x044b", 171 L"\x043e\x043d\x0438", 172 }; 173 174 std::map<string16, bool> word_list; 175 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kWords); ++i) 176 word_list.insert(std::make_pair<string16, bool>(WideToUTF16(kWords[i]), 177 true)); 178 179 RunDictionaryTest(kCodepage, word_list); 180 } 181 182 // Tests whether or not our DicReader can read all the input Hungarian words. 183 TEST(ConvertDictTest, Hungarian) { 184 const char kCodepage[] = "ISO8859-2"; 185 const wchar_t* kWords[] = { 186 L"\x00e9\x006e", 187 L"\x0074\x0065", 188 L"\x0151", 189 L"\x00f6\x006e", 190 L"\x006d\x0061\x0067\x0061", 191 L"\x006d\x0069", 192 L"\x0074\x0069", 193 L"\x0151\x006b", 194 L"\x00f6\x006e\x00f6\x006b", 195 L"\x006d\x0061\x0067\x0075\x006b", 196 }; 197 198 std::map<string16, bool> word_list; 199 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kWords); ++i) 200 word_list.insert(std::make_pair<string16, bool>(WideToUTF16(kWords[i]), 201 true)); 202 203 RunDictionaryTest(kCodepage, word_list); 204 } 205