1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/i18n/break_iterator.h" 6 7 #include "base/string_piece.h" 8 #include "base/string_util.h" 9 #include "base/utf_string_conversions.h" 10 #include "testing/gtest/include/gtest/gtest.h" 11 12 TEST(BreakIteratorTest, BreakWordEmpty) { 13 string16 empty; 14 base::BreakIterator iter(&empty, base::BreakIterator::BREAK_WORD); 15 ASSERT_TRUE(iter.Init()); 16 EXPECT_FALSE(iter.Advance()); 17 EXPECT_FALSE(iter.IsWord()); 18 EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. 19 EXPECT_FALSE(iter.IsWord()); 20 } 21 22 TEST(BreakIteratorTest, BreakWord) { 23 string16 space(UTF8ToUTF16(" ")); 24 string16 str(UTF8ToUTF16(" foo bar! \npouet boom")); 25 base::BreakIterator iter(&str, base::BreakIterator::BREAK_WORD); 26 ASSERT_TRUE(iter.Init()); 27 EXPECT_TRUE(iter.Advance()); 28 EXPECT_FALSE(iter.IsWord()); 29 EXPECT_EQ(space, iter.GetString()); 30 EXPECT_TRUE(iter.Advance()); 31 EXPECT_TRUE(iter.IsWord()); 32 EXPECT_EQ(UTF8ToUTF16("foo"), iter.GetString()); 33 EXPECT_TRUE(iter.Advance()); 34 EXPECT_FALSE(iter.IsWord()); 35 EXPECT_EQ(space, iter.GetString()); 36 EXPECT_TRUE(iter.Advance()); 37 EXPECT_TRUE(iter.IsWord()); 38 EXPECT_EQ(UTF8ToUTF16("bar"), iter.GetString()); 39 EXPECT_TRUE(iter.Advance()); 40 EXPECT_FALSE(iter.IsWord()); 41 EXPECT_EQ(UTF8ToUTF16("!"), iter.GetString()); 42 EXPECT_TRUE(iter.Advance()); 43 EXPECT_FALSE(iter.IsWord()); 44 EXPECT_EQ(space, iter.GetString()); 45 EXPECT_TRUE(iter.Advance()); 46 EXPECT_FALSE(iter.IsWord()); 47 EXPECT_EQ(UTF8ToUTF16("\n"), iter.GetString()); 48 EXPECT_TRUE(iter.Advance()); 49 EXPECT_TRUE(iter.IsWord()); 50 EXPECT_EQ(UTF8ToUTF16("pouet"), iter.GetString()); 51 EXPECT_TRUE(iter.Advance()); 52 EXPECT_FALSE(iter.IsWord()); 53 EXPECT_EQ(space, iter.GetString()); 54 EXPECT_TRUE(iter.Advance()); 55 EXPECT_TRUE(iter.IsWord()); 56 EXPECT_EQ(UTF8ToUTF16("boom"), iter.GetString()); 57 EXPECT_FALSE(iter.Advance()); 58 EXPECT_FALSE(iter.IsWord()); 59 EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. 60 EXPECT_FALSE(iter.IsWord()); 61 } 62 63 TEST(BreakIteratorTest, BreakWide16) { 64 // Two greek words separated by space. 65 const string16 str(WideToUTF16( 66 L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9" 67 L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2")); 68 const string16 word1(str.substr(0, 10)); 69 const string16 word2(str.substr(11, 5)); 70 base::BreakIterator iter(&str, base::BreakIterator::BREAK_WORD); 71 ASSERT_TRUE(iter.Init()); 72 EXPECT_TRUE(iter.Advance()); 73 EXPECT_TRUE(iter.IsWord()); 74 EXPECT_EQ(word1, iter.GetString()); 75 EXPECT_TRUE(iter.Advance()); 76 EXPECT_FALSE(iter.IsWord()); 77 EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString()); 78 EXPECT_TRUE(iter.Advance()); 79 EXPECT_TRUE(iter.IsWord()); 80 EXPECT_EQ(word2, iter.GetString()); 81 EXPECT_FALSE(iter.Advance()); 82 EXPECT_FALSE(iter.IsWord()); 83 EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. 84 EXPECT_FALSE(iter.IsWord()); 85 } 86 87 TEST(BreakIteratorTest, BreakWide32) { 88 // U+1D49C MATHEMATICAL SCRIPT CAPITAL A 89 const char* very_wide_char = "\xF0\x9D\x92\x9C"; 90 const string16 str( 91 UTF8ToUTF16(StringPrintf("%s a", very_wide_char))); 92 const string16 very_wide_word(str.substr(0, 2)); 93 94 base::BreakIterator iter(&str, base::BreakIterator::BREAK_WORD); 95 ASSERT_TRUE(iter.Init()); 96 EXPECT_TRUE(iter.Advance()); 97 EXPECT_TRUE(iter.IsWord()); 98 EXPECT_EQ(very_wide_word, iter.GetString()); 99 EXPECT_TRUE(iter.Advance()); 100 EXPECT_FALSE(iter.IsWord()); 101 EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString()); 102 EXPECT_TRUE(iter.Advance()); 103 EXPECT_TRUE(iter.IsWord()); 104 EXPECT_EQ(UTF8ToUTF16("a"), iter.GetString()); 105 EXPECT_FALSE(iter.Advance()); 106 EXPECT_FALSE(iter.IsWord()); 107 EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. 108 EXPECT_FALSE(iter.IsWord()); 109 } 110 111 TEST(BreakIteratorTest, BreakSpaceEmpty) { 112 string16 empty; 113 base::BreakIterator iter(&empty, base::BreakIterator::BREAK_SPACE); 114 ASSERT_TRUE(iter.Init()); 115 EXPECT_FALSE(iter.Advance()); 116 EXPECT_FALSE(iter.IsWord()); 117 EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. 118 EXPECT_FALSE(iter.IsWord()); 119 } 120 121 TEST(BreakIteratorTest, BreakSpace) { 122 string16 str(UTF8ToUTF16(" foo bar! \npouet boom")); 123 base::BreakIterator iter(&str, base::BreakIterator::BREAK_SPACE); 124 ASSERT_TRUE(iter.Init()); 125 EXPECT_TRUE(iter.Advance()); 126 EXPECT_FALSE(iter.IsWord()); 127 EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString()); 128 EXPECT_TRUE(iter.Advance()); 129 EXPECT_FALSE(iter.IsWord()); 130 EXPECT_EQ(UTF8ToUTF16("foo "), iter.GetString()); 131 EXPECT_TRUE(iter.Advance()); 132 EXPECT_FALSE(iter.IsWord()); 133 EXPECT_EQ(UTF8ToUTF16("bar! \n"), iter.GetString()); 134 EXPECT_TRUE(iter.Advance()); 135 EXPECT_FALSE(iter.IsWord()); 136 EXPECT_EQ(UTF8ToUTF16("pouet "), iter.GetString()); 137 EXPECT_TRUE(iter.Advance()); 138 EXPECT_FALSE(iter.IsWord()); 139 EXPECT_EQ(UTF8ToUTF16("boom"), iter.GetString()); 140 EXPECT_FALSE(iter.Advance()); 141 EXPECT_FALSE(iter.IsWord()); 142 EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. 143 EXPECT_FALSE(iter.IsWord()); 144 } 145 146 TEST(BreakIteratorTest, BreakSpaceSP) { 147 string16 str(UTF8ToUTF16(" foo bar! \npouet boom ")); 148 base::BreakIterator iter(&str, base::BreakIterator::BREAK_SPACE); 149 ASSERT_TRUE(iter.Init()); 150 EXPECT_TRUE(iter.Advance()); 151 EXPECT_FALSE(iter.IsWord()); 152 EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString()); 153 EXPECT_TRUE(iter.Advance()); 154 EXPECT_FALSE(iter.IsWord()); 155 EXPECT_EQ(UTF8ToUTF16("foo "), iter.GetString()); 156 EXPECT_TRUE(iter.Advance()); 157 EXPECT_FALSE(iter.IsWord()); 158 EXPECT_EQ(UTF8ToUTF16("bar! \n"), iter.GetString()); 159 EXPECT_TRUE(iter.Advance()); 160 EXPECT_FALSE(iter.IsWord()); 161 EXPECT_EQ(UTF8ToUTF16("pouet "), iter.GetString()); 162 EXPECT_TRUE(iter.Advance()); 163 EXPECT_FALSE(iter.IsWord()); 164 EXPECT_EQ(UTF8ToUTF16("boom "), iter.GetString()); 165 EXPECT_FALSE(iter.Advance()); 166 EXPECT_FALSE(iter.IsWord()); 167 EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. 168 EXPECT_FALSE(iter.IsWord()); 169 } 170 171 TEST(BreakIteratorTest, BreakSpacekWide16) { 172 // Two Greek words. 173 const string16 str(WideToUTF16( 174 L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9" 175 L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2")); 176 const string16 word1(str.substr(0, 11)); 177 const string16 word2(str.substr(11, 5)); 178 base::BreakIterator iter(&str, base::BreakIterator::BREAK_SPACE); 179 ASSERT_TRUE(iter.Init()); 180 EXPECT_TRUE(iter.Advance()); 181 EXPECT_FALSE(iter.IsWord()); 182 EXPECT_EQ(word1, iter.GetString()); 183 EXPECT_TRUE(iter.Advance()); 184 EXPECT_FALSE(iter.IsWord()); 185 EXPECT_EQ(word2, iter.GetString()); 186 EXPECT_FALSE(iter.Advance()); 187 EXPECT_FALSE(iter.IsWord()); 188 EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. 189 EXPECT_FALSE(iter.IsWord()); 190 } 191 192 TEST(BreakIteratorTest, BreakSpaceWide32) { 193 // U+1D49C MATHEMATICAL SCRIPT CAPITAL A 194 const char* very_wide_char = "\xF0\x9D\x92\x9C"; 195 const string16 str( 196 UTF8ToUTF16(StringPrintf("%s a", very_wide_char))); 197 const string16 very_wide_word(str.substr(0, 3)); 198 199 base::BreakIterator iter(&str, base::BreakIterator::BREAK_SPACE); 200 ASSERT_TRUE(iter.Init()); 201 EXPECT_TRUE(iter.Advance()); 202 EXPECT_FALSE(iter.IsWord()); 203 EXPECT_EQ(very_wide_word, iter.GetString()); 204 EXPECT_TRUE(iter.Advance()); 205 EXPECT_FALSE(iter.IsWord()); 206 EXPECT_EQ(UTF8ToUTF16("a"), iter.GetString()); 207 EXPECT_FALSE(iter.Advance()); 208 EXPECT_FALSE(iter.IsWord()); 209 EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. 210 EXPECT_FALSE(iter.IsWord()); 211 } 212 213 TEST(BreakIteratorTest, BreakLineEmpty) { 214 string16 empty; 215 base::BreakIterator iter(&empty, base::BreakIterator::BREAK_NEWLINE); 216 ASSERT_TRUE(iter.Init()); 217 EXPECT_FALSE(iter.Advance()); 218 EXPECT_FALSE(iter.IsWord()); 219 EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. 220 EXPECT_FALSE(iter.IsWord()); 221 } 222 223 TEST(BreakIteratorTest, BreakLine) { 224 string16 nl(UTF8ToUTF16("\n")); 225 string16 str(UTF8ToUTF16("\nfoo bar!\n\npouet boom")); 226 base::BreakIterator iter(&str, base::BreakIterator::BREAK_NEWLINE); 227 ASSERT_TRUE(iter.Init()); 228 EXPECT_TRUE(iter.Advance()); 229 EXPECT_FALSE(iter.IsWord()); 230 EXPECT_EQ(nl, iter.GetString()); 231 EXPECT_TRUE(iter.Advance()); 232 EXPECT_FALSE(iter.IsWord()); 233 EXPECT_EQ(UTF8ToUTF16("foo bar!\n"), iter.GetString()); 234 EXPECT_TRUE(iter.Advance()); 235 EXPECT_FALSE(iter.IsWord()); 236 EXPECT_EQ(nl, iter.GetString()); 237 EXPECT_TRUE(iter.Advance()); 238 EXPECT_FALSE(iter.IsWord()); 239 EXPECT_EQ(UTF8ToUTF16("pouet boom"), iter.GetString()); 240 EXPECT_FALSE(iter.Advance()); 241 EXPECT_FALSE(iter.IsWord()); 242 EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. 243 EXPECT_FALSE(iter.IsWord()); 244 } 245 246 TEST(BreakIteratorTest, BreakLineNL) { 247 string16 nl(UTF8ToUTF16("\n")); 248 string16 str(UTF8ToUTF16("\nfoo bar!\n\npouet boom\n")); 249 base::BreakIterator iter(&str, base::BreakIterator::BREAK_NEWLINE); 250 ASSERT_TRUE(iter.Init()); 251 EXPECT_TRUE(iter.Advance()); 252 EXPECT_FALSE(iter.IsWord()); 253 EXPECT_EQ(nl, iter.GetString()); 254 EXPECT_TRUE(iter.Advance()); 255 EXPECT_FALSE(iter.IsWord()); 256 EXPECT_EQ(UTF8ToUTF16("foo bar!\n"), iter.GetString()); 257 EXPECT_TRUE(iter.Advance()); 258 EXPECT_FALSE(iter.IsWord()); 259 EXPECT_EQ(nl, iter.GetString()); 260 EXPECT_TRUE(iter.Advance()); 261 EXPECT_FALSE(iter.IsWord()); 262 EXPECT_EQ(UTF8ToUTF16("pouet boom\n"), iter.GetString()); 263 EXPECT_FALSE(iter.Advance()); 264 EXPECT_FALSE(iter.IsWord()); 265 EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. 266 EXPECT_FALSE(iter.IsWord()); 267 } 268 269 TEST(BreakIteratorTest, BreakLineWide16) { 270 // Two Greek words separated by newline. 271 const string16 str(WideToUTF16( 272 L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9" 273 L"\x03bf\x03c2\x000a\x0399\x03c3\x03c4\x03cc\x03c2")); 274 const string16 line1(str.substr(0, 11)); 275 const string16 line2(str.substr(11, 5)); 276 base::BreakIterator iter(&str, base::BreakIterator::BREAK_NEWLINE); 277 ASSERT_TRUE(iter.Init()); 278 EXPECT_TRUE(iter.Advance()); 279 EXPECT_FALSE(iter.IsWord()); 280 EXPECT_EQ(line1, iter.GetString()); 281 EXPECT_TRUE(iter.Advance()); 282 EXPECT_FALSE(iter.IsWord()); 283 EXPECT_EQ(line2, iter.GetString()); 284 EXPECT_FALSE(iter.Advance()); 285 EXPECT_FALSE(iter.IsWord()); 286 EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. 287 EXPECT_FALSE(iter.IsWord()); 288 } 289 290 TEST(BreakIteratorTest, BreakLineWide32) { 291 // U+1D49C MATHEMATICAL SCRIPT CAPITAL A 292 const char* very_wide_char = "\xF0\x9D\x92\x9C"; 293 const string16 str( 294 UTF8ToUTF16(StringPrintf("%s\na", very_wide_char))); 295 const string16 very_wide_line(str.substr(0, 3)); 296 base::BreakIterator iter(&str, base::BreakIterator::BREAK_NEWLINE); 297 ASSERT_TRUE(iter.Init()); 298 EXPECT_TRUE(iter.Advance()); 299 EXPECT_FALSE(iter.IsWord()); 300 EXPECT_EQ(very_wide_line, iter.GetString()); 301 EXPECT_TRUE(iter.Advance()); 302 EXPECT_FALSE(iter.IsWord()); 303 EXPECT_EQ(UTF8ToUTF16("a"), iter.GetString()); 304 EXPECT_FALSE(iter.Advance()); 305 EXPECT_FALSE(iter.IsWord()); 306 EXPECT_FALSE(iter.Advance()); // Test unexpected advance after end. 307 EXPECT_FALSE(iter.IsWord()); 308 } 309