1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2001-2014, International Business Machines Corporation and * 6 * others. All Rights Reserved. * 7 ******************************************************************************* 8 */ 9 10 /** 11 * Port From: ICU4C v1.8.1 : rbbi : RBBIAPITest 12 * Source File: $ICU4CRoot/source/test/intltest/rbbiapts.cpp 13 **/ 14 15 package com.ibm.icu.dev.test.rbbi; 16 17 import java.io.ByteArrayOutputStream; 18 import java.io.PrintStream; 19 import java.text.CharacterIterator; 20 import java.text.StringCharacterIterator; 21 import java.util.ArrayList; 22 import java.util.List; 23 import java.util.Locale; 24 25 import org.junit.Test; 26 import org.junit.runner.RunWith; 27 import org.junit.runners.JUnit4; 28 29 import com.ibm.icu.dev.test.TestFmwk; 30 import com.ibm.icu.text.BreakIterator; 31 import com.ibm.icu.text.RuleBasedBreakIterator; 32 import com.ibm.icu.util.ULocale; 33 34 /** 35 * API Test the RuleBasedBreakIterator class 36 */ 37 @RunWith(JUnit4.class) 38 public class RBBIAPITest extends TestFmwk { 39 /** 40 * Tests clone() and equals() methods of RuleBasedBreakIterator 41 **/ 42 @Test 43 public void TestCloneEquals() { 44 RuleBasedBreakIterator bi1 = (RuleBasedBreakIterator) BreakIterator.getCharacterInstance(Locale.getDefault()); 45 RuleBasedBreakIterator biequal = (RuleBasedBreakIterator) BreakIterator.getCharacterInstance(Locale.getDefault()); 46 RuleBasedBreakIterator bi3 = (RuleBasedBreakIterator) BreakIterator.getCharacterInstance(Locale.getDefault()); 47 RuleBasedBreakIterator bi2 = (RuleBasedBreakIterator) BreakIterator.getWordInstance(Locale.getDefault()); 48 49 String testString = "Testing word break iterators's clone() and equals()"; 50 bi1.setText(testString); 51 bi2.setText(testString); 52 biequal.setText(testString); 53 54 bi3.setText("hello"); 55 logln("Testing equals()"); 56 logln("Testing == and !="); 57 if (!bi1.equals(biequal) || bi1.equals(bi2) || bi1.equals(bi3)) 58 errln("ERROR:1 RBBI's == and !- operator failed."); 59 if (bi2.equals(biequal) || bi2.equals(bi1) || biequal.equals(bi3)) 60 errln("ERROR:2 RBBI's == and != operator failed."); 61 logln("Testing clone()"); 62 RuleBasedBreakIterator bi1clone = (RuleBasedBreakIterator) bi1.clone(); 63 RuleBasedBreakIterator bi2clone = (RuleBasedBreakIterator) bi2.clone(); 64 if (!bi1clone.equals(bi1) 65 || !bi1clone.equals(biequal) 66 || bi1clone.equals(bi3) 67 || bi1clone.equals(bi2)) 68 errln("ERROR:1 RBBI's clone() method failed"); 69 70 if (bi2clone.equals(bi1) 71 || bi2clone.equals(biequal) 72 || bi2clone.equals(bi3) 73 || !bi2clone.equals(bi2)) 74 errln("ERROR:2 RBBI's clone() method failed"); 75 76 if (!bi1.getText().equals(bi1clone.getText()) 77 || !bi2clone.getText().equals(bi2.getText()) 78 || bi2clone.equals(bi1clone)) 79 errln("ERROR: RBBI's clone() method failed"); 80 } 81 82 /** 83 * Tests toString() method of RuleBasedBreakIterator 84 **/ 85 @Test 86 public void TestToString() { 87 RuleBasedBreakIterator bi1 = (RuleBasedBreakIterator) BreakIterator.getCharacterInstance(Locale.getDefault()); 88 RuleBasedBreakIterator bi2 = (RuleBasedBreakIterator) BreakIterator.getWordInstance(Locale.getDefault()); 89 logln("Testing toString()"); 90 bi1.setText("Hello there"); 91 RuleBasedBreakIterator bi3 = (RuleBasedBreakIterator) bi1.clone(); 92 String temp = bi1.toString(); 93 String temp2 = bi2.toString(); 94 String temp3 = bi3.toString(); 95 if (temp2.equals(temp3) || temp.equals(temp2) || !temp.equals(temp3)) 96 errln("ERROR: error in toString() method"); 97 } 98 99 /** 100 * Tests the method hashCode() of RuleBasedBreakIterator 101 **/ 102 @Test 103 public void TestHashCode() { 104 RuleBasedBreakIterator bi1 = (RuleBasedBreakIterator) BreakIterator.getCharacterInstance(Locale.getDefault()); 105 RuleBasedBreakIterator bi3 = (RuleBasedBreakIterator) BreakIterator.getCharacterInstance(Locale.getDefault()); 106 RuleBasedBreakIterator bi2 = (RuleBasedBreakIterator) BreakIterator.getWordInstance(Locale.getDefault()); 107 logln("Testing hashCode()"); 108 bi1.setText("Hash code"); 109 bi2.setText("Hash code"); 110 bi3.setText("Hash code"); 111 RuleBasedBreakIterator bi1clone = (RuleBasedBreakIterator) bi1.clone(); 112 RuleBasedBreakIterator bi2clone = (RuleBasedBreakIterator) bi2.clone(); 113 if (bi1.hashCode() != bi1clone.hashCode() 114 || bi1.hashCode() != bi3.hashCode() 115 || bi1clone.hashCode() != bi3.hashCode() 116 || bi2.hashCode() != bi2clone.hashCode()) 117 errln("ERROR: identical objects have different hashcodes"); 118 119 if (bi1.hashCode() == bi2.hashCode() 120 || bi2.hashCode() == bi3.hashCode() 121 || bi1clone.hashCode() == bi2clone.hashCode() 122 || bi1clone.hashCode() == bi2.hashCode()) 123 errln("ERROR: different objects have same hashcodes"); 124 } 125 126 /** 127 * Tests the methods getText() and setText() of RuleBasedBreakIterator 128 **/ 129 @Test 130 public void TestGetSetText() { 131 logln("Testing getText setText "); 132 String str1 = "first string."; 133 String str2 = "Second string."; 134 //RuleBasedBreakIterator charIter1 = (RuleBasedBreakIterator) BreakIterator.getCharacterInstance(Locale.getDefault()); 135 RuleBasedBreakIterator wordIter1 = (RuleBasedBreakIterator) BreakIterator.getWordInstance(Locale.getDefault()); 136 CharacterIterator text1 = new StringCharacterIterator(str1); 137 //CharacterIterator text1Clone = (CharacterIterator) text1.clone(); 138 //CharacterIterator text2 = new StringCharacterIterator(str2); 139 wordIter1.setText(str1); 140 if (!wordIter1.getText().equals(text1)) 141 errln("ERROR:1 error in setText or getText "); 142 if (wordIter1.current() != 0) 143 errln("ERROR:1 setText did not set the iteration position to the beginning of the text, it is" 144 + wordIter1.current() + "\n"); 145 wordIter1.next(2); 146 wordIter1.setText(str2); 147 if (wordIter1.current() != 0) 148 errln("ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" 149 + wordIter1.current() + "\n"); 150 151 // Test the CharSequence overload of setText() for a simple case. 152 BreakIterator lineIter = BreakIterator.getLineInstance(Locale.ENGLISH); 153 CharSequence csText = "Hello, World. "; 154 // Expected Line Brks ^ ^ ^ 155 // 0123456789012345 156 List<Integer> expected = new ArrayList<Integer>(); 157 expected.add(0); expected.add(7); expected.add(14); 158 lineIter.setText(csText); 159 for (int pos = lineIter.first(); pos != BreakIterator.DONE; pos = lineIter.next()) { 160 assertTrue("", expected.contains(pos)); 161 } 162 assertEquals("", csText.length(), lineIter.current()); 163 } 164 165 /** 166 * Testing the methods first(), next(), next(int) and following() of RuleBasedBreakIterator 167 * TODO: Most of this test should be retired, rule behavior is much better covered by 168 * TestExtended, which is also easier to understand and maintain. 169 **/ 170 @Test 171 public void TestFirstNextFollowing() { 172 int p, q; 173 String testString = "This is a word break. Isn't it? 2.25"; 174 logln("Testing first() and next(), following() with custom rules"); 175 logln("testing word iterator - string :- \"" + testString + "\"\n"); 176 RuleBasedBreakIterator wordIter1 = (RuleBasedBreakIterator) BreakIterator.getWordInstance(Locale.getDefault()); 177 wordIter1.setText(testString); 178 p = wordIter1.first(); 179 if (p != 0) 180 errln("ERROR: first() returned" + p + "instead of 0"); 181 q = wordIter1.next(9); 182 doTest(testString, p, q, 20, "This is a word break"); 183 p = q; 184 q = wordIter1.next(); 185 doTest(testString, p, q, 21, "."); 186 p = q; 187 q = wordIter1.next(3); 188 doTest(testString, p, q, 28, " Isn't "); 189 p = q; 190 q = wordIter1.next(2); 191 doTest(testString, p, q, 31, "it?"); 192 q = wordIter1.following(2); 193 doTest(testString, 2, q, 4, "is"); 194 q = wordIter1.following(22); 195 doTest(testString, 22, q, 27, "Isn't"); 196 wordIter1.last(); 197 p = wordIter1.next(); 198 q = wordIter1.following(wordIter1.last()); 199 if (p != BreakIterator.DONE || q != BreakIterator.DONE) 200 errln("ERROR: next()/following() at last position returned #" 201 + p + " and " + q + " instead of" + testString.length() + "\n"); 202 RuleBasedBreakIterator charIter1 = (RuleBasedBreakIterator) BreakIterator.getCharacterInstance(Locale.getDefault()); 203 testString = "Write hindi here. "; 204 logln("testing char iter - string:- \"" + testString + "\""); 205 charIter1.setText(testString); 206 p = charIter1.first(); 207 if (p != 0) 208 errln("ERROR: first() returned" + p + "instead of 0"); 209 q = charIter1.next(); 210 doTest(testString, p, q, 1, "W"); 211 p = q; 212 q = charIter1.next(4); 213 doTest(testString, p, q, 5, "rite"); 214 p = q; 215 q = charIter1.next(12); 216 doTest(testString, p, q, 17, " hindi here."); 217 p = q; 218 q = charIter1.next(-6); 219 doTest(testString, p, q, 11, " here."); 220 p = q; 221 q = charIter1.next(6); 222 doTest(testString, p, q, 17, " here."); 223 p = charIter1.following(charIter1.last()); 224 q = charIter1.next(charIter1.last()); 225 if (p != BreakIterator.DONE || q != BreakIterator.DONE) 226 errln("ERROR: following()/next() at last position returned #" 227 + p + " and " + q + " instead of" + testString.length()); 228 testString = "Hello! how are you? I'am fine. Thankyou. How are you doing? This costs $20,00,000."; 229 RuleBasedBreakIterator sentIter1 = (RuleBasedBreakIterator) BreakIterator.getSentenceInstance(Locale.getDefault()); 230 logln("testing sentence iter - String:- \"" + testString + "\""); 231 sentIter1.setText(testString); 232 p = sentIter1.first(); 233 if (p != 0) 234 errln("ERROR: first() returned" + p + "instead of 0"); 235 q = sentIter1.next(); 236 doTest(testString, p, q, 7, "Hello! "); 237 p = q; 238 q = sentIter1.next(2); 239 doTest(testString, p, q, 31, "how are you? I'am fine. "); 240 p = q; 241 q = sentIter1.next(-2); 242 doTest(testString, p, q, 7, "how are you? I'am fine. "); 243 p = q; 244 q = sentIter1.next(4); 245 doTest(testString, p, q, 60, "how are you? I'am fine. Thankyou. How are you doing? "); 246 p = q; 247 q = sentIter1.next(); 248 doTest(testString, p, q, 83, "This costs $20,00,000."); 249 q = sentIter1.following(1); 250 doTest(testString, 1, q, 7, "ello! "); 251 q = sentIter1.following(10); 252 doTest(testString, 10, q, 20, " are you? "); 253 q = sentIter1.following(20); 254 doTest(testString, 20, q, 31, "I'am fine. "); 255 p = sentIter1.following(sentIter1.last()); 256 q = sentIter1.next(sentIter1.last()); 257 if (p != BreakIterator.DONE || q != BreakIterator.DONE) 258 errln("ERROR: following()/next() at last position returned #" 259 + p + " and " + q + " instead of" + testString.length()); 260 testString = "Hello! how\r\n (are)\r you? I'am fine- Thankyou. foo\u00a0bar How, are, you? This, costs $20,00,000."; 261 logln("(UnicodeString)testing line iter - String:- \"" + testString + "\""); 262 RuleBasedBreakIterator lineIter1 = (RuleBasedBreakIterator) BreakIterator.getLineInstance(Locale.getDefault()); 263 lineIter1.setText(testString); 264 p = lineIter1.first(); 265 if (p != 0) 266 errln("ERROR: first() returned" + p + "instead of 0"); 267 q = lineIter1.next(); 268 doTest(testString, p, q, 7, "Hello! "); 269 p = q; 270 p = q; 271 q = lineIter1.next(4); 272 doTest(testString, p, q, 20, "how\r\n (are)\r "); 273 p = q; 274 q = lineIter1.next(-4); 275 doTest(testString, p, q, 7, "how\r\n (are)\r "); 276 p = q; 277 q = lineIter1.next(6); 278 doTest(testString, p, q, 30, "how\r\n (are)\r you? I'am "); 279 p = q; 280 q = lineIter1.next(); 281 doTest(testString, p, q, 36, "fine- "); 282 p = q; 283 q = lineIter1.next(2); 284 doTest(testString, p, q, 54, "Thankyou. foo\u00a0bar "); 285 q = lineIter1.following(60); 286 doTest(testString, 60, q, 64, "re, "); 287 q = lineIter1.following(1); 288 doTest(testString, 1, q, 7, "ello! "); 289 q = lineIter1.following(10); 290 doTest(testString, 10, q, 12, "\r\n"); 291 q = lineIter1.following(20); 292 doTest(testString, 20, q, 25, "you? "); 293 p = lineIter1.following(lineIter1.last()); 294 q = lineIter1.next(lineIter1.last()); 295 if (p != BreakIterator.DONE || q != BreakIterator.DONE) 296 errln("ERROR: following()/next() at last position returned #" 297 + p + " and " + q + " instead of" + testString.length()); 298 } 299 300 /** 301 * Testing the methods last(), previous(), and preceding() of RuleBasedBreakIterator 302 **/ 303 @Test 304 public void TestLastPreviousPreceding() { 305 int p, q; 306 String testString = "This is a word break. Isn't it? 2.25 dollars"; 307 logln("Testing last(),previous(), preceding() with custom rules"); 308 logln("testing word iteration for string \"" + testString + "\""); 309 RuleBasedBreakIterator wordIter1 = (RuleBasedBreakIterator) BreakIterator.getWordInstance(Locale.ENGLISH); 310 wordIter1.setText(testString); 311 p = wordIter1.last(); 312 if (p != testString.length()) { 313 errln("ERROR: last() returned" + p + "instead of" + testString.length()); 314 } 315 q = wordIter1.previous(); 316 doTest(testString, p, q, 37, "dollars"); 317 p = q; 318 q = wordIter1.previous(); 319 doTest(testString, p, q, 36, " "); 320 q = wordIter1.preceding(25); 321 doTest(testString, 25, q, 22, "Isn"); 322 p = q; 323 q = wordIter1.previous(); 324 doTest(testString, p, q, 21, " "); 325 q = wordIter1.preceding(20); 326 doTest(testString, 20, q, 15, "break"); 327 p = wordIter1.preceding(wordIter1.first()); 328 if (p != BreakIterator.DONE) 329 errln("ERROR: preceding() at starting position returned #" + p + " instead of 0"); 330 testString = "Hello! how are you? I'am fine. Thankyou. How are you doing? This costs $20,00,000."; 331 logln("testing sentence iter - String:- \"" + testString + "\""); 332 RuleBasedBreakIterator sentIter1 = (RuleBasedBreakIterator) BreakIterator.getSentenceInstance(Locale.getDefault()); 333 sentIter1.setText(testString); 334 p = sentIter1.last(); 335 if (p != testString.length()) 336 errln("ERROR: last() returned" + p + "instead of " + testString.length()); 337 q = sentIter1.previous(); 338 doTest(testString, p, q, 60, "This costs $20,00,000."); 339 p = q; 340 q = sentIter1.previous(); 341 doTest(testString, p, q, 41, "How are you doing? "); 342 q = sentIter1.preceding(40); 343 doTest(testString, 40, q, 31, "Thankyou."); 344 q = sentIter1.preceding(25); 345 doTest(testString, 25, q, 20, "I'am "); 346 sentIter1.first(); 347 p = sentIter1.previous(); 348 q = sentIter1.preceding(sentIter1.first()); 349 if (p != BreakIterator.DONE || q != BreakIterator.DONE) 350 errln("ERROR: previous()/preceding() at starting position returned #" 351 + p + " and " + q + " instead of 0\n"); 352 testString = "Hello! how are you? I'am fine. Thankyou. How are you doing? This\n costs $20,00,000."; 353 logln("testing line iter - String:- \"" + testString + "\""); 354 RuleBasedBreakIterator lineIter1 = (RuleBasedBreakIterator) BreakIterator.getLineInstance(Locale.getDefault()); 355 lineIter1.setText(testString); 356 p = lineIter1.last(); 357 if (p != testString.length()) 358 errln("ERROR: last() returned" + p + "instead of " + testString.length()); 359 q = lineIter1.previous(); 360 doTest(testString, p, q, 72, "$20,00,000."); 361 p = q; 362 q = lineIter1.previous(); 363 doTest(testString, p, q, 66, "costs "); 364 q = lineIter1.preceding(40); 365 doTest(testString, 40, q, 31, "Thankyou."); 366 q = lineIter1.preceding(25); 367 doTest(testString, 25, q, 20, "I'am "); 368 lineIter1.first(); 369 p = lineIter1.previous(); 370 q = lineIter1.preceding(sentIter1.first()); 371 if (p != BreakIterator.DONE || q != BreakIterator.DONE) 372 errln("ERROR: previous()/preceding() at starting position returned #" 373 + p + " and " + q + " instead of 0\n"); 374 } 375 376 /** 377 * Tests the method IsBoundary() of RuleBasedBreakIterator 378 **/ 379 @Test 380 public void TestIsBoundary() { 381 String testString1 = "Write here. \u092d\u0301\u0930\u0924 \u0938\u0941\u0902\u0926\u0930 a\u0301u"; 382 RuleBasedBreakIterator charIter1 = (RuleBasedBreakIterator) BreakIterator.getCharacterInstance(Locale.ENGLISH); 383 charIter1.setText(testString1); 384 int bounds1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 20, 21, 22, 23, 25, 26}; 385 doBoundaryTest(charIter1, testString1, bounds1); 386 RuleBasedBreakIterator wordIter2 = (RuleBasedBreakIterator) BreakIterator.getWordInstance(Locale.ENGLISH); 387 wordIter2.setText(testString1); 388 int bounds2[] = {0, 5, 6, 10, 11, 12, 16, 17, 22, 23, 26}; 389 doBoundaryTest(wordIter2, testString1, bounds2); 390 } 391 392 /** 393 * Tests the rule status return value constants 394 */ 395 @Test 396 public void TestRuleStatus() { 397 BreakIterator bi = BreakIterator.getWordInstance(ULocale.ENGLISH); 398 399 bi.setText("# "); 400 assertEquals(null, bi.next(), 1); 401 assertTrue(null, bi.getRuleStatus() >= RuleBasedBreakIterator.WORD_NONE); 402 assertTrue(null, bi.getRuleStatus() < RuleBasedBreakIterator.WORD_NONE_LIMIT); 403 404 bi.setText("3 "); 405 assertEquals(null, bi.next(), 1); 406 assertTrue(null, bi.getRuleStatus() >= RuleBasedBreakIterator.WORD_NUMBER); 407 assertTrue(null, bi.getRuleStatus() < RuleBasedBreakIterator.WORD_NUMBER_LIMIT); 408 409 bi.setText("a "); 410 assertEquals(null, bi.next(), 1); 411 assertTrue(null, bi.getRuleStatus() >= RuleBasedBreakIterator.WORD_LETTER ); 412 assertTrue(null, bi.getRuleStatus() < RuleBasedBreakIterator.WORD_LETTER_LIMIT); 413 414 415 bi.setText(" "); 416 assertEquals(null, bi.next(), 1); 417 assertTrue(null, bi.getRuleStatus() >= RuleBasedBreakIterator.WORD_KANA ); 418 // TODO: ticket #10261, Kana is not returning the correct status. 419 // assertTrue(null, bi.getRuleStatus() < RuleBasedBreakIterator.WORD_KANA_LIMIT); 420 // System.out.println("\n" + bi.getRuleStatus()); 421 422 bi.setText(" "); 423 assertEquals(null, bi.next(), 1); 424 assertTrue(null, bi.getRuleStatus() >= RuleBasedBreakIterator.WORD_IDEO ); 425 assertTrue(null, bi.getRuleStatus() < RuleBasedBreakIterator.WORD_IDEO_LIMIT); 426 } 427 428 /** 429 * Tests the rule dump debug function. 430 */ 431 @Test 432 public void TestRuledump() { 433 RuleBasedBreakIterator bi = (RuleBasedBreakIterator)BreakIterator.getCharacterInstance(); 434 ByteArrayOutputStream bos = new ByteArrayOutputStream(); 435 PrintStream out = new PrintStream(bos); 436 bi.dump(out); 437 assertTrue(null, bos.size() > 100); 438 } 439 440 //--------------------------------------------- 441 //Internal subroutines 442 //--------------------------------------------- 443 444 /* Internal subroutine used by TestIsBoundary() */ 445 private void doBoundaryTest(BreakIterator bi, String text, int[] boundaries) { 446 logln("testIsBoundary():"); 447 int p = 0; 448 boolean isB; 449 for (int i = 0; i < text.length(); i++) { 450 isB = bi.isBoundary(i); 451 logln("bi.isBoundary(" + i + ") -> " + isB); 452 if (i == boundaries[p]) { 453 if (!isB) 454 errln("Wrong result from isBoundary() for " + i + ": expected true, got false"); 455 p++; 456 } else { 457 if (isB) 458 errln("Wrong result from isBoundary() for " + i + ": expected false, got true"); 459 } 460 } 461 } 462 463 /*Internal subroutine used for comparison of expected and acquired results */ 464 private void doTest(String testString, int start, int gotoffset, int expectedOffset, String expectedString) { 465 String selected; 466 String expected = expectedString; 467 if (gotoffset != expectedOffset) 468 errln("ERROR:****returned #" + gotoffset + " instead of #" + expectedOffset); 469 if (start <= gotoffset) { 470 selected = testString.substring(start, gotoffset); 471 } else { 472 selected = testString.substring(gotoffset, start); 473 } 474 if (!selected.equals(expected)) 475 errln("ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\""); 476 else 477 logln("****selected \"" + selected + "\""); 478 } 479 480 @Test 481 public void testGetTitleInstance() { 482 BreakIterator bi = BreakIterator.getTitleInstance(new Locale("en", "CA")); 483 TestFmwk.assertNotEquals("Title instance break iterator not correctly instantiated", bi.first(), null); 484 bi.setText("Here is some Text"); 485 TestFmwk.assertEquals("Title instance break iterator not correctly instantiated", bi.first(), 0); 486 } 487 } 488