1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /** 4 ******************************************************************************* 5 * Copyright (C) 1996-2010, International Business Machines Corporation and * 6 * others. All Rights Reserved. * 7 ******************************************************************************* 8 */ 9 10 package com.ibm.icu.dev.test.util; 11 12 import org.junit.Test; 13 import org.junit.runner.RunWith; 14 import org.junit.runners.JUnit4; 15 16 import com.ibm.icu.dev.test.TestFmwk; 17 import com.ibm.icu.impl.CharTrie; 18 import com.ibm.icu.impl.IntTrie; 19 import com.ibm.icu.impl.IntTrieBuilder; 20 import com.ibm.icu.impl.Trie; 21 import com.ibm.icu.impl.TrieBuilder; 22 import com.ibm.icu.impl.TrieIterator; 23 import com.ibm.icu.text.UTF16; 24 import com.ibm.icu.util.RangeValueIterator; 25 26 /** 27 * Testing class for Trie. Tests here will be simple, since both CharTrie and 28 * IntTrie are very similar and are heavily used in other parts of ICU4J. 29 * Codes using Tries are expected to have detailed tests. 30 * @author Syn Wee Quek 31 * @since release 2.1 Jan 01 2002 32 */ 33 @RunWith(JUnit4.class) 34 public final class TrieTest extends TestFmwk 35 { 36 // constructor --------------------------------------------------- 37 38 /** 39 * Constructor 40 */ 41 public TrieTest() 42 { 43 } 44 45 // public methods ----------------------------------------------- 46 47 /** 48 * Values for setting possibly overlapping, out-of-order ranges of values 49 */ 50 private static final class SetRange 51 { 52 SetRange(int start, int limit, int value, boolean overwrite) 53 { 54 this.start = start; 55 this.limit = limit; 56 this.value = value; 57 this.overwrite = overwrite; 58 } 59 60 int start, limit; 61 int value; 62 boolean overwrite; 63 } 64 65 /** 66 * Values for testing: 67 * value is set from the previous boundary's limit to before 68 * this boundary's limit 69 */ 70 private static final class CheckRange 71 { 72 CheckRange(int limit, int value) 73 { 74 this.limit = limit; 75 this.value = value; 76 } 77 78 int limit; 79 int value; 80 } 81 82 private static final class _testFoldedValue 83 implements TrieBuilder.DataManipulate 84 { 85 public _testFoldedValue(IntTrieBuilder builder) 86 { 87 m_builder_ = builder; 88 } 89 90 @Override 91 public int getFoldedValue(int start, int offset) 92 { 93 int foldedValue = 0; 94 int limit = start + 0x400; 95 while (start < limit) { 96 int value = m_builder_.getValue(start); 97 if (m_builder_.isInZeroBlock(start)) { 98 start += TrieBuilder.DATA_BLOCK_LENGTH; 99 } 100 else { 101 foldedValue |= value; 102 ++ start; 103 } 104 } 105 106 if (foldedValue != 0) { 107 return (offset << 16) | foldedValue; 108 } 109 return 0; 110 } 111 112 private IntTrieBuilder m_builder_; 113 } 114 115 private static final class _testFoldingOffset 116 implements Trie.DataManipulate 117 { 118 @Override 119 public int getFoldingOffset(int value) 120 { 121 return value >>> 16; 122 } 123 } 124 125 private static final class _testEnumValue extends TrieIterator 126 { 127 public _testEnumValue(Trie data) 128 { 129 super(data); 130 } 131 132 @Override 133 protected int extract(int value) 134 { 135 return value ^ 0x5555; 136 } 137 } 138 139 private void _testTrieIteration(IntTrie trie, CheckRange checkRanges[], 140 int countCheckRanges) 141 { 142 // write a string 143 int countValues = 0; 144 StringBuffer s = new StringBuffer(); 145 int values[] = new int[30]; 146 for (int i = 0; i < countCheckRanges; ++ i) { 147 int c = checkRanges[i].limit; 148 if (c != 0) { 149 -- c; 150 UTF16.append(s, c); 151 values[countValues ++] = checkRanges[i].value; 152 } 153 } 154 int limit = s.length(); 155 // try forward 156 int p = 0; 157 int i = 0; 158 while(p < limit) { 159 int c = UTF16.charAt(s, p); 160 p += UTF16.getCharCount(c); 161 int value = trie.getCodePointValue(c); 162 if (value != values[i]) { 163 errln("wrong value from UTRIE_NEXT(U+" 164 + Integer.toHexString(c) + "): 0x" 165 + Integer.toHexString(value) + " instead of 0x" 166 + Integer.toHexString(values[i])); 167 } 168 // unlike the c version lead is 0 if c is non-supplementary 169 char lead = UTF16.getLeadSurrogate(c); 170 char trail = UTF16.getTrailSurrogate(c); 171 if (lead == 0 172 ? trail != s.charAt(p - 1) 173 : !UTF16.isLeadSurrogate(lead) 174 || !UTF16.isTrailSurrogate(trail) || lead != s.charAt(p - 2) 175 || trail != s.charAt(p - 1)) { 176 errln("wrong (lead, trail) from UTRIE_NEXT(U+" 177 + Integer.toHexString(c)); 178 continue; 179 } 180 if (lead != 0) { 181 value = trie.getLeadValue(lead); 182 value = trie.getTrailValue(value, trail); 183 if (value != trie.getSurrogateValue(lead, trail) 184 && value != values[i]) { 185 errln("wrong value from getting supplementary " 186 + "values (U+" 187 + Integer.toHexString(c) + "): 0x" 188 + Integer.toHexString(value) + " instead of 0x" 189 + Integer.toHexString(values[i])); 190 } 191 } 192 ++ i; 193 } 194 } 195 196 private void _testTrieRanges(SetRange setRanges[], int countSetRanges, 197 CheckRange checkRanges[], int countCheckRanges, 198 boolean latin1Linear) 199 { 200 IntTrieBuilder newTrie = new IntTrieBuilder(null, 2000, 201 checkRanges[0].value, 202 checkRanges[0].value, 203 latin1Linear); 204 205 // set values from setRanges[] 206 boolean ok = true; 207 for (int i = 0; i < countSetRanges; ++ i) { 208 int start = setRanges[i].start; 209 int limit = setRanges[i].limit; 210 int value = setRanges[i].value; 211 boolean overwrite = setRanges[i].overwrite; 212 if ((limit - start) == 1 && overwrite) { 213 ok &= newTrie.setValue(start, value); 214 } 215 else { 216 ok &= newTrie.setRange(start, limit, value, overwrite); 217 } 218 } 219 if (!ok) { 220 errln("setting values into a trie failed"); 221 return; 222 } 223 224 // verify that all these values are in the new Trie 225 int start = 0; 226 for (int i = 0; i < countCheckRanges; ++ i) { 227 int limit = checkRanges[i].limit; 228 int value = checkRanges[i].value; 229 230 while (start < limit) { 231 if (value != newTrie.getValue(start)) { 232 errln("newTrie [U+" 233 + Integer.toHexString(start) + "]==0x" 234 + Integer.toHexString(newTrie.getValue(start)) 235 + " instead of 0x" + Integer.toHexString(value)); 236 } 237 ++ start; 238 } 239 } 240 241 IntTrie trie = newTrie.serialize(new _testFoldedValue(newTrie), 242 new _testFoldingOffset()); 243 244 // test linear Latin-1 range from utrie_getData() 245 if (latin1Linear) { 246 start = 0; 247 for (int i = 0; i < countCheckRanges && start <= 0xff; ++ i) { 248 int limit = checkRanges[i].limit; 249 int value = checkRanges[i].value; 250 251 while (start < limit && start <= 0xff) { 252 if (value != trie.getLatin1LinearValue((char)start)) { 253 errln("IntTrie.getLatin1LinearValue[U+" 254 + Integer.toHexString(start) + "]==0x" 255 + Integer.toHexString( 256 trie.getLatin1LinearValue((char) start)) 257 + " instead of 0x" + Integer.toHexString(value)); 258 } 259 ++ start; 260 } 261 } 262 } 263 264 if (latin1Linear != trie.isLatin1Linear()) { 265 errln("trie serialization did not preserve " 266 + "Latin-1-linearity"); 267 } 268 269 // verify that all these values are in the serialized Trie 270 start = 0; 271 for (int i = 0; i < countCheckRanges; ++ i) { 272 int limit = checkRanges[i].limit; 273 int value = checkRanges[i].value; 274 275 if (start == 0xd800) { 276 // skip surrogates 277 start = limit; 278 continue; 279 } 280 281 while (start < limit) { 282 if (start <= 0xffff) { 283 int value2 = trie.getBMPValue((char)start); 284 if (value != value2) { 285 errln("serialized trie.getBMPValue(U+" 286 + Integer.toHexString(start) + " == 0x" 287 + Integer.toHexString(value2) + " instead of 0x" 288 + Integer.toHexString(value)); 289 } 290 if (!UTF16.isLeadSurrogate((char)start)) { 291 value2 = trie.getLeadValue((char)start); 292 if (value != value2) { 293 errln("serialized trie.getLeadValue(U+" 294 + Integer.toHexString(start) + " == 0x" 295 + Integer.toHexString(value2) + " instead of 0x" 296 + Integer.toHexString(value)); 297 } 298 } 299 } 300 int value2 = trie.getCodePointValue(start); 301 if (value != value2) { 302 errln("serialized trie.getCodePointValue(U+" 303 + Integer.toHexString(start) + ")==0x" 304 + Integer.toHexString(value2) + " instead of 0x" 305 + Integer.toHexString(value)); 306 } 307 ++ start; 308 } 309 } 310 311 // enumerate and verify all ranges 312 313 int enumRanges = 1; 314 TrieIterator iter = new _testEnumValue(trie); 315 RangeValueIterator.Element result = new RangeValueIterator.Element(); 316 while (iter.next(result)) { 317 if (result.start != checkRanges[enumRanges -1].limit 318 || result.limit != checkRanges[enumRanges].limit 319 || (result.value ^ 0x5555) != checkRanges[enumRanges].value) { 320 errln("utrie_enum() delivers wrong range [U+" 321 + Integer.toHexString(result.start) + "..U+" 322 + Integer.toHexString(result.limit) + "].0x" 323 + Integer.toHexString(result.value ^ 0x5555) 324 + " instead of [U+" 325 + Integer.toHexString(checkRanges[enumRanges -1].limit) 326 + "..U+" 327 + Integer.toHexString(checkRanges[enumRanges].limit) 328 + "].0x" 329 + Integer.toHexString(checkRanges[enumRanges].value)); 330 } 331 enumRanges ++; 332 } 333 334 // test linear Latin-1 range 335 if (trie.isLatin1Linear()) { 336 for (start = 0; start < 0x100; ++ start) { 337 if (trie.getLatin1LinearValue((char)start) 338 != trie.getLeadValue((char)start)) { 339 errln("trie.getLatin1LinearValue[U+" 340 + Integer.toHexString(start) + "]=0x" 341 + Integer.toHexString( 342 trie.getLatin1LinearValue((char)start)) 343 + " instead of 0x" 344 + Integer.toHexString( 345 trie.getLeadValue((char)start))); 346 } 347 } 348 } 349 350 _testTrieIteration(trie, checkRanges, countCheckRanges); 351 } 352 353 private void _testTrieRanges2(SetRange setRanges[], 354 int countSetRanges, 355 CheckRange checkRanges[], 356 int countCheckRanges) 357 { 358 _testTrieRanges(setRanges, countSetRanges, checkRanges, countCheckRanges, 359 false); 360 361 _testTrieRanges(setRanges, countSetRanges, checkRanges, countCheckRanges, 362 true); 363 } 364 365 private void _testTrieRanges4(SetRange setRanges[], int countSetRanges, 366 CheckRange checkRanges[], 367 int countCheckRanges) 368 { 369 _testTrieRanges2(setRanges, countSetRanges, checkRanges, 370 countCheckRanges); 371 } 372 373 // test data ------------------------------------------------------------ 374 375 /** 376 * set consecutive ranges, even with value 0 377 */ 378 private static SetRange setRanges1[]={ 379 new SetRange(0, 0x20, 0, false), 380 new SetRange(0x20, 0xa7, 0x1234, false), 381 new SetRange(0xa7, 0x3400, 0, false), 382 new SetRange(0x3400, 0x9fa6, 0x6162, false), 383 new SetRange(0x9fa6, 0xda9e, 0x3132, false), 384 // try to disrupt _testFoldingOffset16() 385 new SetRange(0xdada, 0xeeee, 0x87ff, false), 386 new SetRange(0xeeee, 0x11111, 1, false), 387 new SetRange(0x11111, 0x44444, 0x6162, false), 388 new SetRange(0x44444, 0x60003, 0, false), 389 new SetRange(0xf0003, 0xf0004, 0xf, false), 390 new SetRange(0xf0004, 0xf0006, 0x10, false), 391 new SetRange(0xf0006, 0xf0007, 0x11, false), 392 new SetRange(0xf0007, 0xf0020, 0x12, false), 393 new SetRange(0xf0020, 0x110000, 0, false) 394 }; 395 396 private static CheckRange checkRanges1[]={ 397 new CheckRange(0, 0), // dummy start range to make _testEnumRange() simpler 398 new CheckRange(0x20, 0), 399 new CheckRange(0xa7, 0x1234), 400 new CheckRange(0x3400, 0), 401 new CheckRange(0x9fa6, 0x6162), 402 new CheckRange(0xda9e, 0x3132), 403 new CheckRange(0xdada, 0), 404 new CheckRange(0xeeee, 0x87ff), 405 new CheckRange(0x11111,1), 406 new CheckRange(0x44444,0x6162), 407 new CheckRange(0xf0003,0), 408 new CheckRange(0xf0004,0xf), 409 new CheckRange(0xf0006,0x10), 410 new CheckRange(0xf0007,0x11), 411 new CheckRange(0xf0020,0x12), 412 new CheckRange(0x110000, 0) 413 }; 414 415 /** 416 * set some interesting overlapping ranges 417 */ 418 private static SetRange setRanges2[]={ 419 new SetRange(0x21, 0x7f, 0x5555, true), 420 new SetRange(0x2f800,0x2fedc, 0x7a, true), 421 new SetRange(0x72, 0xdd, 3, true), 422 new SetRange(0xdd, 0xde, 4, false), 423 new SetRange(0x2f987,0x2fa98, 5, true), 424 new SetRange(0x2f777,0x2f833, 0, true), 425 new SetRange(0x2f900,0x2ffee, 1, false), 426 new SetRange(0x2ffee,0x2ffef, 2, true) 427 }; 428 429 private static CheckRange checkRanges2[]={ 430 // dummy start range to make _testEnumRange() simpler 431 new CheckRange(0, 0), 432 new CheckRange(0x21, 0), 433 new CheckRange(0x72, 0x5555), 434 new CheckRange(0xdd, 3), 435 new CheckRange(0xde, 4), 436 new CheckRange(0x2f833,0), 437 new CheckRange(0x2f987,0x7a), 438 new CheckRange(0x2fa98,5), 439 new CheckRange(0x2fedc,0x7a), 440 new CheckRange(0x2ffee,1), 441 new CheckRange(0x2ffef,2), 442 new CheckRange(0x110000, 0) 443 }; 444 445 /** 446 * use a non-zero initial value 447 */ 448 private static SetRange setRanges3[]={ 449 new SetRange(0x31, 0xa4, 1, false), 450 new SetRange(0x3400, 0x6789, 2, false), 451 new SetRange(0x30000,0x34567,9, true), 452 new SetRange(0x45678,0x56789,3, true) 453 }; 454 455 private static CheckRange checkRanges3[]={ 456 // dummy start range, also carries the initial value 457 new CheckRange(0, 9), 458 new CheckRange(0x31, 9), 459 new CheckRange(0xa4, 1), 460 new CheckRange(0x3400, 9), 461 new CheckRange(0x6789, 2), 462 new CheckRange(0x45678,9), 463 new CheckRange(0x56789,3), 464 new CheckRange(0x110000,9) 465 }; 466 467 @Test 468 public void TestIntTrie() 469 { 470 _testTrieRanges4(setRanges1, setRanges1.length, checkRanges1, 471 checkRanges1.length); 472 _testTrieRanges4(setRanges2, setRanges2.length, checkRanges2, 473 checkRanges2.length); 474 _testTrieRanges4(setRanges3, setRanges3.length, checkRanges3, 475 checkRanges3.length); 476 } 477 478 private static class DummyGetFoldingOffset implements Trie.DataManipulate { 479 @Override 480 public int getFoldingOffset(int value) { 481 return -1; /* never get non-initialValue data for supplementary code points */ 482 } 483 } 484 485 @Test 486 public void TestDummyCharTrie() { 487 CharTrie trie; 488 final int initialValue=0x313, leadUnitValue=0xaffe; 489 int value; 490 int c; 491 trie=new CharTrie(initialValue, leadUnitValue, new DummyGetFoldingOffset()); 492 493 /* test that all code points have initialValue */ 494 for(c=0; c<=0x10ffff; ++c) { 495 value=trie.getCodePointValue(c); 496 if(value!=initialValue) { 497 errln("CharTrie/dummy.getCodePointValue(c)(U+"+hex(c)+")=0x"+hex(value)+" instead of 0x"+hex(initialValue)); 498 } 499 } 500 501 /* test that the lead surrogate code units have leadUnitValue */ 502 for(c=0xd800; c<=0xdbff; ++c) { 503 value=trie.getLeadValue((char)c); 504 if(value!=leadUnitValue) { 505 errln("CharTrie/dummy.getLeadValue(c)(U+"+hex(c)+")=0x"+hex(value)+" instead of 0x"+hex(leadUnitValue)); 506 } 507 } 508 } 509 510 @Test 511 public void TestDummyIntTrie() { 512 IntTrie trie; 513 final int initialValue=0x01234567, leadUnitValue=0x89abcdef; 514 int value; 515 int c; 516 trie=new IntTrie(initialValue, leadUnitValue, new DummyGetFoldingOffset()); 517 518 /* test that all code points have initialValue */ 519 for(c=0; c<=0x10ffff; ++c) { 520 value=trie.getCodePointValue(c); 521 if(value!=initialValue) { 522 errln("IntTrie/dummy.getCodePointValue(c)(U+"+hex(c)+")=0x"+hex(value)+" instead of 0x"+hex(initialValue)); 523 } 524 } 525 526 /* test that the lead surrogate code units have leadUnitValue */ 527 for(c=0xd800; c<=0xdbff; ++c) { 528 value=trie.getLeadValue((char)c); 529 if(value!=leadUnitValue) { 530 errln("IntTrie/dummy.getLeadValue(c)(U+"+hex(c)+")=0x"+hex(value)+" instead of 0x"+hex(leadUnitValue)); 531 } 532 } 533 } 534 } 535