Home | History | Annotate | Download | only in util
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /**
      4 *******************************************************************************
      5 * Copyright (C) 1996-2010, International Business Machines Corporation and    *
      6 * others. All Rights Reserved.                                                *
      7 *******************************************************************************
      8 */
      9 
     10 package com.ibm.icu.dev.test.util;
     11 
     12 import org.junit.Test;
     13 import org.junit.runner.RunWith;
     14 import org.junit.runners.JUnit4;
     15 
     16 import com.ibm.icu.dev.test.TestFmwk;
     17 import com.ibm.icu.impl.CharTrie;
     18 import com.ibm.icu.impl.IntTrie;
     19 import com.ibm.icu.impl.IntTrieBuilder;
     20 import com.ibm.icu.impl.Trie;
     21 import com.ibm.icu.impl.TrieBuilder;
     22 import com.ibm.icu.impl.TrieIterator;
     23 import com.ibm.icu.text.UTF16;
     24 import com.ibm.icu.util.RangeValueIterator;
     25 
     26 /**
     27 * Testing class for Trie. Tests here will be simple, since both CharTrie and
     28 * IntTrie are very similar and are heavily used in other parts of ICU4J.
     29 * Codes using Tries are expected to have detailed tests.
     30 * @author Syn Wee Quek
     31 * @since release 2.1 Jan 01 2002
     32 */
     33 @RunWith(JUnit4.class)
     34 public final class TrieTest extends TestFmwk
     35 {
     36     // constructor ---------------------------------------------------
     37 
     38     /**
     39     * Constructor
     40     */
     41     public TrieTest()
     42     {
     43     }
     44 
     45     // public methods -----------------------------------------------
     46 
     47     /**
     48      * Values for setting possibly overlapping, out-of-order ranges of values
     49      */
     50     private static final class SetRange
     51     {
     52         SetRange(int start, int limit, int value, boolean overwrite)
     53         {
     54             this.start = start;
     55             this.limit = limit;
     56             this.value = value;
     57             this.overwrite = overwrite;
     58         }
     59 
     60         int start, limit;
     61         int value;
     62         boolean overwrite;
     63     }
     64 
     65     /**
     66      * Values for testing:
     67      * value is set from the previous boundary's limit to before
     68      * this boundary's limit
     69      */
     70     private static final class CheckRange
     71     {
     72         CheckRange(int limit, int value)
     73         {
     74             this.limit = limit;
     75             this.value = value;
     76         }
     77 
     78         int limit;
     79         int value;
     80     }
     81 
     82     private static final class _testFoldedValue
     83                                         implements TrieBuilder.DataManipulate
     84     {
     85         public _testFoldedValue(IntTrieBuilder builder)
     86         {
     87             m_builder_ = builder;
     88         }
     89 
     90         @Override
     91         public int getFoldedValue(int start, int offset)
     92         {
     93             int foldedValue = 0;
     94             int limit = start + 0x400;
     95             while (start < limit) {
     96                 int value = m_builder_.getValue(start);
     97                 if (m_builder_.isInZeroBlock(start)) {
     98                     start += TrieBuilder.DATA_BLOCK_LENGTH;
     99                 }
    100                 else {
    101                     foldedValue |= value;
    102                     ++ start;
    103                 }
    104             }
    105 
    106             if (foldedValue != 0) {
    107                 return (offset << 16) | foldedValue;
    108             }
    109             return 0;
    110         }
    111 
    112         private IntTrieBuilder m_builder_;
    113     }
    114 
    115     private static final class _testFoldingOffset
    116                                                 implements Trie.DataManipulate
    117     {
    118         @Override
    119         public int getFoldingOffset(int value)
    120         {
    121             return value >>> 16;
    122         }
    123     }
    124 
    125     private static final class _testEnumValue extends TrieIterator
    126     {
    127         public _testEnumValue(Trie data)
    128         {
    129             super(data);
    130         }
    131 
    132         @Override
    133         protected int extract(int value)
    134         {
    135             return value ^ 0x5555;
    136         }
    137     }
    138 
    139     private void _testTrieIteration(IntTrie trie, CheckRange checkRanges[],
    140                                     int countCheckRanges)
    141     {
    142         // write a string
    143         int countValues = 0;
    144         StringBuffer s = new StringBuffer();
    145         int values[] = new int[30];
    146         for (int i = 0; i < countCheckRanges; ++ i) {
    147             int c = checkRanges[i].limit;
    148             if (c != 0) {
    149                 -- c;
    150                 UTF16.append(s, c);
    151                 values[countValues ++] = checkRanges[i].value;
    152             }
    153         }
    154         int limit = s.length();
    155         // try forward
    156         int p = 0;
    157         int i = 0;
    158         while(p < limit) {
    159             int c = UTF16.charAt(s, p);
    160             p += UTF16.getCharCount(c);
    161             int value = trie.getCodePointValue(c);
    162             if (value != values[i]) {
    163                 errln("wrong value from UTRIE_NEXT(U+"
    164                       + Integer.toHexString(c) + "): 0x"
    165                       + Integer.toHexString(value) + " instead of 0x"
    166                       + Integer.toHexString(values[i]));
    167             }
    168             // unlike the c version lead is 0 if c is non-supplementary
    169             char lead = UTF16.getLeadSurrogate(c);
    170             char trail = UTF16.getTrailSurrogate(c);
    171             if (lead == 0
    172                 ? trail != s.charAt(p - 1)
    173                 : !UTF16.isLeadSurrogate(lead)
    174                   || !UTF16.isTrailSurrogate(trail) || lead != s.charAt(p - 2)
    175                   || trail != s.charAt(p - 1)) {
    176                 errln("wrong (lead, trail) from UTRIE_NEXT(U+"
    177                       + Integer.toHexString(c));
    178                 continue;
    179             }
    180             if (lead != 0) {
    181                 value = trie.getLeadValue(lead);
    182                 value = trie.getTrailValue(value, trail);
    183                 if (value != trie.getSurrogateValue(lead, trail)
    184                     && value != values[i]) {
    185                     errln("wrong value from getting supplementary "
    186                           + "values (U+"
    187                           + Integer.toHexString(c) + "): 0x"
    188                           + Integer.toHexString(value) + " instead of 0x"
    189                           + Integer.toHexString(values[i]));
    190                 }
    191             }
    192             ++ i;
    193         }
    194     }
    195 
    196     private void _testTrieRanges(SetRange setRanges[], int countSetRanges,
    197                                  CheckRange checkRanges[], int countCheckRanges,
    198                                  boolean latin1Linear)
    199     {
    200         IntTrieBuilder newTrie = new IntTrieBuilder(null, 2000,
    201                                                     checkRanges[0].value,
    202                                                     checkRanges[0].value,
    203                                                     latin1Linear);
    204 
    205         // set values from setRanges[]
    206         boolean ok = true;
    207         for (int i = 0; i < countSetRanges; ++ i) {
    208             int start = setRanges[i].start;
    209             int limit = setRanges[i].limit;
    210             int value = setRanges[i].value;
    211             boolean overwrite = setRanges[i].overwrite;
    212             if ((limit - start) == 1 && overwrite) {
    213                 ok &= newTrie.setValue(start, value);
    214             }
    215             else {
    216                 ok &= newTrie.setRange(start, limit, value, overwrite);
    217             }
    218         }
    219         if (!ok) {
    220             errln("setting values into a trie failed");
    221             return;
    222         }
    223 
    224         // verify that all these values are in the new Trie
    225         int start = 0;
    226         for (int i = 0; i < countCheckRanges; ++ i) {
    227             int limit = checkRanges[i].limit;
    228             int value = checkRanges[i].value;
    229 
    230             while (start < limit) {
    231                 if (value != newTrie.getValue(start)) {
    232                     errln("newTrie [U+"
    233                           + Integer.toHexString(start) + "]==0x"
    234                           + Integer.toHexString(newTrie.getValue(start))
    235                           + " instead of 0x" + Integer.toHexString(value));
    236                 }
    237                 ++ start;
    238             }
    239         }
    240 
    241         IntTrie trie = newTrie.serialize(new _testFoldedValue(newTrie),
    242                                          new _testFoldingOffset());
    243 
    244         // test linear Latin-1 range from utrie_getData()
    245         if (latin1Linear) {
    246             start = 0;
    247             for (int i = 0; i < countCheckRanges && start <= 0xff; ++ i) {
    248                 int limit = checkRanges[i].limit;
    249                 int value = checkRanges[i].value;
    250 
    251                 while (start < limit && start <= 0xff) {
    252                     if (value != trie.getLatin1LinearValue((char)start)) {
    253                         errln("IntTrie.getLatin1LinearValue[U+"
    254                               + Integer.toHexString(start) + "]==0x"
    255                               + Integer.toHexString(
    256                                         trie.getLatin1LinearValue((char) start))
    257                               + " instead of 0x" + Integer.toHexString(value));
    258                     }
    259                     ++ start;
    260                 }
    261             }
    262         }
    263 
    264         if (latin1Linear != trie.isLatin1Linear()) {
    265             errln("trie serialization did not preserve "
    266                   + "Latin-1-linearity");
    267         }
    268 
    269         // verify that all these values are in the serialized Trie
    270         start = 0;
    271         for (int i = 0; i < countCheckRanges; ++ i) {
    272             int limit = checkRanges[i].limit;
    273             int value = checkRanges[i].value;
    274 
    275             if (start == 0xd800) {
    276                 // skip surrogates
    277                 start = limit;
    278                 continue;
    279             }
    280 
    281             while (start < limit) {
    282                 if (start <= 0xffff) {
    283                     int value2 = trie.getBMPValue((char)start);
    284                     if (value != value2) {
    285                         errln("serialized trie.getBMPValue(U+"
    286                               + Integer.toHexString(start) + " == 0x"
    287                               + Integer.toHexString(value2) + " instead of 0x"
    288                               + Integer.toHexString(value));
    289                     }
    290                     if (!UTF16.isLeadSurrogate((char)start)) {
    291                         value2 = trie.getLeadValue((char)start);
    292                         if (value != value2) {
    293                             errln("serialized trie.getLeadValue(U+"
    294                               + Integer.toHexString(start) + " == 0x"
    295                               + Integer.toHexString(value2) + " instead of 0x"
    296                               + Integer.toHexString(value));
    297                         }
    298                     }
    299                 }
    300                 int value2 = trie.getCodePointValue(start);
    301                 if (value != value2) {
    302                     errln("serialized trie.getCodePointValue(U+"
    303                           + Integer.toHexString(start) + ")==0x"
    304                           + Integer.toHexString(value2) + " instead of 0x"
    305                           + Integer.toHexString(value));
    306                 }
    307                 ++ start;
    308             }
    309         }
    310 
    311         // enumerate and verify all ranges
    312 
    313         int enumRanges = 1;
    314         TrieIterator iter  = new _testEnumValue(trie);
    315         RangeValueIterator.Element result = new RangeValueIterator.Element();
    316         while (iter.next(result)) {
    317             if (result.start != checkRanges[enumRanges -1].limit
    318                 || result.limit != checkRanges[enumRanges].limit
    319                 || (result.value ^ 0x5555) != checkRanges[enumRanges].value) {
    320                 errln("utrie_enum() delivers wrong range [U+"
    321                       + Integer.toHexString(result.start) + "..U+"
    322                       + Integer.toHexString(result.limit) + "].0x"
    323                       + Integer.toHexString(result.value ^ 0x5555)
    324                       + " instead of [U+"
    325                       + Integer.toHexString(checkRanges[enumRanges -1].limit)
    326                       + "..U+"
    327                       + Integer.toHexString(checkRanges[enumRanges].limit)
    328                       + "].0x"
    329                       + Integer.toHexString(checkRanges[enumRanges].value));
    330             }
    331             enumRanges ++;
    332         }
    333 
    334         // test linear Latin-1 range
    335         if (trie.isLatin1Linear()) {
    336             for (start = 0; start < 0x100; ++ start) {
    337                 if (trie.getLatin1LinearValue((char)start)
    338                     != trie.getLeadValue((char)start)) {
    339                     errln("trie.getLatin1LinearValue[U+"
    340                           + Integer.toHexString(start) + "]=0x"
    341                           + Integer.toHexString(
    342                                         trie.getLatin1LinearValue((char)start))
    343                           + " instead of 0x"
    344                           + Integer.toHexString(
    345                                         trie.getLeadValue((char)start)));
    346                 }
    347             }
    348         }
    349 
    350         _testTrieIteration(trie, checkRanges, countCheckRanges);
    351     }
    352 
    353     private void _testTrieRanges2(SetRange setRanges[],
    354                                   int countSetRanges,
    355                                   CheckRange checkRanges[],
    356                                   int countCheckRanges)
    357     {
    358         _testTrieRanges(setRanges, countSetRanges, checkRanges, countCheckRanges,
    359                         false);
    360 
    361         _testTrieRanges(setRanges, countSetRanges, checkRanges, countCheckRanges,
    362                         true);
    363     }
    364 
    365     private void _testTrieRanges4(SetRange setRanges[], int countSetRanges,
    366                                   CheckRange checkRanges[],
    367                                   int countCheckRanges)
    368     {
    369         _testTrieRanges2(setRanges, countSetRanges, checkRanges,
    370                          countCheckRanges);
    371     }
    372 
    373     // test data ------------------------------------------------------------
    374 
    375     /**
    376      * set consecutive ranges, even with value 0
    377      */
    378     private static SetRange setRanges1[]={
    379         new SetRange(0,      0x20,       0,      false),
    380         new SetRange(0x20,   0xa7,       0x1234, false),
    381         new SetRange(0xa7,   0x3400,     0,      false),
    382         new SetRange(0x3400, 0x9fa6,     0x6162, false),
    383         new SetRange(0x9fa6, 0xda9e,     0x3132, false),
    384         // try to disrupt _testFoldingOffset16()
    385         new SetRange(0xdada, 0xeeee,     0x87ff, false),
    386         new SetRange(0xeeee, 0x11111,    1,      false),
    387         new SetRange(0x11111, 0x44444,   0x6162, false),
    388         new SetRange(0x44444, 0x60003,   0,      false),
    389         new SetRange(0xf0003, 0xf0004,   0xf,    false),
    390         new SetRange(0xf0004, 0xf0006,   0x10,   false),
    391         new SetRange(0xf0006, 0xf0007,   0x11,   false),
    392         new SetRange(0xf0007, 0xf0020,   0x12,   false),
    393         new SetRange(0xf0020, 0x110000,  0,      false)
    394     };
    395 
    396     private static CheckRange checkRanges1[]={
    397         new CheckRange(0,      0), // dummy start range to make _testEnumRange() simpler
    398         new CheckRange(0x20,   0),
    399         new CheckRange(0xa7,   0x1234),
    400         new CheckRange(0x3400, 0),
    401         new CheckRange(0x9fa6, 0x6162),
    402         new CheckRange(0xda9e, 0x3132),
    403         new CheckRange(0xdada, 0),
    404         new CheckRange(0xeeee, 0x87ff),
    405         new CheckRange(0x11111,1),
    406         new CheckRange(0x44444,0x6162),
    407         new CheckRange(0xf0003,0),
    408         new CheckRange(0xf0004,0xf),
    409         new CheckRange(0xf0006,0x10),
    410         new CheckRange(0xf0007,0x11),
    411         new CheckRange(0xf0020,0x12),
    412         new CheckRange(0x110000, 0)
    413     };
    414 
    415     /**
    416      * set some interesting overlapping ranges
    417      */
    418     private static SetRange setRanges2[]={
    419         new SetRange(0x21,   0x7f,       0x5555, true),
    420         new SetRange(0x2f800,0x2fedc,    0x7a,   true),
    421         new SetRange(0x72,   0xdd,       3,      true),
    422         new SetRange(0xdd,   0xde,       4,      false),
    423         new SetRange(0x2f987,0x2fa98,    5,      true),
    424         new SetRange(0x2f777,0x2f833,    0,      true),
    425         new SetRange(0x2f900,0x2ffee,    1,      false),
    426         new SetRange(0x2ffee,0x2ffef,    2,      true)
    427     };
    428 
    429     private static CheckRange checkRanges2[]={
    430         // dummy start range to make _testEnumRange() simpler
    431         new CheckRange(0,      0),
    432         new CheckRange(0x21,   0),
    433         new CheckRange(0x72,   0x5555),
    434         new CheckRange(0xdd,   3),
    435         new CheckRange(0xde,   4),
    436         new CheckRange(0x2f833,0),
    437         new CheckRange(0x2f987,0x7a),
    438         new CheckRange(0x2fa98,5),
    439         new CheckRange(0x2fedc,0x7a),
    440         new CheckRange(0x2ffee,1),
    441         new CheckRange(0x2ffef,2),
    442         new CheckRange(0x110000, 0)
    443     };
    444 
    445     /**
    446      * use a non-zero initial value
    447      */
    448     private static SetRange setRanges3[]={
    449         new SetRange(0x31,   0xa4,   1,  false),
    450         new SetRange(0x3400, 0x6789, 2,  false),
    451         new SetRange(0x30000,0x34567,9,  true),
    452         new SetRange(0x45678,0x56789,3,  true)
    453     };
    454 
    455     private static CheckRange checkRanges3[]={
    456         // dummy start range, also carries the initial value
    457         new CheckRange(0,      9),
    458         new CheckRange(0x31,   9),
    459         new CheckRange(0xa4,   1),
    460         new CheckRange(0x3400, 9),
    461         new CheckRange(0x6789, 2),
    462         new CheckRange(0x45678,9),
    463         new CheckRange(0x56789,3),
    464         new CheckRange(0x110000,9)
    465     };
    466 
    467     @Test
    468     public void TestIntTrie()
    469     {
    470         _testTrieRanges4(setRanges1, setRanges1.length, checkRanges1,
    471                          checkRanges1.length);
    472         _testTrieRanges4(setRanges2, setRanges2.length, checkRanges2,
    473                          checkRanges2.length);
    474         _testTrieRanges4(setRanges3, setRanges3.length, checkRanges3,
    475                          checkRanges3.length);
    476     }
    477 
    478     private static class DummyGetFoldingOffset implements Trie.DataManipulate {
    479         @Override
    480         public int getFoldingOffset(int value) {
    481             return -1; /* never get non-initialValue data for supplementary code points */
    482         }
    483     }
    484 
    485     @Test
    486     public void TestDummyCharTrie() {
    487         CharTrie trie;
    488         final int initialValue=0x313, leadUnitValue=0xaffe;
    489         int value;
    490         int c;
    491         trie=new CharTrie(initialValue, leadUnitValue, new DummyGetFoldingOffset());
    492 
    493         /* test that all code points have initialValue */
    494         for(c=0; c<=0x10ffff; ++c) {
    495             value=trie.getCodePointValue(c);
    496             if(value!=initialValue) {
    497                 errln("CharTrie/dummy.getCodePointValue(c)(U+"+hex(c)+")=0x"+hex(value)+" instead of 0x"+hex(initialValue));
    498             }
    499         }
    500 
    501         /* test that the lead surrogate code units have leadUnitValue */
    502         for(c=0xd800; c<=0xdbff; ++c) {
    503             value=trie.getLeadValue((char)c);
    504             if(value!=leadUnitValue) {
    505                 errln("CharTrie/dummy.getLeadValue(c)(U+"+hex(c)+")=0x"+hex(value)+" instead of 0x"+hex(leadUnitValue));
    506             }
    507         }
    508     }
    509 
    510     @Test
    511     public void TestDummyIntTrie() {
    512         IntTrie trie;
    513         final int initialValue=0x01234567, leadUnitValue=0x89abcdef;
    514         int value;
    515         int c;
    516         trie=new IntTrie(initialValue, leadUnitValue, new DummyGetFoldingOffset());
    517 
    518         /* test that all code points have initialValue */
    519         for(c=0; c<=0x10ffff; ++c) {
    520             value=trie.getCodePointValue(c);
    521             if(value!=initialValue) {
    522                 errln("IntTrie/dummy.getCodePointValue(c)(U+"+hex(c)+")=0x"+hex(value)+" instead of 0x"+hex(initialValue));
    523             }
    524         }
    525 
    526         /* test that the lead surrogate code units have leadUnitValue */
    527         for(c=0xd800; c<=0xdbff; ++c) {
    528             value=trie.getLeadValue((char)c);
    529             if(value!=leadUnitValue) {
    530                 errln("IntTrie/dummy.getLeadValue(c)(U+"+hex(c)+")=0x"+hex(value)+" instead of 0x"+hex(leadUnitValue));
    531             }
    532         }
    533     }
    534 }
    535