Home | History | Annotate | Download | only in lang
      1 /*
      2  *******************************************************************************
      3  * Copyright (C) 1996-2014, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  *******************************************************************************
      6  */
      7 package com.ibm.icu.dev.test.lang;
      8 
      9 import java.text.NumberFormat;
     10 import java.text.ParsePosition;
     11 import java.util.ArrayList;
     12 import java.util.Arrays;
     13 import java.util.Collection;
     14 import java.util.Comparator;
     15 import java.util.HashMap;
     16 import java.util.HashSet;
     17 import java.util.Iterator;
     18 import java.util.LinkedHashSet;
     19 import java.util.List;
     20 import java.util.Set;
     21 import java.util.SortedSet;
     22 import java.util.TreeSet;
     23 
     24 import com.ibm.icu.dev.test.TestFmwk;
     25 import com.ibm.icu.dev.util.CollectionUtilities;
     26 import com.ibm.icu.impl.SortedSetRelation;
     27 import com.ibm.icu.impl.Utility;
     28 import com.ibm.icu.lang.UCharacter;
     29 import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
     30 import com.ibm.icu.lang.UProperty;
     31 import com.ibm.icu.lang.UScript;
     32 import com.ibm.icu.text.SymbolTable;
     33 import com.ibm.icu.text.UTF16;
     34 import com.ibm.icu.text.UnicodeMatcher;
     35 import com.ibm.icu.text.UnicodeSet;
     36 import com.ibm.icu.text.UnicodeSet.ComparisonStyle;
     37 import com.ibm.icu.text.UnicodeSet.EntryRange;
     38 import com.ibm.icu.text.UnicodeSet.SpanCondition;
     39 import com.ibm.icu.text.UnicodeSetIterator;
     40 import com.ibm.icu.text.UnicodeSetSpanner;
     41 import com.ibm.icu.text.UnicodeSetSpanner.CountMethod;
     42 import com.ibm.icu.text.UnicodeSetSpanner.TrimOption;
     43 import com.ibm.icu.util.OutputInt;
     44 
     45 /**
     46  * @test
     47  * @summary General test of UnicodeSet
     48  */
     49 public class UnicodeSetTest extends TestFmwk {
     50 
    // Sentinel placed inside the String[] arguments passed to expectToPattern().
    // NOTE(review): presumably the entries before NOT must be contained in the set
    // and the entries after it must not be -- confirm against expectToPattern().
    static final String NOT = "%%%%";
     52 
     53     public static void main(String[] args) throws Exception {
     54         new UnicodeSetTest().run(args);
     55     }
     56 
     57     private static final boolean isCccValue(int ccc) {
     58         switch (ccc) {
     59         case 0:
     60         case 1:
     61         case 7:
     62         case 8:
     63         case 9:
     64         case 200:
     65         case 202:
     66         case 216:
     67         case 218:
     68         case 220:
     69         case 222:
     70         case 224:
     71         case 226:
     72         case 228:
     73         case 230:
     74         case 232:
     75         case 233:
     76         case 234:
     77         case 240:
     78             return true;
     79         default:
     80             return false;
     81         }
     82     }
     83 
    /**
     * Walks the binary and integer properties (BINARY_START up to INT_LIMIT,
     * jumping over the gap between BINARY_LIMIT and INT_START) and, for each
     * name choice from SHORT to LONG and each property value, checks that:
     * the property and value names can be fetched, a UnicodeSet can be built
     * from "[:name=value:]", and every code point in that set really has that
     * value according to UCharacter.getIntPropertyValue().
     * When getInclusion() is 5 or lower only every fifth property is tested.
     */
    public void TestPropertyAccess() {
        int count = 0;
        // test to see that all of the names work
        for (int propNum = UProperty.BINARY_START; propNum < UProperty.INT_LIMIT; ++propNum) {
            count++;
            //Skipping tests in the non-exhaustive mode to shorten the test time ticket#6475
            if(getInclusion()<=5 && count%5!=0){
                continue;
            }
            if (propNum >= UProperty.BINARY_LIMIT && propNum < UProperty.INT_START) { // skip the gap
                propNum = UProperty.INT_START;
            }
            for (int nameChoice = UProperty.NameChoice.SHORT; nameChoice <= UProperty.NameChoice.LONG; ++nameChoice) {
                String propName;
                try {
                    propName = UCharacter.getPropertyName(propNum, nameChoice);
                    if (propName == null) {
                        if (nameChoice == UProperty.NameChoice.SHORT) continue; // allow non-existent short names
                        // A missing long name is an error; the throw routes it to the catch below.
                        throw new NullPointerException();
                    }
                } catch (RuntimeException e1) {
                    errln("Can't get property name for: "
                            + "Property (" + propNum + ")"
                            + ", NameChoice: " + nameChoice + ", "
                            + e1.getClass().getName());
                    continue;
                }
                logln("Property (" + propNum + "): " + propName);
                // Try every value between the property's reported minimum and maximum.
                for (int valueNum = UCharacter.getIntPropertyMinValue(propNum); valueNum <= UCharacter.getIntPropertyMaxValue(propNum); ++valueNum) {
                    String valueName;
                    try {
                        valueName = UCharacter.getPropertyValueName(propNum, valueNum, nameChoice);
                        if (valueName == null) {
                            if (nameChoice == UProperty.NameChoice.SHORT) continue; // allow non-existent short names
                            if ((propNum == UProperty.CANONICAL_COMBINING_CLASS ||
                                    propNum == UProperty.LEAD_CANONICAL_COMBINING_CLASS ||
                                    propNum == UProperty.TRAIL_CANONICAL_COMBINING_CLASS) &&
                                    !isCccValue(valueNum)) {
                                // Only a few of the canonical combining classes have names.
                                // Otherwise they are just integer values.
                                continue;
                            } else {
                                // Any other missing value name is reported via the catch below.
                                throw new NullPointerException();
                            }
                        }
                    } catch (RuntimeException e1) {
                        errln("Can't get property value name for: "
                                + "Property (" + propNum + "): " + propName + ", "
                                + "Value (" + valueNum + ") "
                                + ", NameChoice: " + nameChoice + ", "
                                + e1.getClass().getName());
                        continue;
                    }
                    logln("Value (" + valueNum + "): " + valueName);
                    UnicodeSet testSet;
                    try {
                        testSet = new UnicodeSet("[:" + propName + "=" + valueName + ":]");
                    } catch (RuntimeException e) {
                        errln("Can't create UnicodeSet for: "
                                + "Property (" + propNum + "): " + propName + ", "
                                + "Value (" + valueNum + "): " + valueName + ", "
                                + e.getClass().getName());
                        continue;
                    }
                    // Collect every code point of the set whose actual property value disagrees,
                    // so that one message can report them all.
                    UnicodeSet collectedErrors = new UnicodeSet();
                    for (UnicodeSetIterator it = new UnicodeSetIterator(testSet); it.next();) {
                        int value = UCharacter.getIntPropertyValue(it.codepoint, propNum);
                        if (value != valueNum) {
                            collectedErrors.add(it.codepoint);
                        }
                    }
                    if (collectedErrors.size() != 0) {
                        errln("Property Value Differs: "
                                + "Property (" + propNum + "): " + propName + ", "
                                + "Value (" + valueNum + "): " + valueName + ", "
                                + "Differing values: " + collectedErrors.toPattern(true));
                    }
                }
            }
        }
    }
    165 
    166 
    /**
     * Test toPattern().
     * Covers round-tripping of generated patterns (via checkPat/toPatternAux),
     * the pattern text produced for sets holding multi-character strings, the
     * two-argument applyPattern(), applyPropertyAlias() and removeAllStrings().
     */
    public void TestToPattern() throws Exception {
        // Test that toPattern() round trips with syntax characters
        // and whitespace.
        for (int i = 0; i < OTHER_TOPATTERN_TESTS.length; ++i) {
            checkPat(OTHER_TOPATTERN_TESTS[i], new UnicodeSet(OTHER_TOPATTERN_TESTS[i]));
        }
        // Only non-letters up to 0xFF and whitespace code points are exercised here.
        for (int i = 0; i <= 0x10FFFF; ++i) {
            if ((i <= 0xFF && !UCharacter.isLetter(i)) || UCharacter.isWhitespace(i)) {
                // check various combinations to make sure they all work.
                if (i != 0 && !toPatternAux(i, i)) continue;
                if (!toPatternAux(0, i)) continue;
                if (!toPatternAux(i, 0xFFFF)) continue;
            }
        }

        // Test pattern behavior of multicharacter strings.
        UnicodeSet s = new UnicodeSet("[a-z {aa} {ab}]");
        expectToPattern(s, "[a-z{aa}{ab}]",
                new String[] {"aa", "ab", NOT, "ac"});
        s.add("ac");
        expectToPattern(s, "[a-z{aa}{ab}{ac}]",
                new String[] {"aa", "ab", "ac", NOT, "xy"});

        // Strings containing pattern syntax characters (braces, brackets).
        s.applyPattern("[a-z {\\{l} {r\\}}]");
        expectToPattern(s, "[a-z{r\\}}{\\{l}]",
                new String[] {"{l", "r}", NOT, "xy"});
        s.add("[]");
        expectToPattern(s, "[a-z{\\[\\]}{r\\}}{\\{l}]",
                new String[] {"{l", "r}", "[]", NOT, "xy"});

        // Strings containing non-ASCII characters and control characters.
        s.applyPattern("[a-z {\u4E01\u4E02}{\\n\\r}]");
        expectToPattern(s, "[a-z{\\u000A\\u000D}{\\u4E01\\u4E02}]",
                new String[] {"\u4E01\u4E02", "\n\r"});

        // Adding the same string twice must yield a single entry in the pattern.
        s.clear();
        s.add("abc");
        s.add("abc");
        expectToPattern(s, "[{abc}]",
                new String[] {"abc", NOT, "ab"});

        // JB#3400: For 2 character ranges prefer [ab] to [a-b]
        s.clear();
        s.add('a', 'b');
        expectToPattern(s, "[ab]", null);

        // Cover applyPattern, applyPropertyAlias
        // The boolean argument decides whether the space inside the pattern
        // is ignored (true) or becomes a member of the set (false).
        s.clear();
        s.applyPattern("[ab ]", true);
        expectToPattern(s, "[ab]", new String[] {"a", NOT, "ab", " "});
        s.clear();
        s.applyPattern("[ab ]", false);
        expectToPattern(s, "[\\ ab]", new String[] {"a", "\u0020", NOT, "ab"});

        s.clear();
        s.applyPropertyAlias("nv", "0.5");
        s.retainAll(new UnicodeSet("[:age=6.0:]"));  // stabilize this test
        expectToPattern(s, "[\\u00BD\\u0B73\\u0D74\\u0F2A\\u2CFD\\uA831\\U00010141\\U00010175\\U00010176\\U00010E7B]", null);
        // Unicode 5.1 adds Malayalam 1/2 (\u0D74)
        // Unicode 5.2 adds U+A831 NORTH INDIC FRACTION ONE HALF and U+10E7B RUMI FRACTION ONE HALF
        // Unicode 6.0 adds U+0B73 ORIYA FRACTION ONE HALF

        s.clear();
        s.applyPropertyAlias("gc", "Lu");
        // TODO expectToPattern(s, what?)

        // RemoveAllStrings()
        s.clear();
        s.applyPattern("[a-z{abc}{def}]");
        expectToPattern(s, "[a-z{abc}{def}]", null);
        s.removeAllStrings();
        expectToPattern(s, "[a-z]", null);
    }
    242 
    // Extra patterns that TestToPattern() parses into a UnicodeSet and then
    // feeds to checkPat() to verify that toPattern() output can be parsed back.
    static String[] OTHER_TOPATTERN_TESTS = {
        "[[:latin:]&[:greek:]]",
        "[[:latin:]-[:greek:]]",
        "[:nonspacing mark:]"
    };
    248 
    249 
    250     public boolean toPatternAux(int start, int end) {
    251         // use Integer.toString because Utility.hex doesn't handle ints
    252         String source = "0x" + Integer.toString(start,16).toUpperCase();
    253         if (start != end) source += "..0x" + Integer.toString(end,16).toUpperCase();
    254         UnicodeSet testSet = new UnicodeSet();
    255         testSet.add(start, end);
    256         return checkPat(source, testSet);
    257     }
    258 
    259     boolean checkPat (String source, UnicodeSet testSet) {
    260         String pat = "";
    261         try {
    262             // What we want to make sure of is that a pattern generated
    263             // by toPattern(), with or without escaped unprintables, can
    264             // be passed back into the UnicodeSet constructor.
    265             String pat0 = testSet.toPattern(true);
    266             if (!checkPat(source + " (escaped)", testSet, pat0)) return false;
    267 
    268             //String pat1 = unescapeLeniently(pat0);
    269             //if (!checkPat(source + " (in code)", testSet, pat1)) return false;
    270 
    271             String pat2 = testSet.toPattern(false);
    272             if (!checkPat(source, testSet, pat2)) return false;
    273 
    274             //String pat3 = unescapeLeniently(pat2);
    275             //if (!checkPat(source + " (in code)", testSet, pat3)) return false;
    276 
    277             //logln(source + " => " + pat0 + ", " + pat1 + ", " + pat2 + ", " + pat3);
    278             logln(source + " => " + pat0 + ", " + pat2);
    279         } catch (Exception e) {
    280             errln("EXCEPTION in toPattern: " + source + " => " + pat);
    281             return false;
    282         }
    283         return true;
    284     }
    285 
    286     boolean checkPat (String source, UnicodeSet testSet, String pat) {
    287         UnicodeSet testSet2 = new UnicodeSet(pat);
    288         if (!testSet2.equals(testSet)) {
    289             errln("Fail toPattern: " + source + "; " + pat + " => " +
    290                     testSet2.toPattern(false) + ", expected " +
    291                     testSet.toPattern(false));
    292             return false;
    293         }
    294         return true;
    295     }
    296 
    297     // NOTE: copied the following from Utility. There ought to be a version in there with a flag
    298     // that does the Java stuff
    299 
    300     public static int unescapeAt(String s, int[] offset16) {
    301         int c;
    302         int result = 0;
    303         int n = 0;
    304         int minDig = 0;
    305         int maxDig = 0;
    306         int bitsPerDigit = 4;
    307         int dig;
    308         int i;
    309 
    310         /* Check that offset is in range */
    311         int offset = offset16[0];
    312         int length = s.length();
    313         if (offset < 0 || offset >= length) {
    314             return -1;
    315         }
    316 
    317         /* Fetch first UChar after '\\' */
    318         c = UTF16.charAt(s, offset);
    319         offset += UTF16.getCharCount(c);
    320 
    321         /* Convert hexadecimal and octal escapes */
    322         switch (c) {
    323         case 'u':
    324             minDig = maxDig = 4;
    325             break;
    326             /*
    327          case 'U':
    328          minDig = maxDig = 8;
    329          break;
    330          case 'x':
    331          minDig = 1;
    332          maxDig = 2;
    333          break;
    334              */
    335         default:
    336             dig = UCharacter.digit(c, 8);
    337             if (dig >= 0) {
    338                 minDig = 1;
    339                 maxDig = 3;
    340                 n = 1; /* Already have first octal digit */
    341                 bitsPerDigit = 3;
    342                 result = dig;
    343             }
    344             break;
    345         }
    346         if (minDig != 0) {
    347             while (offset < length && n < maxDig) {
    348                 // TEMPORARY
    349                 // TODO: Restore the char32-based code when UCharacter.digit
    350                 // is working (Bug 66).
    351 
    352                 //c = UTF16.charAt(s, offset);
    353                 //dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16);
    354                 c = s.charAt(offset);
    355                 dig = Character.digit((char)c, (bitsPerDigit == 3) ? 8 : 16);
    356                 if (dig < 0) {
    357                     break;
    358                 }
    359                 result = (result << bitsPerDigit) | dig;
    360                 //offset += UTF16.getCharCount(c);
    361                 ++offset;
    362                 ++n;
    363             }
    364             if (n < minDig) {
    365                 return -1;
    366             }
    367             offset16[0] = offset;
    368             return result;
    369         }
    370 
    371         /* Convert C-style escapes in table */
    372         for (i=0; i<UNESCAPE_MAP.length; i+=2) {
    373             if (c == UNESCAPE_MAP[i]) {
    374                 offset16[0] = offset;
    375                 return UNESCAPE_MAP[i+1];
    376             } else if (c < UNESCAPE_MAP[i]) {
    377                 break;
    378             }
    379         }
    380 
    381         /* If no special forms are recognized, then consider
    382          * the backslash to generically escape the next character. */
    383         offset16[0] = offset;
    384         return c;
    385     }
    386 
    /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
    // Flat list of pairs: the escape letter followed by the character it stands for.
    // unescapeAt() scans it two entries at a time and stops at the first key that is
    // greater than the character being looked up, which is why the order matters.
    // The commented-out entries are not part of the table.
    static private final char[] UNESCAPE_MAP = {
        /*"   0x22, 0x22 */
        /*'   0x27, 0x27 */
        /*?   0x3F, 0x3F */
        /*\   0x5C, 0x5C */
        /*a*/ 0x61, 0x07,
        /*b*/ 0x62, 0x08,
        /*f*/ 0x66, 0x0c,
        /*n*/ 0x6E, 0x0a,
        /*r*/ 0x72, 0x0d,
        /*t*/ 0x74, 0x09,
        /*v*/ 0x76, 0x0b
    };
    401 
    402     /**
    403      * Convert all escapes in a given string using unescapeAt().
    404      * Leave invalid escape sequences unchanged.
    405      */
    406     public static String unescapeLeniently(String s) {
    407         StringBuffer buf = new StringBuffer();
    408         int[] pos = new int[1];
    409         for (int i=0; i<s.length(); ) {
    410             char c = s.charAt(i++);
    411             if (c == '\\') {
    412                 pos[0] = i;
    413                 int e = unescapeAt(s, pos);
    414                 if (e < 0) {
    415                     buf.append(c);
    416                 } else {
    417                     UTF16.append(buf, e);
    418                     i = pos[0];
    419                 }
    420             } else {
    421                 buf.append(c);
    422             }
    423         }
    424         return buf.toString();
    425     }
    426 
    427     public void TestPatterns() {
    428         UnicodeSet set = new UnicodeSet();
    429         expectPattern(set, "[[a-m]&[d-z]&[k-y]]",  "km");
    430         expectPattern(set, "[[a-z]-[m-y]-[d-r]]",  "aczz");
    431         expectPattern(set, "[a\\-z]",  "--aazz");
    432         expectPattern(set, "[-az]",  "--aazz");
    433         expectPattern(set, "[az-]",  "--aazz");
    434         expectPattern(set, "[[[a-z]-[aeiou]i]]", "bdfnptvz");
    435 
    436         // Throw in a test of complement
    437         set.complement();
    438         String exp = '\u0000' + "aeeoouu" + (char)('z'+1) + '\uFFFF';
    439         expectPairs(set, exp);
    440     }
    441 
    442     public void TestCategories() {
    443         int failures = 0;
    444         UnicodeSet set = new UnicodeSet("[:Lu:]");
    445         expectContainment(set, "ABC", "abc");
    446 
    447         // Make sure generation of L doesn't pollute cached Lu set
    448         // First generate L, then Lu
    449         // not used int TOP = 0x200; // Don't need to go over the whole range:
    450         set = new UnicodeSet("[:L:]");
    451         for (int i=0; i<0x200; ++i) {
    452             boolean l = UCharacter.isLetter(i);
    453             if (l != set.contains((char)i)) {
    454                 errln("FAIL: L contains " + (char)i + " = " +
    455                         set.contains((char)i));
    456                 if (++failures == 10) break;
    457             }
    458         }
    459 
    460         set = new UnicodeSet("[:Lu:]");
    461         for (int i=0; i<0x200; ++i) {
    462             boolean lu = (UCharacter.getType(i) == ECharacterCategory.UPPERCASE_LETTER);
    463             if (lu != set.contains((char)i)) {
    464                 errln("FAIL: Lu contains " + (char)i + " = " +
    465                         set.contains((char)i));
    466                 if (++failures == 20) break;
    467             }
    468         }
    469     }
    470 
    471     public void TestAddRemove() {
    472         UnicodeSet set = new UnicodeSet();
    473         set.add('a', 'z');
    474         expectPairs(set, "az");
    475         set.remove('m', 'p');
    476         expectPairs(set, "alqz");
    477         set.remove('e', 'g');
    478         expectPairs(set, "adhlqz");
    479         set.remove('d', 'i');
    480         expectPairs(set, "acjlqz");
    481         set.remove('c', 'r');
    482         expectPairs(set, "absz");
    483         set.add('f', 'q');
    484         expectPairs(set, "abfqsz");
    485         set.remove('a', 'g');
    486         expectPairs(set, "hqsz");
    487         set.remove('a', 'z');
    488         expectPairs(set, "");
    489 
    490         // Try removing an entire set from another set
    491         expectPattern(set, "[c-x]", "cx");
    492         UnicodeSet set2 = new UnicodeSet();
    493         expectPattern(set2, "[f-ky-za-bc[vw]]", "acfkvwyz");
    494         set.removeAll(set2);
    495         expectPairs(set, "deluxx");
    496 
    497         // Try adding an entire set to another set
    498         expectPattern(set, "[jackiemclean]", "aacceein");
    499         expectPattern(set2, "[hitoshinamekatajamesanderson]", "aadehkmort");
    500         set.addAll(set2);
    501         expectPairs(set, "aacehort");
    502 
    503         // Test commutativity
    504         expectPattern(set, "[hitoshinamekatajamesanderson]", "aadehkmort");
    505         expectPattern(set2, "[jackiemclean]", "aacceein");
    506         set.addAll(set2);
    507         expectPairs(set, "aacehort");
    508     }
    509 
    510     /**
    511      * Make sure minimal representation is maintained.
    512      */
    513     public void TestMinimalRep() {
    514         // This is pretty thoroughly tested by checkCanonicalRep()
    515         // run against the exhaustive operation results.  Use the code
    516         // here for debugging specific spot problems.
    517 
    518         // 1 overlap against 2
    519         UnicodeSet set = new UnicodeSet("[h-km-q]");
    520         UnicodeSet set2 = new UnicodeSet("[i-o]");
    521         set.addAll(set2);
    522         expectPairs(set, "hq");
    523         // right
    524         set.applyPattern("[a-m]");
    525         set2.applyPattern("[e-o]");
    526         set.addAll(set2);
    527         expectPairs(set, "ao");
    528         // left
    529         set.applyPattern("[e-o]");
    530         set2.applyPattern("[a-m]");
    531         set.addAll(set2);
    532         expectPairs(set, "ao");
    533         // 1 overlap against 3
    534         set.applyPattern("[a-eg-mo-w]");
    535         set2.applyPattern("[d-q]");
    536         set.addAll(set2);
    537         expectPairs(set, "aw");
    538     }
    539 
    540     public void TestAPI() {
    541         // default ct
    542         UnicodeSet set = new UnicodeSet();
    543         if (!set.isEmpty() || set.getRangeCount() != 0) {
    544             errln("FAIL, set should be empty but isn't: " +
    545                     set);
    546         }
    547 
    548         // clear(), isEmpty()
    549         set.add('a');
    550         if (set.isEmpty()) {
    551             errln("FAIL, set shouldn't be empty but is: " +
    552                     set);
    553         }
    554         set.clear();
    555         if (!set.isEmpty()) {
    556             errln("FAIL, set should be empty but isn't: " +
    557                     set);
    558         }
    559 
    560         // size()
    561         set.clear();
    562         if (set.size() != 0) {
    563             errln("FAIL, size should be 0, but is " + set.size() +
    564                     ": " + set);
    565         }
    566         set.add('a');
    567         if (set.size() != 1) {
    568             errln("FAIL, size should be 1, but is " + set.size() +
    569                     ": " + set);
    570         }
    571         set.add('1', '9');
    572         if (set.size() != 10) {
    573             errln("FAIL, size should be 10, but is " + set.size() +
    574                     ": " + set);
    575         }
    576         set.clear();
    577         set.complement();
    578         if (set.size() != 0x110000) {
    579             errln("FAIL, size should be 0x110000, but is" + set.size());
    580         }
    581 
    582         // contains(first, last)
    583         set.clear();
    584         set.applyPattern("[A-Y 1-8 b-d l-y]");
    585         for (int i = 0; i<set.getRangeCount(); ++i) {
    586             int a = set.getRangeStart(i);
    587             int b = set.getRangeEnd(i);
    588             if (!set.contains(a, b)) {
    589                 errln("FAIL, should contain " + (char)a + '-' + (char)b +
    590                         " but doesn't: " + set);
    591             }
    592             if (set.contains((char)(a-1), b)) {
    593                 errln("FAIL, shouldn't contain " +
    594                         (char)(a-1) + '-' + (char)b +
    595                         " but does: " + set);
    596             }
    597             if (set.contains(a, (char)(b+1))) {
    598                 errln("FAIL, shouldn't contain " +
    599                         (char)a + '-' + (char)(b+1) +
    600                         " but does: " + set);
    601             }
    602         }
    603 
    604         // Ported InversionList test.
    605         UnicodeSet a = new UnicodeSet((char)3,(char)10);
    606         UnicodeSet b = new UnicodeSet((char)7,(char)15);
    607         UnicodeSet c = new UnicodeSet();
    608 
    609         logln("a [3-10]: " + a);
    610         logln("b [7-15]: " + b);
    611         c.set(a); c.addAll(b);
    612         UnicodeSet exp = new UnicodeSet((char)3,(char)15);
    613         if (c.equals(exp)) {
    614             logln("c.set(a).add(b): " + c);
    615         } else {
    616             errln("FAIL: c.set(a).add(b) = " + c + ", expect " + exp);
    617         }
    618         c.complement();
    619         exp.set((char)0, (char)2);
    620         exp.add((char)16, UnicodeSet.MAX_VALUE);
    621         if (c.equals(exp)) {
    622             logln("c.complement(): " + c);
    623         } else {
    624             errln(Utility.escape("FAIL: c.complement() = " + c + ", expect " + exp));
    625         }
    626         c.complement();
    627         exp.set((char)3, (char)15);
    628         if (c.equals(exp)) {
    629             logln("c.complement(): " + c);
    630         } else {
    631             errln("FAIL: c.complement() = " + c + ", expect " + exp);
    632         }
    633         c.set(a); c.complementAll(b);
    634         exp.set((char)3,(char)6);
    635         exp.add((char)11,(char) 15);
    636         if (c.equals(exp)) {
    637             logln("c.set(a).complement(b): " + c);
    638         } else {
    639             errln("FAIL: c.set(a).complement(b) = " + c + ", expect " + exp);
    640         }
    641 
    642         exp.set(c);
    643         c = bitsToSet(setToBits(c));
    644         if (c.equals(exp)) {
    645             logln("bitsToSet(setToBits(c)): " + c);
    646         } else {
    647             errln("FAIL: bitsToSet(setToBits(c)) = " + c + ", expect " + exp);
    648         }
    649 
    650         // Additional tests for coverage JB#2118
    651         //UnicodeSet::complement(class UnicodeString const &)
    652         //UnicodeSet::complementAll(class UnicodeString const &)
    653         //UnicodeSet::containsNone(class UnicodeSet const &)
    654         //UnicodeSet::containsNone(long,long)
    655         //UnicodeSet::containsSome(class UnicodeSet const &)
    656         //UnicodeSet::containsSome(long,long)
    657         //UnicodeSet::removeAll(class UnicodeString const &)
    658         //UnicodeSet::retain(long)
    659         //UnicodeSet::retainAll(class UnicodeString const &)
    660         //UnicodeSet::serialize(unsigned short *,long,enum UErrorCode &)
    661         //UnicodeSetIterator::getString(void)
    662         set.clear();
    663         set.complement("ab");
    664         exp.applyPattern("[{ab}]");
    665         if (!set.equals(exp)) { errln("FAIL: complement(\"ab\")"); return; }
    666 
    667         UnicodeSetIterator iset = new UnicodeSetIterator(set);
    668         if (!iset.next() || iset.codepoint != UnicodeSetIterator.IS_STRING) {
    669             errln("FAIL: UnicodeSetIterator.next/IS_STRING");
    670         } else if (!iset.string.equals("ab")) {
    671             errln("FAIL: UnicodeSetIterator.string");
    672         }
    673 
    674         set.add((char)0x61, (char)0x7A);
    675         set.complementAll("alan");
    676         exp.applyPattern("[{ab}b-kmo-z]");
    677         if (!set.equals(exp)) { errln("FAIL: complementAll(\"alan\")"); return; }
    678 
    679         exp.applyPattern("[a-z]");
    680         if (set.containsNone(exp)) { errln("FAIL: containsNone(UnicodeSet)"); }
    681         if (!set.containsSome(exp)) { errln("FAIL: containsSome(UnicodeSet)"); }
    682         exp.applyPattern("[aln]");
    683         if (!set.containsNone(exp)) { errln("FAIL: containsNone(UnicodeSet)"); }
    684         if (set.containsSome(exp)) { errln("FAIL: containsSome(UnicodeSet)"); }
    685 
    686         if (set.containsNone((char)0x61, (char)0x7A)) {
    687             errln("FAIL: containsNone(char, char)");
    688         }
    689         if (!set.containsSome((char)0x61, (char)0x7A)) {
    690             errln("FAIL: containsSome(char, char)");
    691         }
    692         if (!set.containsNone((char)0x41, (char)0x5A)) {
    693             errln("FAIL: containsNone(char, char)");
    694         }
    695         if (set.containsSome((char)0x41, (char)0x5A)) {
    696             errln("FAIL: containsSome(char, char)");
    697         }
    698 
    699         set.removeAll("liu");
    700         exp.applyPattern("[{ab}b-hj-kmo-tv-z]");
    701         if (!set.equals(exp)) { errln("FAIL: removeAll(\"liu\")"); return; }
    702 
    703         set.retainAll("star");
    704         exp.applyPattern("[rst]");
    705         if (!set.equals(exp)) { errln("FAIL: retainAll(\"star\")"); return; }
    706 
    707         set.retain((char)0x73);
    708         exp.applyPattern("[s]");
    709         if (!set.equals(exp)) { errln("FAIL: retain('s')"); return; }
    710 
    711         // ICU 2.6 coverage tests
    712         // public final UnicodeSet retain(String s);
    713         // public final UnicodeSet remove(int c);
    714         // public final UnicodeSet remove(String s);
    715         // public int hashCode();
    716         set.applyPattern("[a-z{ab}{cd}]");
    717         set.retain("cd");
    718         exp.applyPattern("[{cd}]");
    719         if (!set.equals(exp)) { errln("FAIL: retain(\"cd\")"); return; }
    720 
    721         set.applyPattern("[a-z{ab}{cd}]");
    722         set.remove((char)0x63);
    723         exp.applyPattern("[abd-z{ab}{cd}]");
    724         if (!set.equals(exp)) { errln("FAIL: remove('c')"); return; }
    725 
    726         set.remove("cd");
    727         exp.applyPattern("[abd-z{ab}]");
    728         if (!set.equals(exp)) { errln("FAIL: remove(\"cd\")"); return; }
    729 
    730         if (set.hashCode() != exp.hashCode()) {
    731             errln("FAIL: hashCode() unequal");
    732         }
    733         exp.clear();
    734         if (set.hashCode() == exp.hashCode()) {
    735             errln("FAIL: hashCode() equal");
    736         }
    737 
    738         {
    739             //Cover addAll(Collection) and addAllTo(Collection)
    740             //  Seems that there is a bug in addAll(Collection) operation
    741             //    Ram also add a similar test to UtilityTest.java
    742             logln("Testing addAll(Collection) ... ");
    743             String[] array = {"a", "b", "c", "de"};
    744             List list = Arrays.asList(array);
    745             Set aset = new HashSet(list);
    746             logln(" *** The source set's size is: " + aset.size());
    747 
    748             set.clear();
    749             set.addAll(aset);
    750             if (set.size() != aset.size()) {
    751                 errln("FAIL: After addAll, the UnicodeSet size expected " + aset.size() +
    752                         ", " + set.size() + " seen instead!");
    753             } else {
    754                 logln("OK: After addAll, the UnicodeSet size got " + set.size());
    755             }
    756 
    757             List list2 = new ArrayList();
    758             set.addAllTo(list2);
    759 
    760             //verify the result
    761             log(" *** The elements are: ");
    762             String s = set.toPattern(true);
    763             logln(s);
    764             Iterator myiter = list2.iterator();
    765             while(myiter.hasNext()) {
    766                 log(myiter.next().toString() + "  ");
    767             }
    768             logln("");  // a new line
    769         }
    770 
    771     }
    772 
    773     public void TestStrings() {
    774         //  Object[][] testList = {
    775         //  {I_EQUALS,  UnicodeSet.fromAll("abc"),
    776         //  new UnicodeSet("[a-c]")},
    777         //
    778         //  {I_EQUALS,  UnicodeSet.from("ch").add('a','z').add("ll"),
    779         //  new UnicodeSet("[{ll}{ch}a-z]")},
    780         //
    781         //  {I_EQUALS,  UnicodeSet.from("ab}c"),
    782         //  new UnicodeSet("[{ab\\}c}]")},
    783         //
    784         //  {I_EQUALS,  new UnicodeSet('a','z').add('A', 'Z').retain('M','m').complement('X'),
    785         //  new UnicodeSet("[[a-zA-Z]&[M-m]-[X]]")},
    786         //  };
    787         //
    788         //  for (int i = 0; i < testList.length; ++i) {
    789         //  expectRelation(testList[i][0], testList[i][1], testList[i][2], "(" + i + ")");
    790         //  }
    791 
    792         UnicodeSet[][] testList = {
    793                 {UnicodeSet.fromAll("abc"),
    794                     new UnicodeSet("[a-c]")},
    795 
    796                     {UnicodeSet.from("ch").add('a','z').add("ll"),
    797                         new UnicodeSet("[{ll}{ch}a-z]")},
    798 
    799                         {UnicodeSet.from("ab}c"),
    800                             new UnicodeSet("[{ab\\}c}]")},
    801 
    802                             {new UnicodeSet('a','z').add('A', 'Z').retain('M','m').complement('X'),
    803                                 new UnicodeSet("[[a-zA-Z]&[M-m]-[X]]")},
    804         };
    805 
    806         for (int i = 0; i < testList.length; ++i) {
    807             if (!testList[i][0].equals(testList[i][1])) {
    808                 errln("FAIL: sets unequal; see source code (" + i + ")");
    809             }
    810         }
    811     }
    812 
    813     static final Integer
    814     I_ANY = new Integer(SortedSetRelation.ANY),
    815     I_CONTAINS = new Integer(SortedSetRelation.CONTAINS),
    816     I_DISJOINT = new Integer(SortedSetRelation.DISJOINT),
    817     I_NO_B = new Integer(SortedSetRelation.NO_B),
    818     I_ISCONTAINED = new Integer(SortedSetRelation.ISCONTAINED),
    819     I_EQUALS = new Integer(SortedSetRelation.EQUALS),
    820     I_NO_A = new Integer(SortedSetRelation.NO_A),
    821     I_NONE = new Integer(SortedSetRelation.NONE);
    822 
    823     public void TestSetRelation() {
    824 
    825         String[] choices = {"a", "b", "cd", "ef"};
    826         int limit = 1 << choices.length;
    827 
    828         SortedSet iset = new TreeSet();
    829         SortedSet jset = new TreeSet();
    830 
    831         for (int i = 0; i < limit; ++i) {
    832             pick(i, choices, iset);
    833             for (int j = 0; j < limit; ++j) {
    834                 pick(j, choices, jset);
    835                 checkSetRelation(iset, jset, "(" + i + ")");
    836             }
    837         }
    838     }
    839 
    840     public void TestSetSpeed() {
    841         // skip unless verbose
    842         if (!isVerbose()) return;
    843 
    844         SetSpeed2(100);
    845         SetSpeed2(1000);
    846     }
    847 
    848     public void SetSpeed2(int size) {
    849 
    850         SortedSet iset = new TreeSet();
    851         SortedSet jset = new TreeSet();
    852 
    853         for (int i = 0; i < size*2; i += 2) { // only even values
    854             iset.add(new Integer(i));
    855             jset.add(new Integer(i));
    856         }
    857 
    858         int iterations = 1000000 / size;
    859 
    860         logln("Timing comparison of Java vs Utility");
    861         logln("For about " + size + " objects that are almost all the same.");
    862 
    863         CheckSpeed(iset, jset, "when a = b", iterations);
    864 
    865         iset.add(new Integer(size + 1));    // add odd value in middle
    866 
    867         CheckSpeed(iset, jset, "when a contains b", iterations);
    868         CheckSpeed(jset, iset, "when b contains a", iterations);
    869 
    870         jset.add(new Integer(size - 1));    // add different odd value in middle
    871 
    872         CheckSpeed(jset, iset, "when a, b are disjoint", iterations);
    873     }
    874 
    875     void CheckSpeed(SortedSet iset, SortedSet jset, String message, int iterations) {
    876         CheckSpeed2(iset, jset, message, iterations);
    877         CheckSpeed3(iset, jset, message, iterations);
    878     }
    879 
    880     void CheckSpeed2(SortedSet iset, SortedSet jset, String message, int iterations) {
    881         boolean x;
    882         boolean y;
    883 
    884         // make sure code is loaded:
    885         x = iset.containsAll(jset);
    886         y = SortedSetRelation.hasRelation(iset, SortedSetRelation.CONTAINS, jset);
    887         if (x != y) errln("FAIL contains comparison");
    888 
    889         double start = System.currentTimeMillis();
    890         for (int i = 0; i < iterations; ++i) {
    891             x |= iset.containsAll(jset);
    892         }
    893         double middle = System.currentTimeMillis();
    894         for (int i = 0; i < iterations; ++i) {
    895             y |= SortedSetRelation.hasRelation(iset, SortedSetRelation.CONTAINS, jset);
    896         }
    897         double end = System.currentTimeMillis();
    898 
    899         double jtime = (middle - start)/iterations;
    900         double utime = (end - middle)/iterations;
    901 
    902         NumberFormat nf = NumberFormat.getPercentInstance();
    903         logln("Test contains: " + message + ": Java: " + jtime
    904                 + ", Utility: " + utime + ", u:j: " + nf.format(utime/jtime));
    905     }
    906 
    907     void CheckSpeed3(SortedSet iset, SortedSet jset, String message, int iterations) {
    908         boolean x;
    909         boolean y;
    910 
    911         // make sure code is loaded:
    912         x = iset.equals(jset);
    913         y = SortedSetRelation.hasRelation(iset, SortedSetRelation.EQUALS, jset);
    914         if (x != y) errln("FAIL equality comparison");
    915 
    916 
    917         double start = System.currentTimeMillis();
    918         for (int i = 0; i < iterations; ++i) {
    919             x |= iset.equals(jset);
    920         }
    921         double middle = System.currentTimeMillis();
    922         for (int i = 0; i < iterations; ++i) {
    923             y |= SortedSetRelation.hasRelation(iset, SortedSetRelation.EQUALS, jset);
    924         }
    925         double end = System.currentTimeMillis();
    926 
    927         double jtime = (middle - start)/iterations;
    928         double utime = (end - middle)/iterations;
    929 
    930         NumberFormat nf = NumberFormat.getPercentInstance();
    931         logln("Test equals:   " + message + ": Java: " + jtime
    932                 + ", Utility: " + utime + ", u:j: " + nf.format(utime/jtime));
    933     }
    934 
    935     void pick(int bits, Object[] examples, SortedSet output) {
    936         output.clear();
    937         for (int k = 0; k < 32; ++k) {
    938             if (((1<<k) & bits) != 0) output.add(examples[k]);
    939         }
    940     }
    941 
    942     public static final String[] RELATION_NAME = {
    943         "both-are-null",
    944         "a-is-null",
    945         "equals",
    946         "is-contained-in",
    947         "b-is-null",
    948         "is-disjoint_with",
    949         "contains",
    950         "any", };
    951 
    952     boolean dumbHasRelation(Collection A, int filter, Collection B) {
    953         Collection ab = new TreeSet(A);
    954         ab.retainAll(B);
    955         if (ab.size() > 0 && (filter & SortedSetRelation.A_AND_B) == 0) return false;
    956 
    957         // A - B size == A.size - A&B.size
    958         if (A.size() > ab.size() && (filter & SortedSetRelation.A_NOT_B) == 0) return false;
    959 
    960         // B - A size == B.size - A&B.size
    961         if (B.size() > ab.size() && (filter & SortedSetRelation.B_NOT_A) == 0) return false;
    962 
    963 
    964         return true;
    965     }
    966 
    967     void checkSetRelation(SortedSet a, SortedSet b, String message) {
    968         for (int i = 0; i < 8; ++i) {
    969 
    970             boolean hasRelation = SortedSetRelation.hasRelation(a, i, b);
    971             boolean dumbHasRelation = dumbHasRelation(a, i, b);
    972 
    973             logln(message + " " + hasRelation + ":\t" + a + "\t" + RELATION_NAME[i] + "\t" + b);
    974 
    975             if (hasRelation != dumbHasRelation) {
    976                 errln("FAIL: " +
    977                         message + " " + dumbHasRelation + ":\t" + a + "\t" + RELATION_NAME[i] + "\t" + b);
    978             }
    979         }
    980         logln("");
    981     }
    982 
    983     /**
    984      * Test the [:Latin:] syntax.
    985      */
    986     public void TestScriptSet() {
    987 
    988         expectContainment("[:Latin:]", "aA", CharsToUnicodeString("\\u0391\\u03B1"));
    989 
    990         expectContainment("[:Greek:]", CharsToUnicodeString("\\u0391\\u03B1"), "aA");
    991 
    992         /* Jitterbug 1423 */
    993         expectContainment("[[:Common:][:Inherited:]]", CharsToUnicodeString("\\U00003099\\U0001D169\\u0000"), "aA");
    994 
    995     }
    996 
    997     /**
    998      * Test the [:Latin:] syntax.
    999      */
   1000     public void TestPropertySet() {
   1001         String[] DATA = {
   1002                 // Pattern, Chars IN, Chars NOT in
   1003 
   1004                 "[:Latin:]",
   1005                 "aA",
   1006                 "\u0391\u03B1",
   1007 
   1008                 "[\\p{Greek}]",
   1009                 "\u0391\u03B1",
   1010                 "aA",
   1011 
   1012                 "\\P{ GENERAL Category = upper case letter }",
   1013                 "abc",
   1014                 "ABC",
   1015 
   1016                 // Combining class: @since ICU 2.2
   1017                 // Check both symbolic and numeric
   1018                 "\\p{ccc=Nukta}",
   1019                 "\u0ABC",
   1020                 "abc",
   1021 
   1022                 "\\p{Canonical Combining Class = 11}",
   1023                 "\u05B1",
   1024                 "\u05B2",
   1025 
   1026                 "[:c c c = iota subscript :]",
   1027                 "\u0345",
   1028                 "xyz",
   1029 
   1030                 // Bidi class: @since ICU 2.2
   1031                 "\\p{bidiclass=lefttoright}",
   1032                 "abc",
   1033                 "\u0671\u0672",
   1034 
   1035                 // Binary properties: @since ICU 2.2
   1036                 "\\p{ideographic}",
   1037                 "\u4E0A",
   1038                 "x",
   1039 
   1040                 "[:math=false:]",
   1041                 "q)*(", // )(and * were removed from math in Unicode 4.0.1
   1042                 "+<>^",
   1043 
   1044                 // JB#1767 \N{}, \p{ASCII}
   1045                 "[:Ascii:]",
   1046                 "abc\u0000\u007F",
   1047                 "\u0080\u4E00",
   1048 
   1049                 "[\\N{ latin small letter  a  }[:name= latin small letter z:]]",
   1050                 "az",
   1051                 "qrs",
   1052 
   1053                 // JB#2015
   1054                 "[:any:]",
   1055                 "a\\U0010FFFF",
   1056                 "",
   1057 
   1058                 "[:nv=0.5:]",
   1059                 "\u00BD\u0F2A",
   1060                 "\u00BC",
   1061 
   1062                 // JB#2653: Age
   1063                 "[:Age=1.1:]",
   1064                 "\u03D6", // 1.1
   1065                 "\u03D8\u03D9", // 3.2
   1066 
   1067                 "[:Age=3.1:]",
   1068                 "\\u1800\\u3400\\U0002f800",
   1069                 "\\u0220\\u034f\\u30ff\\u33ff\\ufe73\\U00010000\\U00050000",
   1070 
   1071                 // JB#2350: Case_Sensitive
   1072                 "[:Case Sensitive:]",
   1073                 "A\u1FFC\\U00010410",
   1074                 ";\u00B4\\U00010500",
   1075 
   1076 
   1077                 // Regex compatibility test
   1078                 "[-b]", // leading '-' is literal
   1079                 "-b",
   1080                 "ac",
   1081 
   1082                 "[^-b]", // leading '-' is literal
   1083                 "ac",
   1084                 "-b",
   1085 
   1086                 "[b-]", // trailing '-' is literal
   1087                 "-b",
   1088                 "ac",
   1089 
   1090                 "[^b-]", // trailing '-' is literal
   1091                 "ac",
   1092                 "-b",
   1093 
   1094                 "[a-b-]", // trailing '-' is literal
   1095                 "ab-",
   1096                 "c=",
   1097 
   1098                 "[[a-q]&[p-z]-]", // trailing '-' is literal
   1099                 "pq-",
   1100                 "or=",
   1101 
   1102                 "[\\s|\\)|:|$|\\>]", // from regex tests
   1103                 "s|):$>",
   1104                 "\\abc",
   1105 
   1106                 "[\uDC00cd]", // JB#2906: isolated trail at start
   1107                 "cd\uDC00",
   1108                 "ab\uD800\\U00010000",
   1109 
   1110                 "[ab\uD800]", // JB#2906: isolated trail at start
   1111                 "ab\uD800",
   1112                 "cd\uDC00\\U00010000",
   1113 
   1114                 "[ab\uD800cd]", // JB#2906: isolated lead in middle
   1115                 "abcd\uD800",
   1116                 "ef\uDC00\\U00010000",
   1117 
   1118                 "[ab\uDC00cd]", // JB#2906: isolated trail in middle
   1119                 "abcd\uDC00",
   1120                 "ef\uD800\\U00010000",
   1121 
   1122                 "[:^lccc=0:]", // Lead canonical class
   1123                 "\u0300\u0301",
   1124                 "abcd\u00c0\u00c5",
   1125 
   1126                 "[:^tccc=0:]", // Trail canonical class
   1127                 "\u0300\u0301\u00c0\u00c5",
   1128                 "abcd",
   1129 
   1130                 "[[:^lccc=0:][:^tccc=0:]]", // Lead and trail canonical class
   1131                 "\u0300\u0301\u00c0\u00c5",
   1132                 "abcd",
   1133 
   1134                 "[[:^lccc=0:]-[:^tccc=0:]]", // Stuff that starts with an accent but ends with a base (none right now)
   1135                 "",
   1136                 "abcd\u0300\u0301\u00c0\u00c5",
   1137 
   1138                 "[[:ccc=0:]-[:lccc=0:]-[:tccc=0:]]", // Weirdos. Complete canonical class is zero, but both lead and trail are not
   1139                 "\u0F73\u0F75\u0F81",
   1140                 "abcd\u0300\u0301\u00c0\u00c5",
   1141 
   1142                 "[:Assigned:]",
   1143                 "A\\uE000\\uF8FF\\uFDC7\\U00010000\\U0010FFFD",
   1144                 "\\u0888\\uFDD3\\uFFFE\\U00050005",
   1145 
   1146                 // Script_Extensions, new in Unicode 6.0
   1147                 "[:scx=Arab:]",
   1148                 "\\u061E\\u061F\\u0620\\u0621\\u063F\\u0640\\u0650\\u065E\\uFDF1\\uFDF2\\uFDF3",
   1149                 "\\u061D\\uFDEF\\uFDFE",
   1150 
   1151                 // U+FDF2 has Script=Arabic and also Arab in its Script_Extensions,
   1152                 // so scx-sc is missing U+FDF2.
   1153                 "[[:Script_Extensions=Arabic:]-[:Arab:]]",
   1154                 "\\u0640\\u064B\\u0650\\u0655",
   1155                 "\\uFDF2"
   1156         };
   1157 
   1158         for (int i=0; i<DATA.length; i+=3) {
   1159             expectContainment(DATA[i], DATA[i+1], DATA[i+2]);
   1160         }
   1161     }
   1162 
   1163     public void TestUnicodeSetStrings() {
   1164         UnicodeSet uset = new UnicodeSet("[a{bc}{cd}pqr\u0000]");
   1165         logln(uset + " ~ " + uset.getRegexEquivalent());
   1166         String[][] testStrings = {{"x", "none"},
   1167                 {"bc", "all"},
   1168                 {"cdbca", "all"},
   1169                 {"a", "all"},
   1170                 {"bcx", "some"},
   1171                 {"ab", "some"},
   1172                 {"acb", "some"},
   1173                 {"bcda", "some"},
   1174                 {"dccbx", "none"},
   1175         };
   1176         for (int i = 0; i < testStrings.length; ++i) {
   1177             check(uset, testStrings[i][0], testStrings[i][1]);
   1178         }
   1179     }
   1180 
   1181 
   1182     private void check(UnicodeSet uset, String string, String desiredStatus) {
   1183         boolean shouldContainAll = desiredStatus.equals("all");
   1184         boolean shouldContainNone = desiredStatus.equals("none");
   1185         if (uset.containsAll(string) != shouldContainAll) {
   1186             errln("containsAll " +  string + " should be " + shouldContainAll);
   1187         } else {
   1188             logln("containsAll " +  string + " = " + shouldContainAll);
   1189         }
   1190         if (uset.containsNone(string) != shouldContainNone) {
   1191             errln("containsNone " +  string + " should be " + shouldContainNone);
   1192         } else {
   1193             logln("containsNone " +  string + " = " + shouldContainNone);
   1194         }
   1195     }
   1196 
   1197     /**
   1198      * Test cloning of UnicodeSet
   1199      */
   1200     public void TestClone() {
   1201         UnicodeSet s = new UnicodeSet("[abcxyz]");
   1202         UnicodeSet t = (UnicodeSet) s.clone();
   1203         expectContainment(t, "abc", "def");
   1204     }
   1205 
   1206     /**
   1207      * Test the indexOf() and charAt() methods.
   1208      */
   1209     public void TestIndexOf() {
   1210         UnicodeSet set = new UnicodeSet("[a-cx-y3578]");
   1211         for (int i=0; i<set.size(); ++i) {
   1212             int c = set.charAt(i);
   1213             if (set.indexOf(c) != i) {
   1214                 errln("FAIL: charAt(" + i + ") = " + c +
   1215                         " => indexOf() => " + set.indexOf(c));
   1216             }
   1217         }
   1218         int c = set.charAt(set.size());
   1219         if (c != -1) {
   1220             errln("FAIL: charAt(<out of range>) = " +
   1221                     Utility.escape(String.valueOf(c)));
   1222         }
   1223         int j = set.indexOf('q');
   1224         if (j != -1) {
   1225             errln("FAIL: indexOf('q') = " + j);
   1226         }
   1227     }
   1228 
   1229     public void TestContainsString() {
   1230         UnicodeSet x = new UnicodeSet("[a{bc}]");
   1231         if (x.contains("abc")) errln("FAIL");
   1232     }
   1233 
   1234     public void TestExhaustive() {
   1235         // exhaustive tests. Simulate UnicodeSets with integers.
   1236         // That gives us very solid tests (except for large memory tests).
   1237 
   1238         char limit = (char)128;
   1239 
   1240         for (char i = 0; i < limit; ++i) {
   1241             logln("Testing " + i + ", " + bitsToSet(i));
   1242             _testComplement(i);
   1243 
   1244             // AS LONG AS WE ARE HERE, check roundtrip
   1245             checkRoundTrip(bitsToSet(i));
   1246 
   1247             for (char j = 0; j < limit; ++j) {
   1248                 _testAdd(i,j);
   1249                 _testXor(i,j);
   1250                 _testRetain(i,j);
   1251                 _testRemove(i,j);
   1252             }
   1253         }
   1254     }
   1255 
   1256     /**
   1257      * Make sure each script name and abbreviated name can be used
   1258      * to construct a UnicodeSet.
   1259      */
   1260     public void TestScriptNames() {
   1261         for (int i=0; i<UScript.CODE_LIMIT; ++i) {
   1262             for (int j=0; j<2; ++j) {
   1263                 String pat = "";
   1264                 try {
   1265                     String name =
   1266                             (j==0) ? UScript.getName(i) : UScript.getShortName(i);
   1267                             pat = "[:" + name + ":]";
   1268                             UnicodeSet set = new UnicodeSet(pat);
   1269                             logln("Ok: " + pat + " -> " + set.toPattern(false));
   1270                 } catch (IllegalArgumentException e) {
   1271                     if (pat.length() == 0) {
   1272                         errln("FAIL (in UScript): No name for script " + i);
   1273                     } else {
   1274                         errln("FAIL: Couldn't create " + pat);
   1275                     }
   1276                 }
   1277             }
   1278         }
   1279     }
   1280 
   1281     /**
   1282      * Test closure API.
   1283      */
   1284     public void TestCloseOver() {
   1285         String CASE = String.valueOf(UnicodeSet.CASE);
   1286         String[] DATA = {
   1287                 // selector, input, output
   1288                 CASE,
   1289                 "[aq\u00DF{Bc}{bC}{Fi}]",
   1290                 "[aAqQ\u00DF\u1E9E\uFB01{ss}{bc}{fi}]", // U+1E9E LATIN CAPITAL LETTER SHARP S is new in Unicode 5.1
   1291 
   1292                 CASE,
   1293                 "[\u01F1]", // 'DZ'
   1294                 "[\u01F1\u01F2\u01F3]",
   1295 
   1296                 CASE,
   1297                 "[\u1FB4]",
   1298                 "[\u1FB4{\u03AC\u03B9}]",
   1299 
   1300                 CASE,
   1301                 "[{F\uFB01}]",
   1302                 "[\uFB03{ffi}]",
   1303 
   1304                 CASE,
   1305                 "[a-z]","[A-Za-z\u017F\u212A]",
   1306                 CASE,
   1307                 "[abc]","[A-Ca-c]",
   1308                 CASE,
   1309                 "[ABC]","[A-Ca-c]",
   1310         };
   1311 
   1312         UnicodeSet s = new UnicodeSet();
   1313         UnicodeSet t = new UnicodeSet();
   1314         for (int i=0; i<DATA.length; i+=3) {
   1315             int selector = Integer.parseInt(DATA[i]);
   1316             String pat = DATA[i+1];
   1317             String exp = DATA[i+2];
   1318             s.applyPattern(pat);
   1319             s.closeOver(selector);
   1320             t.applyPattern(exp);
   1321             if (s.equals(t)) {
   1322                 logln("Ok: " + pat + ".closeOver(" + selector + ") => " + exp);
   1323             } else {
   1324                 errln("FAIL: " + pat + ".closeOver(" + selector + ") => " +
   1325                         s.toPattern(true) + ", expected " + exp);
   1326             }
   1327         }
   1328 
   1329         // Test the pattern API
   1330         s.applyPattern("[abc]", UnicodeSet.CASE);
   1331         expectContainment(s, "abcABC", "defDEF");
   1332         s = new UnicodeSet("[^abc]", UnicodeSet.CASE);
   1333         expectContainment(s, "defDEF", "abcABC");
   1334     }
   1335 
   1336     public void TestEscapePattern() {
   1337         // The following pattern must contain at least one range "c-d"
   1338         // where c or d is a Pattern_White_Space.
   1339         String pattern =
   1340                 "[\\uFEFF \\u200E-\\u20FF \\uFFF9-\\uFFFC \\U0001D173-\\U0001D17A \\U000F0000-\\U000FFFFD ]";
   1341         String exp =
   1342                 "[\\u200E-\\u20FF\\uFEFF\\uFFF9-\\uFFFC\\U0001D173-\\U0001D17A\\U000F0000-\\U000FFFFD]";
   1343         // We test this with two passes; in the second pass we
   1344         // pre-unescape the pattern.  Since U+200E is Pattern_White_Space,
   1345         // this fails -- which is what we expect.
   1346         for (int pass=1; pass<=2; ++pass) {
   1347             String pat = pattern;
   1348             if (pass==2) {
   1349                 pat = Utility.unescape(pat);
   1350             }
   1351             // Pattern is only good for pass 1
   1352             boolean isPatternValid = (pass==1);
   1353 
   1354             UnicodeSet set = null;
   1355             try {
   1356                 set = new UnicodeSet(pat);
   1357             } catch (IllegalArgumentException e) {
   1358                 set = null;
   1359             }
   1360             if ((set != null) != isPatternValid){
   1361                 errln("FAIL: applyPattern(" +
   1362                         Utility.escape(pat) + ") => " + set);
   1363                 continue;
   1364             }
   1365             if (set == null) {
   1366                 continue;
   1367             }
   1368             if (set.contains((char)0x0644)){
   1369                 errln("FAIL: " + Utility.escape(pat) + " contains(U+0664)");
   1370             }
   1371 
   1372             String newpat = set.toPattern(true);
   1373             if (newpat.equals(exp)) {
   1374                 logln(Utility.escape(pat) + " => " + newpat);
   1375             } else {
   1376                 errln("FAIL: " + Utility.escape(pat) + " => " + newpat);
   1377             }
   1378 
   1379             for (int i=0; i<set.getRangeCount(); ++i) {
   1380                 StringBuffer str = new StringBuffer("Range ");
   1381                 str.append((char)(0x30 + i))
   1382                 .append(": ");
   1383                 UTF16.append(str, set.getRangeStart(i));
   1384                 str.append(" - ");
   1385                 UTF16.append(str, set.getRangeEnd(i));
   1386                 String s = Utility.escape(str.toString() + " (" + set.getRangeStart(i) + " - " +
   1387                         set.getRangeEnd(i) + ")");
   1388                 if (set.getRangeStart(i) < 0) {
   1389                     errln("FAIL: " + s);
   1390                 } else {
   1391                     logln(s);
   1392                 }
   1393             }
   1394         }
   1395     }
   1396 
   1397     public void TestSymbolTable() {
   1398         // Multiple test cases can be set up here.  Each test case
   1399         // is terminated by null:
   1400         // var, value, var, value,..., input pat., exp. output pat., null
   1401         String DATA[] = {
   1402                 "us", "a-z", "[0-1$us]", "[0-1a-z]", null,
   1403                 "us", "[a-z]", "[0-1$us]", "[0-1[a-z]]", null,
   1404                 "us", "\\[a\\-z\\]", "[0-1$us]", "[-01\\[\\]az]", null
   1405         };
   1406 
   1407         for (int i=0; i<DATA.length; ++i) {
   1408             TokenSymbolTable sym = new TokenSymbolTable();
   1409 
   1410             // Set up variables
   1411             while (DATA[i+2] != null) {
   1412                 sym.add(DATA[i], DATA[i+1]);
   1413                 i += 2;
   1414             }
   1415 
   1416             // Input pattern and expected output pattern
   1417             String inpat = DATA[i], exppat = DATA[i+1];
   1418             i += 2;
   1419 
   1420             ParsePosition pos = new ParsePosition(0);
   1421             UnicodeSet us = new UnicodeSet(inpat, pos, sym);
   1422 
   1423             // results
   1424             if (pos.getIndex() != inpat.length()) {
   1425                 errln("Failed to read to end of string \""
   1426                         + inpat + "\": read to "
   1427                         + pos.getIndex() + ", length is "
   1428                         + inpat.length());
   1429             }
   1430 
   1431             UnicodeSet us2 = new UnicodeSet(exppat);
   1432             if (!us.equals(us2)) {
   1433                 errln("Failed, got " + us + ", expected " + us2);
   1434             } else {
   1435                 logln("Ok, got " + us);
   1436             }
   1437 
   1438             //cover Unicode(String,ParsePosition,SymbolTable,int)
   1439             ParsePosition inpos = new ParsePosition(0);
   1440             UnicodeSet inSet = new UnicodeSet(inpat, inpos, sym, UnicodeSet.IGNORE_SPACE);
   1441             UnicodeSet expSet = new UnicodeSet(exppat);
   1442             if (!inSet.equals(expSet)) {
   1443                 errln("FAIL: Failed, got " + inSet + ", expected " + expSet);
   1444             } else {
   1445                 logln("OK: got " + inSet);
   1446             }
   1447         }
   1448     }
   1449 
   1450     /**
   1451      * Test that Posix style character classes [:digit:], etc.
   1452      *   have the Unicode definitions from TR 18.
   1453      */
   1454     public void TestPosixClasses() {
   1455         expectEqual("POSIX alpha", "[:alpha:]", "\\p{Alphabetic}");
   1456         expectEqual("POSIX lower", "[:lower:]", "\\p{lowercase}");
   1457         expectEqual("POSIX upper", "[:upper:]", "\\p{Uppercase}");
   1458         expectEqual("POSIX punct", "[:punct:]", "\\p{gc=Punctuation}");
   1459         expectEqual("POSIX digit", "[:digit:]", "\\p{gc=DecimalNumber}");
   1460         expectEqual("POSIX xdigit", "[:xdigit:]", "[\\p{DecimalNumber}\\p{HexDigit}]");
   1461         expectEqual("POSIX alnum", "[:alnum:]", "[\\p{Alphabetic}\\p{DecimalNumber}]");
   1462         expectEqual("POSIX space", "[:space:]", "\\p{Whitespace}");
   1463         expectEqual("POSIX blank", "[:blank:]", "[\\p{Whitespace}-[\\u000a\\u000B\\u000c\\u000d\\u0085\\p{LineSeparator}\\p{ParagraphSeparator}]]");
   1464         expectEqual("POSIX cntrl", "[:cntrl:]", "\\p{Control}");
   1465         expectEqual("POSIX graph", "[:graph:]", "[^\\p{Whitespace}\\p{Control}\\p{Surrogate}\\p{Unassigned}]");
   1466         expectEqual("POSIX print", "[:print:]", "[[:graph:][:blank:]-[\\p{Control}]]");
   1467     }
   1468 
   1469     public void TestHangulSyllable() {
   1470         final UnicodeSet lvt = new UnicodeSet("[:Hangul_Syllable_Type=LVT_Syllable:]");
   1471         assertNotEquals("LVT count", new UnicodeSet(), lvt);
   1472         logln(lvt + ": " + lvt.size());
   1473         final UnicodeSet lv = new UnicodeSet("[:Hangul_Syllable_Type=LV_Syllable:]");
   1474         assertNotEquals("LV count", new UnicodeSet(), lv);
   1475         logln(lv + ": " + lv.size());
   1476     }
   1477 
   1478     /**
   1479      * Test that frozen classes disallow changes. For 4217
   1480      */
   1481     public void TestFrozen() {
   1482         UnicodeSet test = new UnicodeSet("[[:whitespace:]A]");
   1483         test.freeze();
   1484         checkModification(test, true);
   1485         checkModification(test, false);
   1486     }
   1487 
   1488     /**
   1489      * Test Generic support
   1490      */
   1491     public void TestGenerics() {
   1492         UnicodeSet set1 = new UnicodeSet("[a-b d-g {ch} {zh}]").freeze();
   1493         UnicodeSet set2 = new UnicodeSet("[e-f {ch}]").freeze();
   1494         UnicodeSet set3 = new UnicodeSet("[d m-n {dh}]").freeze();
   1495         // A useful range of sets for testing, including both characters and strings
   1496         // set 1 contains set2
   1497         // set 1 is overlaps with set 3
   1498         // set 2 is disjoint with set 3
   1499 
   1500         //public Iterator<String> iterator() {
   1501 
   1502         ArrayList<String> oldList = new ArrayList<String>();
   1503         for (UnicodeSetIterator it = new UnicodeSetIterator(set1); it.next();) {
   1504             oldList.add(it.getString());
   1505         }
   1506 
   1507         ArrayList<String> list1 = new ArrayList<String>();
   1508         for (String s : set1) {
   1509             list1.add(s);
   1510         }
   1511         assertEquals("iteration test", oldList, list1);
   1512 
   1513         //addAllTo(Iterable<T>, U)
   1514         list1.clear();
   1515         set1.addAllTo(list1);
   1516         assertEquals("iteration test", oldList, list1);
   1517 
   1518         list1 = set1.addAllTo(new ArrayList<String>());
   1519         assertEquals("addAllTo", oldList, list1);
   1520 
   1521         ArrayList<String> list2 = set2.addAllTo(new ArrayList<String>());
   1522         ArrayList<String> list3 = set3.addAllTo(new ArrayList<String>());
   1523 
   1524         // put them into different order, to check that order doesn't matter
   1525         TreeSet sorted1 = set1.addAllTo(new TreeSet<String>());
   1526         TreeSet sorted2 = set2.addAllTo(new TreeSet<String>());
   1527         TreeSet sorted3 = set3.addAllTo(new TreeSet<String>());
   1528 
   1529         //containsAll(Collection<String> collection)
   1530         assertTrue("containsAll", set1.containsAll(list1));
   1531         assertTrue("containsAll", set1.containsAll(sorted1));
   1532         assertTrue("containsAll", set1.containsAll(list2));
   1533         assertTrue("containsAll", set1.containsAll(sorted2));
   1534         assertFalse("containsAll", set1.containsAll(list3));
   1535         assertFalse("containsAll", set1.containsAll(sorted3));
   1536         assertFalse("containsAll", set2.containsAll(list3));
   1537         assertFalse("containsAll", set2.containsAll(sorted3));
   1538 
   1539         //containsSome(Collection<String>)
   1540         assertTrue("containsSome", set1.containsSome(list1));
   1541         assertTrue("containsSome", set1.containsSome(sorted1));
   1542         assertTrue("containsSome", set1.containsSome(list2));
   1543         assertTrue("containsSome", set1.containsSome(sorted2));
   1544         assertTrue("containsSome", set1.containsSome(list3));
   1545         assertTrue("containsSome", set1.containsSome(sorted3));
   1546         assertFalse("containsSome", set2.containsSome(list3));
   1547         assertFalse("containsSome", set2.containsSome(sorted3));
   1548 
   1549         //containsNone(Collection<String>)
   1550         assertFalse("containsNone", set1.containsNone(list1));
   1551         assertFalse("containsNone", set1.containsNone(sorted1));
   1552         assertFalse("containsNone", set1.containsNone(list2));
   1553         assertFalse("containsNone", set1.containsNone(sorted2));
   1554         assertFalse("containsNone", set1.containsNone(list3));
   1555         assertFalse("containsNone", set1.containsNone(sorted3));
   1556         assertTrue("containsNone", set2.containsNone(list3));
   1557         assertTrue("containsNone", set2.containsNone(sorted3));
   1558 
   1559         //addAll(String...)
   1560         UnicodeSet other3 = new UnicodeSet().addAll("d", "m", "n", "dh");
   1561         assertEquals("addAll", set3, other3);
   1562 
   1563         //removeAll(Collection<String>)
   1564         UnicodeSet mod1 = new UnicodeSet(set1).removeAll(set2);
   1565         UnicodeSet mod2 = new UnicodeSet(set1).removeAll(list2);
   1566         assertEquals("remove all", mod1, mod2);
   1567 
   1568         //retainAll(Collection<String>)
   1569         mod1 = new UnicodeSet(set1).retainAll(set2);
   1570         mod2 = new UnicodeSet(set1).retainAll(set2.addAllTo(new LinkedHashSet<String>()));
   1571         assertEquals("remove all", mod1, mod2);
   1572     }
   1573 
   1574     public void TestComparison() {
   1575         UnicodeSet set1 = new UnicodeSet("[a-b d-g {ch} {zh}]").freeze();
   1576         UnicodeSet set2 = new UnicodeSet("[c-e {ch}]").freeze();
   1577         UnicodeSet set3 = new UnicodeSet("[d m-n z {dh}]").freeze();
   1578 
   1579         //compareTo(UnicodeSet)
   1580         // do indirectly, by sorting
   1581         List<UnicodeSet> unsorted = Arrays.asList(set3, set2, set1);
   1582         List<UnicodeSet> goalShortest = Arrays.asList(set2, set3, set1);
   1583         List<UnicodeSet> goalLongest = Arrays.asList(set1, set3, set2);
   1584         List<UnicodeSet> goalLex = Arrays.asList(set1, set2, set3);
   1585 
   1586         List<UnicodeSet> sorted = new ArrayList(new TreeSet<UnicodeSet>(unsorted));
   1587         assertNotEquals("compareTo-shorter-first", unsorted, sorted);
   1588         assertEquals("compareTo-shorter-first", goalShortest, sorted);
   1589 
   1590         TreeSet<UnicodeSet> sorted1 = new TreeSet<UnicodeSet>(new Comparator<UnicodeSet>(){
   1591             public int compare(UnicodeSet o1, UnicodeSet o2) {
   1592                 // TODO Auto-generated method stub
   1593                 return o1.compareTo(o2, ComparisonStyle.LONGER_FIRST);
   1594             }});
   1595         sorted1.addAll(unsorted);
   1596         sorted = new ArrayList(sorted1);
   1597         assertNotEquals("compareTo-longer-first", unsorted, sorted);
   1598         assertEquals("compareTo-longer-first", goalLongest, sorted);
   1599 
   1600         sorted1 = new TreeSet<UnicodeSet>(new Comparator<UnicodeSet>(){
   1601             public int compare(UnicodeSet o1, UnicodeSet o2) {
   1602                 // TODO Auto-generated method stub
   1603                 return o1.compareTo(o2, ComparisonStyle.LEXICOGRAPHIC);
   1604             }});
   1605         sorted1.addAll(unsorted);
   1606         sorted = new ArrayList(sorted1);
   1607         assertNotEquals("compareTo-lex", unsorted, sorted);
   1608         assertEquals("compareTo-lex", goalLex, sorted);
   1609 
   1610         //compare(String, int)
   1611         // make a list of interesting combinations
   1612         List<String> sources = Arrays.asList("\u0000", "a", "b", "\uD7FF", "\uD800", "\uDBFF", "\uDC00", "\uDFFF", "\uE000", "\uFFFD", "\uFFFF");
   1613         TreeSet<String> target = new TreeSet<String>();
   1614         for (String s : sources) {
   1615             target.add(s);
   1616             for (String t : sources) {
   1617                 target.add(s + t);
   1618                 for (String u : sources) {
   1619                     target.add(s + t + u);
   1620                 }
   1621             }
   1622         }
   1623         // now compare all the combinations. If any of them is a code point, use it.
   1624         int maxErrorCount = 0;
   1625         compare:
   1626             for (String last : target) {
   1627                 for (String curr : target) {
   1628                     int lastCount = Character.codePointCount(last, 0, last.length());
   1629                     int currCount = Character.codePointCount(curr, 0, curr.length());
   1630                     int comparison;
   1631                     if (lastCount == 1) {
   1632                         comparison = UnicodeSet.compare(last.codePointAt(0), curr);
   1633                     } else if (currCount == 1) {
   1634                         comparison = UnicodeSet.compare(last, curr.codePointAt(0));
   1635                     } else {
   1636                         continue;
   1637                     }
   1638                     if (comparison != last.compareTo(curr)) {
   1639                         // repeat for debugging
   1640                         if (lastCount == 1) {
   1641                             comparison = UnicodeSet.compare(last.codePointAt(0), curr);
   1642                         } else if (currCount == 1) {
   1643                             comparison = UnicodeSet.compare(last, curr.codePointAt(0));
   1644                         }
   1645                         if (maxErrorCount++ > 10) {
   1646                             errln(maxErrorCount + " Failure in comparing " + last + " & " + curr + "\tOmitting others...");
   1647                             break compare;
   1648                         }
   1649                         errln(maxErrorCount + " Failure in comparing " + last + " & " + curr);
   1650                     }
   1651                 }
   1652             }
   1653 
   1654         //compare(Iterable<T>, Iterable<T>)
   1655         int max = 10;
   1656         List<String> test1 = new ArrayList<String>(max);
   1657         List<String> test2 = new ArrayList<String>(max);
   1658         for (int i = 0; i <= max; ++i) {
   1659             test1.add("a" + i);
   1660             test2.add("a" + (max - i)); // add in reverse order
   1661         }
   1662         assertNotEquals("compare iterable test", test1, test2);
   1663         TreeSet<CharSequence> sortedTest1 = new TreeSet<CharSequence>(test1);
   1664         TreeSet<CharSequence> sortedTest2 = new TreeSet<CharSequence>(test2);
   1665         assertEquals("compare iterable test", sortedTest1, sortedTest2);
   1666     }
   1667 
   1668     public void TestRangeConstructor() {
   1669         UnicodeSet w = new UnicodeSet().addAll(3,5);
   1670         UnicodeSet s = new UnicodeSet(3,5);
   1671         assertEquals("new constructor", w, s);
   1672 
   1673         w = new UnicodeSet().addAll(3,5).addAll(7,7);
   1674         UnicodeSet t = new UnicodeSet(3,5, 7,7);
   1675         assertEquals("new constructor", w, t);
   1676         // check to make sure right exceptions are thrown
   1677         Class expected = IllegalArgumentException.class;
   1678         Class actual;
   1679 
   1680         try {
   1681             actual = null;
   1682             @SuppressWarnings("unused")
   1683             UnicodeSet u = new UnicodeSet(5);
   1684         } catch (IllegalArgumentException e) {
   1685             actual = e.getClass();
   1686         }
   1687         assertEquals("exception if odd", expected, actual);
   1688 
   1689         try {
   1690             actual = null;
   1691             @SuppressWarnings("unused")
   1692             UnicodeSet u = new UnicodeSet(3, 2, 7, 9);
   1693         } catch (IllegalArgumentException e) {
   1694             actual = e.getClass();
   1695         }
   1696         assertEquals("exception for start/end problem", expected, actual);
   1697 
   1698         try {
   1699             actual = null;
   1700             @SuppressWarnings("unused")
   1701             UnicodeSet u = new UnicodeSet(3, 5, 6, 9);
   1702         } catch (IllegalArgumentException e) {
   1703             actual = e.getClass();
   1704         }
   1705         assertEquals("exception for end/start problem", expected, actual);
   1706 
   1707         CheckRangeSpeed(10000, new UnicodeSet("[:whitespace:]"));
   1708         CheckRangeSpeed(1000, new UnicodeSet("[:letter:]"));
   1709     }
   1710 
   1711     /**
   1712      * @param iterations
   1713      * @param testSet
   1714      */
   1715     private void CheckRangeSpeed(int iterations, UnicodeSet testSet) {
   1716         testSet.complement().complement();
   1717         String testPattern = testSet.toString();
   1718         // fill a set of pairs from the pattern
   1719         int[] pairs = new int[testSet.getRangeCount()*2];
   1720         int j = 0;
   1721         for (UnicodeSetIterator it = new UnicodeSetIterator(testSet); it.nextRange();) {
   1722             pairs[j++] = it.codepoint;
   1723             pairs[j++] = it.codepointEnd;
   1724         }
   1725         UnicodeSet fromRange = new UnicodeSet(testSet);
   1726         assertEquals("from range vs pattern", testSet, fromRange);
   1727 
   1728         double start = System.currentTimeMillis();
   1729         for (int i = 0; i < iterations; ++i) {
   1730             fromRange = new UnicodeSet(testSet);
   1731         }
   1732         double middle = System.currentTimeMillis();
   1733         for (int i = 0; i < iterations; ++i) {
   1734             new UnicodeSet(testPattern);
   1735         }
   1736         double end = System.currentTimeMillis();
   1737 
   1738         double rangeConstructorTime = (middle - start)/iterations;
   1739         double patternConstructorTime = (end - middle)/iterations;
   1740         String message = "Range constructor:\t" + rangeConstructorTime + ";\tPattern constructor:\t" + patternConstructorTime + "\t\t"
   1741                 + percent.format(rangeConstructorTime/patternConstructorTime-1);
   1742         if (rangeConstructorTime < 2*patternConstructorTime) {
   1743             logln(message);
   1744         } else {
   1745             errln(message);
   1746         }
   1747     }
   1748 
   1749     NumberFormat percent = NumberFormat.getPercentInstance();
   1750     {
   1751         percent.setMaximumFractionDigits(2);
   1752     }
   1753     // ****************************************
   1754     // UTILITIES
   1755     // ****************************************
   1756 
   1757     public void checkModification(UnicodeSet original, boolean isFrozen) {
   1758         main:
   1759             for (int i = 0; ;++i) {
   1760                 UnicodeSet test = (UnicodeSet) (isFrozen ? original.clone() : original.cloneAsThawed());
   1761                 boolean gotException = true;
   1762                 boolean checkEquals = true;
   1763                 try {
   1764                     switch(i) {
   1765                     case 0: test.add(0); break;
   1766                     case 1: test.add(0,1); break;
   1767                     case 2: test.add("a"); break;
   1768                     case 3: List a = new ArrayList(); a.add("a"); test.addAll(a); break;
   1769                     case 4: test.addAll("ab"); break;
   1770                     case 5: test.addAll(new UnicodeSet("[ab]")); break;
   1771                     case 6: test.applyIntPropertyValue(0,0); break;
   1772                     case 7: test.applyPattern("[ab]"); break;
   1773                     case 8: test.applyPattern("[ab]", true); break;
   1774                     case 9: test.applyPattern("[ab]", 0); break;
   1775                     case 10: test.applyPropertyAlias("hex","true"); break;
   1776                     case 11: test.applyPropertyAlias("hex", "true", null); break;
   1777                     case 12: test.closeOver(UnicodeSet.CASE); break;
   1778                     case 13: test.compact(); checkEquals = false; break;
   1779                     case 14: test.complement(0); break;
   1780                     case 15: test.complement(0,0); break;
   1781                     case 16: test.complement("ab"); break;
   1782                     case 17: test.complementAll("ab"); break;
   1783                     case 18: test.complementAll(new UnicodeSet("[ab]")); break;
   1784                     case 19: test.remove(' '); break;
   1785                     case 20: test.remove(' ','a'); break;
   1786                     case 21: test.remove(" "); break;
   1787                     case 22: test.removeAll(" a"); break;
   1788                     case 23: test.removeAll(new UnicodeSet("[\\ a]")); break;
   1789                     case 24: test.retain(' '); break;
   1790                     case 25: test.retain(' ','a'); break;
   1791                     case 26: test.retain(" "); break;
   1792                     case 27: test.retainAll(" a"); break;
   1793                     case 28: test.retainAll(new UnicodeSet("[\\ a]")); break;
   1794                     case 29: test.set(0,1); break;
   1795                     case 30: test.set(new UnicodeSet("[ab]")); break;
   1796 
   1797                     default: continue main; // so we don't keep having to change the endpoint, and gaps are not skipped.
   1798                     case 35: return;
   1799                     }
   1800                     gotException = false;
   1801                 } catch (UnsupportedOperationException e) {
   1802                     // do nothing
   1803                 }
   1804                 if (isFrozen && !gotException) errln(i + ") attempt to modify frozen object didn't result in an exception");
   1805                 if (!isFrozen && gotException) errln(i + ") attempt to modify thawed object did result in an exception");
   1806                 if (checkEquals) {
   1807                     if (test.equals(original)) {
   1808                         if (!isFrozen) errln(i + ") attempt to modify thawed object didn't change the object");
   1809                     } else { // unequal
   1810                         if (isFrozen) errln(i + ") attempt to modify frozen object changed the object");
   1811                     }
   1812                 }
   1813             }
   1814     }
   1815 
   1816     // The following code block is commented out to eliminate PrettyPrinter dependencies
   1817 
   1818     //    String[] prettyData = {
   1819     //            "[\\uD7DE-\\uD90C \\uDCB5-\\uDD9F]", // special case
   1820     //            "[:any:]",
   1821     //            "[:whitespace:]",
   1822     //            "[:linebreak=AL:]",
   1823     //    };
   1824     //
   1825     //    public void TestPrettyPrinting() {
   1826     //        try{
   1827     //            PrettyPrinter pp = new PrettyPrinter();
   1828     //
   1829     //            int i = 0;
   1830     //            for (; i < prettyData.length; ++i) {
   1831     //                UnicodeSet test = new UnicodeSet(prettyData[i]);
   1832     //                checkPrettySet(pp, i, test);
   1833     //            }
   1834     //            Random random = new Random(0);
   1835     //            UnicodeSet test = new UnicodeSet();
   1836     //
   1837     //            // To keep runtimes under control, make the number of random test cases
   1838     //            //   to try depends on the test framework exhaustive setting.
   1839     //            //  params.inclusions = 5:   default exhaustive value
   1840     //            //  params.inclusions = 10:  max exhaustive value.
   1841     //            int iterations = 50;
   1842     //            if (params.inclusion > 5) {
   1843     //                iterations = (params.inclusion-5) * 200;
   1844     //            }
   1845     //            for (; i < iterations; ++i) {
   1846     //                double start = random.nextGaussian() * 0x10000;
   1847     //                if (start < 0) start = - start;
   1848     //                if (start > 0x10FFFF) {
   1849     //                    start = 0x10FFFF;
   1850     //                }
   1851     //                double end = random.nextGaussian() * 0x100;
   1852     //                if (end < 0) end = -end;
   1853     //                end = start + end;
   1854     //                if (end > 0x10FFFF) {
   1855     //                    end = 0x10FFFF;
   1856     //                }
   1857     //                test.complement((int)start, (int)end);
   1858     //                checkPrettySet(pp, i, test);
   1859     //            }
   1860     //        }catch(RuntimeException ex){
   1861     //            warnln("Could not load Collator");
   1862     //        }
   1863     //    }
   1864     //
   1865     //    private void checkPrettySet(PrettyPrinter pp, int i, UnicodeSet test) {
   1866     //        String pretty = pp.toPattern(test);
   1867     //        UnicodeSet retry = new UnicodeSet(pretty);
   1868     //        if (!test.equals(retry)) {
   1869     //            errln(i + ". Failed test: " + test + " != " + pretty);
   1870     //        } else {
   1871     //            logln(i + ". Worked for " + truncate(test.toString()) + " => " + truncate(pretty));
   1872     //        }
   1873     //    }
   1874     //
   1875     //    private String truncate(String string) {
   1876     //        if (string.length() <= 100) return string;
   1877     //        return string.substring(0,97) + "...";
   1878     //    }
   1879 
   1880     public class TokenSymbolTable implements SymbolTable {
   1881         HashMap contents = new HashMap();
   1882 
   1883         /**
   1884          * (Non-SymbolTable API) Add the given variable and value to
   1885          * the table.  Variable should NOT contain leading '$'.
   1886          */
   1887         public void add(String var, String value) {
   1888             char[] buffer = new char[value.length()];
   1889             value.getChars(0, value.length(), buffer, 0);
   1890             add(var, buffer);
   1891         }
   1892 
   1893         /**
   1894          * (Non-SymbolTable API) Add the given variable and value to
   1895          * the table.  Variable should NOT contain leading '$'.
   1896          */
   1897         public void add(String var, char[] body) {
   1898             logln("TokenSymbolTable: add \"" + var + "\" => \"" +
   1899                     new String(body) + "\"");
   1900             contents.put(var, body);
   1901         }
   1902 
   1903         /* (non-Javadoc)
   1904          * @see com.ibm.icu.text.SymbolTable#lookup(java.lang.String)
   1905          */
   1906         public char[] lookup(String s) {
   1907             logln("TokenSymbolTable: lookup \"" + s + "\" => \"" +
   1908                     new String((char[]) contents.get(s)) + "\"");
   1909             return (char[])contents.get(s);
   1910         }
   1911 
   1912         /* (non-Javadoc)
   1913          * @see com.ibm.icu.text.SymbolTable#lookupMatcher(int)
   1914          */
   1915         public UnicodeMatcher lookupMatcher(int ch) {
   1916             return null;
   1917         }
   1918 
   1919         /* (non-Javadoc)
   1920          * @see com.ibm.icu.text.SymbolTable#parseReference(java.lang.String,
   1921      java.text.ParsePosition, int)
   1922          */
   1923         public String parseReference(String text, ParsePosition pos, int
   1924                 limit) {
   1925             int cp;
   1926             int start = pos.getIndex();
   1927             int i;
   1928             for (i = start; i < limit; i += UTF16.getCharCount(cp)) {
   1929                 cp = UTF16.charAt(text, i);
   1930                 if (!com.ibm.icu.lang.UCharacter.isUnicodeIdentifierPart(cp)) {
   1931                     break;
   1932                 }
   1933             }
   1934             logln("TokenSymbolTable: parse \"" + text + "\" from " +
   1935                     start + " to " + i +
   1936                     " => \"" + text.substring(start,i) + "\"");
   1937             pos.setIndex(i);
   1938             return text.substring(start,i);
   1939         }
   1940     }
   1941 
   1942     public void TestSurrogate() {
   1943         String DATA[] = {
   1944                 // These should all behave identically
   1945                 "[abc\\uD800\\uDC00]",
   1946                 "[abc\uD800\uDC00]",
   1947                 "[abc\\U00010000]",
   1948         };
   1949         for (int i=0; i<DATA.length; ++i) {
   1950             logln("Test pattern " + i + " :" + Utility.escape(DATA[i]));
   1951             UnicodeSet set = new UnicodeSet(DATA[i]);
   1952             expectContainment(set,
   1953                     CharsToUnicodeString("abc\\U00010000"),
   1954                     "\uD800;\uDC00"); // split apart surrogate-pair
   1955             if (set.size() != 4) {
   1956                 errln(Utility.escape("FAIL: " + DATA[i] + ".size() == " +
   1957                         set.size() + ", expected 4"));
   1958             }
   1959         }
   1960     }
   1961 
   1962     public void TestContains() {
   1963         int limit = 256; // combinations to test
   1964         for (int i = 0; i < limit; ++i) {
   1965             logln("Trying: " + i);
   1966             UnicodeSet x = bitsToSet(i);
   1967             for (int j = 0; j < limit; ++j) {
   1968                 UnicodeSet y = bitsToSet(j);
   1969                 boolean containsNone = (i & j) == 0;
   1970                 boolean containsAll = (i & j) == j;
   1971                 boolean equals = i == j;
   1972                 if (containsNone != x.containsNone(y)) {
   1973                     x.containsNone(y); // repeat for debugging
   1974                     errln("FAILED: " + x +  " containsSome " + y);
   1975                 }
   1976                 if (containsAll != x.containsAll(y)) {
   1977                     x.containsAll(y); // repeat for debugging
   1978                     errln("FAILED: " + x +  " containsAll " + y);
   1979                 }
   1980                 if (equals != x.equals(y)) {
   1981                     x.equals(y); // repeat for debugging
   1982                     errln("FAILED: " + x +  " equals " + y);
   1983                 }
   1984             }
   1985         }
   1986     }
   1987 
   1988     void _testComplement(int a) {
   1989         UnicodeSet x = bitsToSet(a);
   1990         UnicodeSet z = bitsToSet(a);
   1991         z.complement();
   1992         int c = setToBits(z);
   1993         if (c != (~a)) {
   1994             errln("FAILED: add: ~" + x +  " != " + z);
   1995             errln("FAILED: add: ~" + a + " != " + c);
   1996         }
   1997         checkCanonicalRep(z, "complement " + a);
   1998     }
   1999 
   2000     void _testAdd(int a, int b) {
   2001         UnicodeSet x = bitsToSet(a);
   2002         UnicodeSet y = bitsToSet(b);
   2003         UnicodeSet z = bitsToSet(a);
   2004         z.addAll(y);
   2005         int c = setToBits(z);
   2006         if (c != (a | b)) {
   2007             errln(Utility.escape("FAILED: add: " + x + " | " + y + " != " + z));
   2008             errln("FAILED: add: " + a + " | " + b + " != " + c);
   2009         }
   2010         checkCanonicalRep(z, "add " + a + "," + b);
   2011     }
   2012 
   2013     void _testRetain(int a, int b) {
   2014         UnicodeSet x = bitsToSet(a);
   2015         UnicodeSet y = bitsToSet(b);
   2016         UnicodeSet z = bitsToSet(a);
   2017         z.retainAll(y);
   2018         int c = setToBits(z);
   2019         if (c != (a & b)) {
   2020             errln("FAILED: retain: " + x + " & " + y + " != " + z);
   2021             errln("FAILED: retain: " + a + " & " + b + " != " + c);
   2022         }
   2023         checkCanonicalRep(z, "retain " + a + "," + b);
   2024     }
   2025 
   2026     void _testRemove(int a, int b) {
   2027         UnicodeSet x = bitsToSet(a);
   2028         UnicodeSet y = bitsToSet(b);
   2029         UnicodeSet z = bitsToSet(a);
   2030         z.removeAll(y);
   2031         int c = setToBits(z);
   2032         if (c != (a &~ b)) {
   2033             errln("FAILED: remove: " + x + " &~ " + y + " != " + z);
   2034             errln("FAILED: remove: " + a + " &~ " + b + " != " + c);
   2035         }
   2036         checkCanonicalRep(z, "remove " + a + "," + b);
   2037     }
   2038 
   2039     void _testXor(int a, int b) {
   2040         UnicodeSet x = bitsToSet(a);
   2041         UnicodeSet y = bitsToSet(b);
   2042         UnicodeSet z = bitsToSet(a);
   2043         z.complementAll(y);
   2044         int c = setToBits(z);
   2045         if (c != (a ^ b)) {
   2046             errln("FAILED: complement: " + x + " ^ " + y + " != " + z);
   2047             errln("FAILED: complement: " + a + " ^ " + b + " != " + c);
   2048         }
   2049         checkCanonicalRep(z, "complement " + a + "," + b);
   2050     }
   2051 
   2052     /**
   2053      * Check that ranges are monotonically increasing and non-
   2054      * overlapping.
   2055      */
   2056     void checkCanonicalRep(UnicodeSet set, String msg) {
   2057         int n = set.getRangeCount();
   2058         if (n < 0) {
   2059             errln("FAIL result of " + msg +
   2060                     ": range count should be >= 0 but is " +
   2061                     n + " for " + Utility.escape(set.toString()));
   2062             return;
   2063         }
   2064         int last = 0;
   2065         for (int i=0; i<n; ++i) {
   2066             int start = set.getRangeStart(i);
   2067             int end = set.getRangeEnd(i);
   2068             if (start > end) {
   2069                 errln("FAIL result of " + msg +
   2070                         ": range " + (i+1) +
   2071                         " start > end: " + start + ", " + end +
   2072                         " for " + Utility.escape(set.toString()));
   2073             }
   2074             if (i > 0 && start <= last) {
   2075                 errln("FAIL result of " + msg +
   2076                         ": range " + (i+1) +
   2077                         " overlaps previous range: " + start + ", " + end +
   2078                         " for " + Utility.escape(set.toString()));
   2079             }
   2080             last = end;
   2081         }
   2082     }
   2083 
   2084     /**
   2085      * Convert a bitmask to a UnicodeSet.
   2086      */
   2087     UnicodeSet bitsToSet(int a) {
   2088         UnicodeSet result = new UnicodeSet();
   2089         for (int i = 0; i < 32; ++i) {
   2090             if ((a & (1<<i)) != 0) {
   2091                 result.add((char)i,(char)i);
   2092             }
   2093         }
   2094 
   2095         return result;
   2096     }
   2097 
   2098     /**
   2099      * Convert a UnicodeSet to a bitmask.  Only the characters
   2100      * U+0000 to U+0020 are represented in the bitmask.
   2101      */
   2102     static int setToBits(UnicodeSet x) {
   2103         int result = 0;
   2104         for (int i = 0; i < 32; ++i) {
   2105             if (x.contains((char)i)) {
   2106                 result |= (1<<i);
   2107             }
   2108         }
   2109         return result;
   2110     }
   2111 
   2112     /**
   2113      * Return the representation of an inversion list based UnicodeSet
   2114      * as a pairs list.  Ranges are listed in ascending Unicode order.
   2115      * For example, the set [a-zA-M3] is represented as "33AMaz".
   2116      */
   2117     static String getPairs(UnicodeSet set) {
   2118         StringBuffer pairs = new StringBuffer();
   2119         for (int i=0; i<set.getRangeCount(); ++i) {
   2120             int start = set.getRangeStart(i);
   2121             int end = set.getRangeEnd(i);
   2122             if (end > 0xFFFF) {
   2123                 end = 0xFFFF;
   2124                 i = set.getRangeCount(); // Should be unnecessary
   2125             }
   2126             pairs.append((char)start).append((char)end);
   2127         }
   2128         return pairs.toString();
   2129     }
   2130 
   2131     /**
   2132      * Test function. Make sure that the sets have the right relation
   2133      */
   2134 
   2135     void expectRelation(Object relationObj, Object set1Obj, Object set2Obj, String message) {
   2136         int relation = ((Integer) relationObj).intValue();
   2137         UnicodeSet set1 = (UnicodeSet) set1Obj;
   2138         UnicodeSet set2 = (UnicodeSet) set2Obj;
   2139 
   2140         // by-the-by, check the iterator
   2141         checkRoundTrip(set1);
   2142         checkRoundTrip(set2);
   2143 
   2144         boolean contains = set1.containsAll(set2);
   2145         boolean isContained = set2.containsAll(set1);
   2146         boolean disjoint = set1.containsNone(set2);
   2147         boolean equals = set1.equals(set2);
   2148 
   2149         UnicodeSet intersection = new UnicodeSet(set1).retainAll(set2);
   2150         UnicodeSet minus12 = new UnicodeSet(set1).removeAll(set2);
   2151         UnicodeSet minus21 = new UnicodeSet(set2).removeAll(set1);
   2152 
   2153         // test basic properties
   2154 
   2155         if (contains != (intersection.size() == set2.size())) {
   2156             errln("FAIL contains1" + set1.toPattern(true) + ", " + set2.toPattern(true));
   2157         }
   2158 
   2159         if (contains != (intersection.equals(set2))) {
   2160             errln("FAIL contains2" + set1.toPattern(true) + ", " + set2.toPattern(true));
   2161         }
   2162 
   2163         if (isContained != (intersection.size() == set1.size())) {
   2164             errln("FAIL isContained1" + set1.toPattern(true) + ", " + set2.toPattern(true));
   2165         }
   2166 
   2167         if (isContained != (intersection.equals(set1))) {
   2168             errln("FAIL isContained2" + set1.toPattern(true) + ", " + set2.toPattern(true));
   2169         }
   2170 
   2171         if ((contains && isContained) != equals) {
   2172             errln("FAIL equals" + set1.toPattern(true) + ", " + set2.toPattern(true));
   2173         }
   2174 
   2175         if (disjoint != (intersection.size() == 0)) {
   2176             errln("FAIL disjoint" + set1.toPattern(true) + ", " + set2.toPattern(true));
   2177         }
   2178 
   2179         // Now see if the expected relation is true
   2180         int status = (minus12.size() != 0 ? 4 : 0)
   2181                 | (intersection.size() != 0 ? 2 : 0)
   2182                 | (minus21.size() != 0 ? 1 : 0);
   2183 
   2184         if (status != relation) {
   2185             errln("FAIL relation incorrect" + message
   2186                     + "; desired = " + RELATION_NAME[relation]
   2187                             + "; found = " + RELATION_NAME[status]
   2188                                     + "; set1 = " + set1.toPattern(true)
   2189                                     + "; set2 = " + set2.toPattern(true)
   2190                     );
   2191         }
   2192     }
   2193 
   2194     /**
   2195      * Basic consistency check for a few items.
   2196      * That the iterator works, and that we can create a pattern and
   2197      * get the same thing back
   2198      */
   2199 
   2200     void checkRoundTrip(UnicodeSet s) {
   2201         String pat = s.toPattern(false);
   2202         UnicodeSet t = copyWithIterator(s, false);
   2203         checkEqual(s, t, "iterator roundtrip");
   2204 
   2205         t = copyWithIterator(s, true); // try range
   2206         checkEqual(s, t, "iterator roundtrip");
   2207 
   2208         t = new UnicodeSet(pat);
   2209         checkEqual(s, t, "toPattern(false)");
   2210 
   2211         pat = s.toPattern(true);
   2212         t = new UnicodeSet(pat);
   2213         checkEqual(s, t, "toPattern(true)");
   2214     }
   2215 
   2216     UnicodeSet copyWithIterator(UnicodeSet s, boolean withRange) {
   2217         UnicodeSet t = new UnicodeSet();
   2218         UnicodeSetIterator it = new UnicodeSetIterator(s);
   2219         if (withRange) {
   2220             while (it.nextRange()) {
   2221                 if (it.codepoint == UnicodeSetIterator.IS_STRING) {
   2222                     t.add(it.string);
   2223                 } else {
   2224                     t.add(it.codepoint, it.codepointEnd);
   2225                 }
   2226             }
   2227         } else {
   2228             while (it.next()) {
   2229                 if (it.codepoint == UnicodeSetIterator.IS_STRING) {
   2230                     t.add(it.string);
   2231                 } else {
   2232                     t.add(it.codepoint);
   2233                 }
   2234             }
   2235         }
   2236         return t;
   2237     }
   2238 
   2239     boolean checkEqual(UnicodeSet s, UnicodeSet t, String message) {
   2240         if (!s.equals(t)) {
   2241             errln("FAIL " + message
   2242                     + "; source = " + s.toPattern(true)
   2243                     + "; result = " + t.toPattern(true)
   2244                     );
   2245             return false;
   2246         }
   2247         return true;
   2248     }
   2249 
   2250     void expectEqual(String name, String pat1, String pat2) {
   2251         UnicodeSet set1, set2;
   2252         try {
   2253             set1 = new UnicodeSet(pat1);
   2254             set2 = new UnicodeSet(pat2);
   2255         } catch (IllegalArgumentException e) {
   2256             errln("FAIL: Couldn't create UnicodeSet from pattern for \"" + name + "\": " + e.getMessage());
   2257             return;
   2258         }
   2259         if(!set1.equals(set2)) {
   2260             errln("FAIL: Sets built from patterns differ for \"" + name + "\"");
   2261         }
   2262     }
   2263 
   2264     /**
   2265      * Expect the given set to contain the characters in charsIn and
   2266      * to not contain those in charsOut.
   2267      */
   2268     void expectContainment(String pat, String charsIn, String charsOut) {
   2269         UnicodeSet set;
   2270         try {
   2271             set = new UnicodeSet(pat);
   2272         } catch (IllegalArgumentException e) {
   2273             errln("FAIL: Couldn't create UnicodeSet from pattern \"" +
   2274                     pat + "\": " + e.getMessage());
   2275             return;
   2276         }
   2277         expectContainment(set, charsIn, charsOut);
   2278     }
   2279 
   2280     /**
   2281      * Expect the given set to contain the characters in charsIn and
   2282      * to not contain those in charsOut.
   2283      */
   2284     void expectContainment(UnicodeSet set, String charsIn, String charsOut) {
   2285         StringBuffer bad = new StringBuffer();
   2286         if (charsIn != null) {
   2287             charsIn = Utility.unescape(charsIn);
   2288             for (int i=0; i<charsIn.length(); ) {
   2289                 int c = UTF16.charAt(charsIn,i);
   2290                 i += UTF16.getCharCount(c);
   2291                 if (!set.contains(c)) {
   2292                     UTF16.append(bad,c);
   2293                 }
   2294             }
   2295             if (bad.length() > 0) {
   2296                 errln(Utility.escape("FAIL: set " + set + " does not contain " + bad +
   2297                         ", expected containment of " + charsIn));
   2298             } else {
   2299                 logln(Utility.escape("Ok: set " + set + " contains " + charsIn));
   2300             }
   2301         }
   2302         if (charsOut != null) {
   2303             charsOut = Utility.unescape(charsOut);
   2304             bad.setLength(0);
   2305             for (int i=0; i<charsOut.length(); ) {
   2306                 int c = UTF16.charAt(charsOut,i);
   2307                 i += UTF16.getCharCount(c);
   2308                 if (set.contains(c)) {
   2309                     UTF16.append(bad, c);
   2310                 }
   2311             }
   2312             if (bad.length() > 0) {
   2313                 errln(Utility.escape("FAIL: set " + set + " contains " + bad +
   2314                         ", expected non-containment of " + charsOut));
   2315             } else {
   2316                 logln(Utility.escape("Ok: set " + set + " does not contain " + charsOut));
   2317             }
   2318         }
   2319     }
   2320 
   2321     void expectPattern(UnicodeSet set,
   2322             String pattern,
   2323             String expectedPairs) {
   2324         set.applyPattern(pattern);
   2325         if (!getPairs(set).equals(expectedPairs)) {
   2326             errln("FAIL: applyPattern(\"" + pattern +
   2327                     "\") => pairs \"" +
   2328                     Utility.escape(getPairs(set)) + "\", expected \"" +
   2329                     Utility.escape(expectedPairs) + "\"");
   2330         } else {
   2331             logln("Ok:   applyPattern(\"" + pattern +
   2332                     "\") => pairs \"" +
   2333                     Utility.escape(getPairs(set)) + "\"");
   2334         }
   2335     }
   2336 
   2337     void expectToPattern(UnicodeSet set,
   2338             String expPat,
   2339             String[] expStrings) {
   2340         String pat = set.toPattern(true);
   2341         if (pat.equals(expPat)) {
   2342             logln("Ok:   toPattern() => \"" + pat + "\"");
   2343         } else {
   2344             errln("FAIL: toPattern() => \"" + pat + "\", expected \"" + expPat + "\"");
   2345             return;
   2346         }
   2347         if (expStrings == null) {
   2348             return;
   2349         }
   2350         boolean in = true;
   2351         for (int i=0; i<expStrings.length; ++i) {
   2352             if (expStrings[i] == NOT) { // sic; pointer comparison
   2353                 in = false;
   2354                 continue;
   2355             }
   2356             boolean contained = set.contains(expStrings[i]);
   2357             if (contained == in) {
   2358                 logln("Ok: " + expPat +
   2359                         (contained ? " contains {" : " does not contain {") +
   2360                         Utility.escape(expStrings[i]) + "}");
   2361             } else {
   2362                 errln("FAIL: " + expPat +
   2363                         (contained ? " contains {" : " does not contain {") +
   2364                         Utility.escape(expStrings[i]) + "}");
   2365             }
   2366         }
   2367     }
   2368 
   2369     void expectPairs(UnicodeSet set, String expectedPairs) {
   2370         if (!getPairs(set).equals(expectedPairs)) {
   2371             errln("FAIL: Expected pair list \"" +
   2372                     Utility.escape(expectedPairs) + "\", got \"" +
   2373                     Utility.escape(getPairs(set)) + "\"");
   2374         }
   2375     }
   2376     static final String CharsToUnicodeString(String s) {
   2377         return Utility.unescape(s);
   2378     }
   2379 
   2380     /* Test the method public UnicodeSet getSet() */
   2381     public void TestGetSet() {
   2382         UnicodeSetIterator us = new UnicodeSetIterator();
   2383         try {
   2384             us.getSet();
   2385         } catch (Exception e) {
   2386             errln("UnicodeSetIterator.getSet() was not suppose to given an " + "an exception.");
   2387         }
   2388     }
   2389 
   2390     /* Tests the method public UnicodeSet add(Collection<?> source) */
   2391     public void TestAddCollection() {
   2392         UnicodeSet us = new UnicodeSet();
   2393         Collection<?> s = null;
   2394         try {
   2395             us.add(s);
   2396             errln("UnicodeSet.add(Collection<?>) was suppose to return an exception for a null parameter.");
   2397         } catch (Exception e) {
   2398         }
   2399     }
   2400 
   2401     public void TestConstants() {
   2402         assertEquals("Empty", new UnicodeSet(), UnicodeSet.EMPTY);
   2403         assertEquals("All", new UnicodeSet(0,0x10FFFF), UnicodeSet.ALL_CODE_POINTS);
   2404     }
   2405 
   2406     public void TestIteration() {
   2407         UnicodeSet us1 = new UnicodeSet("[abcM{xy}]");
   2408         assertEquals("", "M, a-c", CollectionUtilities.join(us1.ranges(), ", "));
   2409 
   2410         // Sample code
   2411         for (@SuppressWarnings("unused") EntryRange range : us1.ranges()) {
   2412             // do something with code points between range.codepointEnd and range.codepointEnd;
   2413         }
   2414         for (@SuppressWarnings("unused") String s : us1.strings()) {
   2415             // do something with each string;
   2416         }
   2417 
   2418         String[] tests = {
   2419                 "[M-Qzab{XY}{ZW}]",
   2420                 "[]",
   2421                 "[a]",
   2422                 "[a-c]",
   2423                 "[{XY}]",
   2424         };
   2425         for (String test : tests) {
   2426             UnicodeSet us = new UnicodeSet(test);
   2427             UnicodeSetIterator it = new UnicodeSetIterator(us);
   2428             for (EntryRange range : us.ranges()) {
   2429                 final String title = range.toString();
   2430                 logln(title);
   2431                 it.nextRange();
   2432                 assertEquals(title, it.codepoint, range.codepoint);
   2433                 assertEquals(title, it.codepointEnd, range.codepointEnd);
   2434             }
   2435             for (String s : us.strings()) {
   2436                 it.nextRange();
   2437                 assertEquals("strings", it.string, s);
   2438             }
   2439             assertFalse("", it.next());
   2440         }
   2441     }
   2442 
   2443     public void TestReplaceAndDelete() {
   2444         UnicodeSetSpanner m;
   2445 
   2446         m = new UnicodeSetSpanner(new UnicodeSet("[._]"));
   2447         assertEquals("", "abc", m.deleteFrom("_._a_._b_._c_._"));
   2448         assertEquals("", "_.__.__.__._", m.deleteFrom("_._a_._b_._c_._", SpanCondition.NOT_CONTAINED));
   2449 
   2450         assertEquals("", "a_._b_._c", m.trim("_._a_._b_._c_._"));
   2451         assertEquals("", "a_._b_._c_._", m.trim("_._a_._b_._c_._", TrimOption.LEADING));
   2452         assertEquals("", "_._a_._b_._c", m.trim("_._a_._b_._c_._", TrimOption.TRAILING));
   2453 
   2454         assertEquals("", "a??b??c", m.replaceFrom("a_._b_._c", "??", CountMethod.WHOLE_SPAN));
   2455         assertEquals("", "a??b??c", m.replaceFrom(m.trim("_._a_._b_._c_._"), "??", CountMethod.WHOLE_SPAN));
   2456         assertEquals("", "XYXYXYaXYXYXYbXYXYXYcXYXYXY", m.replaceFrom("_._a_._b_._c_._", "XY"));
   2457         assertEquals("", "XYaXYbXYcXY", m.replaceFrom("_._a_._b_._c_._", "XY", CountMethod.WHOLE_SPAN));
   2458 
   2459         m = new UnicodeSetSpanner(new UnicodeSet("\\p{uppercase}"));
   2460         assertEquals("", "TQBF", m.deleteFrom("The Quick Brown Fox.", SpanCondition.NOT_CONTAINED));
   2461 
   2462         m = new UnicodeSetSpanner(m.getUnicodeSet().addAll(new UnicodeSet("\\p{lowercase}")));
   2463         assertEquals("", "TheQuickBrownFox", m.deleteFrom("The Quick Brown Fox.", SpanCondition.NOT_CONTAINED));
   2464 
   2465         m = new UnicodeSetSpanner(new UnicodeSet("[{ab}]"));
   2466         assertEquals("", "XXc acb", m.replaceFrom("ababc acb", "X"));
   2467         assertEquals("", "Xc acb", m.replaceFrom("ababc acb", "X", CountMethod.WHOLE_SPAN));
   2468         assertEquals("", "ababX", m.replaceFrom("ababc acb", "X", CountMethod.WHOLE_SPAN, SpanCondition.NOT_CONTAINED));
   2469     }
   2470 
   2471     public void TestCodePoints() {
   2472         // test supplemental code points and strings clusters
   2473         checkCodePoints("x\u0308", "z\u0308", CountMethod.MIN_ELEMENTS, SpanCondition.SIMPLE, null, 1);
   2474         checkCodePoints("", "", CountMethod.MIN_ELEMENTS, SpanCondition.SIMPLE, null, 1);
   2475         checkCodePoints("", "", CountMethod.MIN_ELEMENTS, SpanCondition.SIMPLE, null, 1);
   2476     }
   2477 
   2478     private void checkCodePoints(String a, String b, CountMethod quantifier, SpanCondition spanCondition,
   2479             String expectedReplaced, int expectedCount) {
   2480         final String ab = a+b;
   2481         UnicodeSetSpanner m = new UnicodeSetSpanner(new UnicodeSet("[{" + a + "}]"));
   2482         assertEquals("new UnicodeSetSpanner(\"[{" + a + "}]\").countIn(\"" + ab + "\")",
   2483                 expectedCount,
   2484                 callCountIn(m, ab, quantifier, spanCondition)
   2485                 );
   2486 
   2487         if (expectedReplaced == null) {
   2488             expectedReplaced = "-" + b;
   2489         }
   2490         assertEquals("new UnicodeSetSpanner(\"[{" + a + "}]\").replaceFrom(\"" + ab + "\", \"-\")",
   2491                 expectedReplaced, m.replaceFrom(ab, "-", quantifier));
   2492     }
   2493 
   2494     public void TestCountIn() {
   2495         UnicodeSetSpanner m = new UnicodeSetSpanner(new UnicodeSet("[ab]"));
   2496         checkCountIn(m, CountMethod.MIN_ELEMENTS, SpanCondition.SIMPLE, "abc", 2);
   2497         checkCountIn(m, CountMethod.WHOLE_SPAN, SpanCondition.SIMPLE, "abc", 1);
   2498         checkCountIn(m, CountMethod.MIN_ELEMENTS, SpanCondition.NOT_CONTAINED, "acccb", 3);
   2499     }
   2500 
   2501     public void checkCountIn(UnicodeSetSpanner m, CountMethod countMethod, SpanCondition spanCondition, String target, int expected) {
   2502         final String message = "countIn " + countMethod + ", " + spanCondition;
   2503         assertEquals(message, callCountIn(m, target, countMethod, spanCondition), expected);
   2504     }
   2505 
   2506     public int callCountIn(UnicodeSetSpanner m, final String ab, CountMethod countMethod, SpanCondition spanCondition) {
   2507         return spanCondition != SpanCondition.SIMPLE ? m.countIn(ab, countMethod, spanCondition)
   2508                 : countMethod != CountMethod.MIN_ELEMENTS ? m.countIn(ab, countMethod)
   2509                         : m.countIn(ab);
   2510     }
   2511 
   2512     public void testForSpanGaps() {
   2513         String[] items = {"a", "b", "c", "{ab}", "{bc}", "{cd}", "{abc}", "{bcd}"};
   2514         final int limit = 1<<items.length;
   2515         // build long string for testing
   2516         StringBuilder longBuffer = new StringBuilder();
   2517         for (int i = 1; i < limit; ++i) {
   2518             longBuffer.append("x");
   2519             longBuffer.append(getCombinations(items, i));
   2520         }
   2521         String longString = longBuffer.toString();
   2522         longString = longString.replace("{","").replace("}","");
   2523 
   2524         long start = System.nanoTime();
   2525         for (int i = 1; i < limit; ++i) {
   2526             UnicodeSet us = new UnicodeSet("[" + getCombinations(items, i) + "]");
   2527             int problemFound = checkSpan(longString, us, SpanCondition.SIMPLE);
   2528             if (problemFound >= 0) {
   2529                 assertEquals("Testing " + longString + ", found gap at", -1, problemFound);
   2530                 break;
   2531             }
   2532         }
   2533         long end = System.nanoTime();
   2534         logln("Time for SIMPLE   :\t" + (end-start));
   2535         start = System.nanoTime();
   2536         for (int i = 1; i < limit; ++i) {
   2537             UnicodeSet us = new UnicodeSet("[" + getCombinations(items, i) + "]");
   2538             int problemFound = checkSpan(longString, us, SpanCondition.CONTAINED);
   2539             if (problemFound >= 0) {
   2540                 assertEquals("Testing " + longString + ", found gap at", -1, problemFound);
   2541                 break;
   2542             }
   2543         }
   2544         end = System.nanoTime();
   2545         logln("Time for CONTAINED:\t" + (end-start));
   2546     }
   2547 
   2548     /**
   2549      * Check that there are no gaps, when we alternate spanning. That is, there
   2550      * should only be a zero length span at the very start.
   2551      * @param longString
   2552      * @param us
   2553      * @param simple
   2554      */
   2555     private int checkSpan(String longString, UnicodeSet us, SpanCondition spanCondition) {
   2556         int start = 0;
   2557         while (start < longString.length()) {
   2558             int limit = us.span(longString, start, spanCondition);
   2559             if (limit == longString.length()) {
   2560                 break;
   2561             } else if (limit == start && start != 0) {
   2562                 return start;
   2563             }
   2564             start = limit;
   2565             limit = us.span(longString, start, SpanCondition.NOT_CONTAINED);
   2566             if (limit == start) {
   2567                 return start;
   2568             }
   2569             start = limit;
   2570         }
   2571         return -1; // all ok
   2572     }
   2573 
   2574     private String getCombinations(String[] items, int bitset) {
   2575         StringBuilder result = new StringBuilder();
   2576         for (int i = 0; bitset != 0; ++i) {
   2577             int other = bitset & (1 << i);
   2578             if (other != 0) {
   2579                 bitset ^= other;
   2580                 result.append(items[i]);
   2581             }
   2582         }
   2583         return result.toString();
   2584     }
   2585 
   2586     public void TestCharSequenceArgs() {
   2587         // statics
   2588         assertEquals("CharSequence from", new UnicodeSet("[{abc}]"), UnicodeSet.from(new StringBuilder("abc")));
   2589         assertEquals("CharSequence fromAll", new UnicodeSet("[a-c]"), UnicodeSet.fromAll(new StringBuilder("abc")));
   2590         assertEquals("CharSequence compare", 1.0f, Math.signum(UnicodeSet.compare(new StringBuilder("abc"), 0x61)));
   2591         assertEquals("CharSequence compare", -1.0f, Math.signum(UnicodeSet.compare(0x61, new StringBuilder("abc"))));
   2592         assertEquals("CharSequence compare", 0.0f, Math.signum(UnicodeSet.compare(new StringBuilder("a"), 0x61)));
   2593         assertEquals("CharSequence compare", 0.0f, Math.signum(UnicodeSet.compare(0x61, new StringBuilder("a"))));
   2594         assertEquals("CharSequence getSingleCodePoint", 0x1F466, UnicodeSet.getSingleCodePoint(new StringBuilder("")));
   2595 
   2596         // iterables/arrays
   2597         Iterable<StringBuilder> iterable = Arrays.asList(new StringBuilder("A"), new StringBuilder("B"));
   2598         assertEquals("CharSequence containsAll", true, new UnicodeSet("[AB]").containsAll(iterable));
   2599         assertEquals("CharSequence containsAll", false, new UnicodeSet("[a-cA]").containsAll(iterable));
   2600         assertEquals("CharSequence containsNone", true, new UnicodeSet("[a-c]").containsNone(iterable) );
   2601         assertEquals("CharSequence containsNone", false, new UnicodeSet("[a-cA]").containsNone(iterable) );
   2602         assertEquals("CharSequence containsSome", true, new UnicodeSet("[a-cA]").containsSome(iterable) );
   2603         assertEquals("CharSequence containsSome", false, new UnicodeSet("[a-c]").containsSome(iterable) );
   2604         assertEquals("CharSequence addAll", new UnicodeSet("[a-cAB]"), new UnicodeSet("[a-cA]").addAll(new StringBuilder("A"), new StringBuilder("B")) );
   2605         assertEquals("CharSequence removeAll", new UnicodeSet("[a-c]"), new UnicodeSet("[a-cA]").removeAll( iterable) );
   2606         assertEquals("CharSequence retainAll", new UnicodeSet("[A]"), new UnicodeSet("[a-cA]").retainAll( iterable) );
   2607 
   2608         // UnicodeSet results
   2609         assertEquals("CharSequence add", new UnicodeSet("[Aa-c{abc}{qr}]"), new UnicodeSet("[a-cA{qr}]").add(new StringBuilder("abc")) );
   2610         assertEquals("CharSequence retain", new UnicodeSet("[{abc}]"), new UnicodeSet("[a-cA{abc}{qr}]").retain(new StringBuilder("abc")) );
   2611         assertEquals("CharSequence remove", new UnicodeSet("[Aa-c{qr}]"), new UnicodeSet("[a-cA{abc}{qr}]").remove(new StringBuilder("abc")) );
   2612         assertEquals("CharSequence complement", new UnicodeSet("[Aa-c{qr}]"), new UnicodeSet("[a-cA{abc}{qr}]").complement(new StringBuilder("abc")) );
   2613         assertEquals("CharSequence complement", new UnicodeSet("[Aa-c{abc}{qr}]"), new UnicodeSet("[a-cA{qr}]").complement(new StringBuilder("abc")) );
   2614 
   2615         assertEquals("CharSequence addAll", new UnicodeSet("[a-cABC]"), new UnicodeSet("[a-cA]").addAll(new StringBuilder("ABC")) );
   2616         assertEquals("CharSequence retainAll", new UnicodeSet("[a-c]"), new UnicodeSet("[a-cA]").retainAll(new StringBuilder("abcB")) );
   2617         assertEquals("CharSequence removeAll", new UnicodeSet("[Aab]"), new UnicodeSet("[a-cA]").removeAll(new StringBuilder("cC")) );
   2618         assertEquals("CharSequence complementAll", new UnicodeSet("[ABbc]"), new UnicodeSet("[a-cA]").complementAll(new StringBuilder("aB")) );
   2619 
   2620         // containment
   2621         assertEquals("CharSequence contains", true, new UnicodeSet("[a-cA{ab}]"). contains(new StringBuilder("ab")) );
   2622         assertEquals("CharSequence containsNone", false, new UnicodeSet("[a-cA]"). containsNone(new StringBuilder("ab"))  );
   2623         assertEquals("CharSequence containsSome", true, new UnicodeSet("[a-cA{ab}]"). containsSome(new StringBuilder("ab"))  );
   2624 
   2625         // spanning
   2626         assertEquals("CharSequence span", 3, new UnicodeSet("[a-cA]"). span(new StringBuilder("abc"), SpanCondition.SIMPLE) );
   2627         assertEquals("CharSequence span", 3, new UnicodeSet("[a-cA]"). span(new StringBuilder("abc"), 1, SpanCondition.SIMPLE) );
   2628         assertEquals("CharSequence spanBack", 0, new UnicodeSet("[a-cA]"). spanBack(new StringBuilder("abc"), SpanCondition.SIMPLE) );
   2629         assertEquals("CharSequence spanBack", 0, new UnicodeSet("[a-cA]"). spanBack(new StringBuilder("abc"), 1, SpanCondition.SIMPLE) );
   2630 
   2631         // internal
   2632         OutputInt outCount = new OutputInt();
   2633         assertEquals("CharSequence matchesAt", 2, new UnicodeSet("[a-cA]"). matchesAt(new StringBuilder("abc"), 1) );
   2634         assertEquals("CharSequence spanAndCount", 3, new UnicodeSet("[a-cA]"). spanAndCount(new StringBuilder("abc"), 1, SpanCondition.SIMPLE, outCount ) );
   2635         assertEquals("CharSequence findIn", 3, new UnicodeSet("[a-cA]"). findIn(new StringBuilder("abc"), 1, true) );
   2636         assertEquals("CharSequence findLastIn", -1, new UnicodeSet("[a-cA]"). findLastIn(new StringBuilder("abc"), 1, true) );
   2637         assertEquals("CharSequence add", "c", new UnicodeSet("[abA]"). stripFrom(new StringBuilder("abc"), true));
   2638     }
   2639 }
   2640