Home | History | Annotate | Download | only in collator
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /*
      4  *******************************************************************************
      5  * Copyright (C) 2008-2015, International Business Machines Corporation and
      6  * others. All Rights Reserved.
      7  *******************************************************************************
      8  */
      9 package com.ibm.icu.dev.test.collator;
     10 import java.util.ArrayList;
     11 import java.util.Arrays;
     12 import java.util.Collection;
     13 import java.util.Iterator;
     14 import java.util.LinkedHashSet;
     15 import java.util.List;
     16 import java.util.Locale;
     17 import java.util.Set;
     18 import java.util.TreeSet;
     19 
     20 import org.junit.Test;
     21 
     22 import com.ibm.icu.dev.test.TestFmwk;
     23 import com.ibm.icu.dev.util.CollectionUtilities;
     24 import com.ibm.icu.impl.ICUDebug;
     25 import com.ibm.icu.impl.Row;
     26 import com.ibm.icu.impl.Row.R4;
     27 import com.ibm.icu.lang.UCharacter;
     28 import com.ibm.icu.lang.UProperty;
     29 import com.ibm.icu.lang.UScript;
     30 import com.ibm.icu.text.AlphabeticIndex;
     31 import com.ibm.icu.text.AlphabeticIndex.Bucket;
     32 import com.ibm.icu.text.AlphabeticIndex.Bucket.LabelType;
     33 import com.ibm.icu.text.AlphabeticIndex.ImmutableIndex;
     34 import com.ibm.icu.text.AlphabeticIndex.Record;
     35 import com.ibm.icu.text.Collator;
     36 import com.ibm.icu.text.Normalizer2;
     37 import com.ibm.icu.text.RawCollationKey;
     38 import com.ibm.icu.text.RuleBasedCollator;
     39 import com.ibm.icu.text.UTF16;
     40 import com.ibm.icu.text.UnicodeSet;
     41 import com.ibm.icu.util.ULocale;
     42 
     43 /**
     44  * @author Mark Davis
     45  */
     46 public class AlphabeticIndexTest extends TestFmwk {
    /** The rightwards arrow character (U+2192). */
    private static final String ARROW = "\u2192";
    /** Debug switch, taken from the ICUDebug setting named "alphabeticindex". */
    private static final boolean DEBUG = ICUDebug.enabled("alphabeticindex");
     52 
     53     public static Set<String> KEY_LOCALES = new LinkedHashSet(Arrays.asList(
     54             "en", "es", "de", "fr", "ja", "it", "tr", "pt", "zh", "nl",
     55             "pl", "ar", "ru", "zh_Hant", "ko", "th", "sv", "fi", "da",
     56             "he", "nb", "el", "hr", "bg", "sk", "lt", "vi", "lv", "sr",
     57             "pt_PT", "ro", "hu", "cs", "id", "sl", "fil", "fa", "uk",
     58             "ca", "hi", "et", "eu", "is", "sw", "ms", "bn", "am", "ta",
     59             "te", "mr", "ur", "ml", "kn", "gu", "or"));
    /**
     * Test data: one row per locale. Element 0 of a row is the locale ID; element 1 is a
     * colon-separated list of characters or labels for that locale.
     * NOTE(review): element 1 looks like the expected index labels for the locale, but the
     * tests visible alongside this table only read element 0 -- confirm against other users.
     */
    private String[][] localeAndIndexCharactersLists = new String[][] {
            /* Arabic*/ {"ar", "\u0627:\u0628:\u062A:\u062B:\u062C:\u062D:\u062E:\u062F:\u0630:\u0631:\u0632:\u0633:\u0634:\u0635:\u0636:\u0637:\u0638:\u0639:\u063A:\u0641:\u0642:\u0643:\u0644:\u0645:\u0646:\u0647:\u0648:\u064A"},
            /* Bulgarian*/  {"bg", "\u0410:\u0411:\u0412:\u0413:\u0414:\u0415:\u0416:\u0417:\u0418:\u0419:\u041A:\u041B:\u041C:\u041D:\u041E:\u041F:\u0420:\u0421:\u0422:\u0423:\u0424:\u0425:\u0426:\u0427:\u0428:\u0429:\u042E:\u042F"},
            /* Catalan*/    {"ca", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
            /* Czech*/  {"cs", "A:B:C:\u010C:D:E:F:G:H:CH:I:J:K:L:M:N:O:P:Q:R:\u0158:S:\u0160:T:U:V:W:X:Y:Z:\u017D"},
            /* Danish*/ {"da", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u00C6:\u00D8:\u00C5"},
            /* German*/ {"de", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
            /* Greek*/  {"el", "\u0391:\u0392:\u0393:\u0394:\u0395:\u0396:\u0397:\u0398:\u0399:\u039A:\u039B:\u039C:\u039D:\u039E:\u039F:\u03A0:\u03A1:\u03A3:\u03A4:\u03A5:\u03A6:\u03A7:\u03A8:\u03A9"},
            /* English*/    {"en", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
            /* Spanish*/    {"es", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:\u00D1:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
            /* Estonian*/   {"et", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:\u0160:Z:\u017D:T:U:V:\u00D5:\u00C4:\u00D6:\u00DC:X:Y"},
            /* Basque*/ {"eu", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
            /* Finnish*/    {"fi", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u00C5:\u00C4:\u00D6"},
            /* Filipino*/   {"fil", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:\u00D1:Ng:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
            /* French*/ {"fr", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
            /* Hebrew*/ {"he", "\u05D0:\u05D1:\u05D2:\u05D3:\u05D4:\u05D5:\u05D6:\u05D7:\u05D8:\u05D9:\u05DB:\u05DC:\u05DE:\u05E0:\u05E1:\u05E2:\u05E4:\u05E6:\u05E7:\u05E8:\u05E9:\u05EA"},
            /* Icelandic*/  {"is", "A:\u00C1:B:C:D:\u00D0:E:\u00C9:F:G:H:I:\u00CD:J:K:L:M:N:O:\u00D3:P:Q:R:S:T:U:\u00DA:V:W:X:Y:\u00DD:Z:\u00DE:\u00C6:\u00D6"},
            /* Italian*/    {"it", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
            /* Japanese*/   {"ja", "\u3042:\u304B:\u3055:\u305F:\u306A:\u306F:\u307E:\u3084:\u3089:\u308F"},
            /* Korean*/ {"ko", "\u3131:\u3134:\u3137:\u3139:\u3141:\u3142:\u3145:\u3147:\u3148:\u314A:\u314B:\u314C:\u314D:\u314E"},
            /* Lithuanian*/ {"lt", "A:B:C:\u010C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:\u0160:T:U:V:Z:\u017D"},
            /* Latvian*/    {"lv", "A:B:C:\u010C:D:E:F:G:\u0122:H:I:J:K:\u0136:L:\u013B:M:N:\u0145:O:P:Q:R:S:\u0160:T:U:V:W:X:Z:\u017D"},
            /* Norwegian Bokm\u00E5l*/  {"nb", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u00C6:\u00D8:\u00C5"},
            /* Dutch*/  {"nl", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
            /* Polish*/ {"pl", "A:\u0104:B:C:\u0106:D:E:\u0118:F:G:H:I:J:K:L:\u0141:M:N:\u0143:O:\u00D3:P:Q:R:S:\u015A:T:U:V:W:X:Y:Z:\u0179:\u017B"},
            /* Portuguese*/ {"pt", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
            /* Romanian*/   {"ro", "A:\u0102:\u00C2:B:C:D:E:F:G:H:I:\u00CE:J:K:L:M:N:O:P:Q:R:S:\u0218:T:\u021A:U:V:W:X:Y:Z"},
            /* Russian*/    {"ru", "\u0410:\u0411:\u0412:\u0413:\u0414:\u0415:\u0416:\u0417:\u0418:\u0419:\u041A:\u041B:\u041C:\u041D:\u041E:\u041F:\u0420:\u0421:\u0422:\u0423:\u0424:\u0425:\u0426:\u0427:\u0428:\u0429:\u042B:\u042D:\u042E:\u042F"},
            /* Slovak*/ {"sk", "A:\u00C4:B:C:\u010C:D:E:F:G:H:CH:I:J:K:L:M:N:O:\u00D4:P:Q:R:S:\u0160:T:U:V:W:X:Y:Z:\u017D"},
            /* Slovenian*/  {"sl", "A:B:C:\u010C:\u0106:D:\u0110:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:\u0160:T:U:V:W:X:Y:Z:\u017D"},
            /* Serbian*/    {"sr", "\u0410:\u0411:\u0412:\u0413:\u0414:\u0402:\u0415:\u0416:\u0417:\u0418:\u0408:\u041A:\u041B:\u0409:\u041C:\u041D:\u040A:\u041E:\u041F:\u0420:\u0421:\u0422:\u040B:\u0423:\u0424:\u0425:\u0426:\u0427:\u040F:\u0428"},
            /* Swedish*/    {"sv", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u00C5:\u00C4:\u00D6"},
            /* Turkish*/    {"tr", "A:B:C:\u00C7:D:E:F:G:H:I:\u0130:J:K:L:M:N:O:\u00D6:P:Q:R:S:\u015E:T:U:\u00DC:V:W:X:Y:Z"},
            /* Ukrainian*/  {"uk", "\u0410:\u0411:\u0412:\u0413:\u0490:\u0414:\u0415:\u0404:\u0416:\u0417:\u0418:\u0406:\u0407:\u0419:\u041A:\u041B:\u041C:\u041D:\u041E:\u041F:\u0420:\u0421:\u0422:\u0423:\u0424:\u0425:\u0426:\u0427:\u0428:\u0429:\u042E:\u042F"},
            /* Vietnamese*/ {"vi", "A:\u0102:\u00C2:B:C:D:\u0110:E:\u00CA:F:G:H:I:J:K:L:M:N:O:\u00D4:\u01A0:P:Q:R:S:T:U:\u01AF:V:W:X:Y:Z"},
            /* Chinese*/    {"zh", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
            /* Chinese (Traditional Han)*/  {"zh_Hant", "1\u5283:2\u5283:3\u5283:4\u5283:5\u5283:6\u5283:7\u5283:8\u5283:9\u5283:10\u5283:11\u5283:12\u5283:13\u5283:14\u5283:15\u5283:16\u5283:17\u5283:18\u5283:19\u5283:20\u5283:21\u5283:22\u5283:23\u5283:24\u5283:25\u5283:26\u5283:27\u5283:28\u5283:29\u5283:30\u5283:31\u5283:32\u5283:33\u5283:35\u5283:36\u5283:39\u5283:48\u5283"},

            // The rows below are commented out to make the test run faster.
            // Later, make these run in an extended test run.

            //            /* Afrikaans*/  {"af", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
            //            /* Akan*/   {"ak", "A:B:C:D:E:\u0190:F:G:H:I:J:K:L:M:N:O:\u0186:P:Q:R:S:T:U:V:W:X:Y:Z"},
            //            /* Asu*/    {"asa", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"},
            //            /* Azerbaijani*/    {"az", "A:B:C:\u00C7:D:E:\u018F:F:G:\u011E:H:X:I:\u0130:J:K:Q:L:M:N:O:\u00D6:P:R:S:\u015E:T:U:\u00DC:V:W:Y:Z"},
            //            /* Belarusian*/ {"be", "\u0410:\u0411:\u0412:\u0413:\u0414:\u0415:\u0416:\u0417:\u0406:\u0419:\u041A:\u041B:\u041C:\u041D:\u041E:\u041F:\u0420:\u0421:\u0422:\u0423:\u0424:\u0425:\u0426:\u0427:\u0428:\u042B:\u042D:\u042E:\u042F"},
            //            /* Bemba*/  {"bem", "A:B:C:E:F:G:I:J:K:L:M:N:O:P:S:T:U:W:Y"},
            //            /* Bena*/   {"bez", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:Y:Z"},
            //            /* Bambara*/    {"bm", "A:B:C:D:E:\u0190:F:G:H:I:J:K:L:M:N:\u019D:\u014A:O:\u0186:P:R:S:T:U:W:Y:Z"},
            //            /* Tibetan*/    {"bo", "\u0F40:\u0F41:\u0F42:\u0F44:\u0F45:\u0F46:\u0F47:\u0F49:\u0F4F:\u0F50:\u0F51:\u0F53:\u0F54:\u0F55:\u0F56:\u0F58:\u0F59:\u0F5A:\u0F5B:\u0F5D:\u0F5E:\u0F5F:\u0F60:\u0F61:\u0F62:\u0F63:\u0F64:\u0F66:\u0F67:\u0F68"},
            //            /* Chiga*/  {"cgg", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
            //            /* Cherokee*/   {"chr", "\u13A0:\u13A6:\u13AD:\u13B3:\u13B9:\u13BE:\u13C6:\u13CC:\u13D3:\u13DC:\u13E3:\u13E9:\u13EF"},
            //            /* Welsh*/  {"cy", "A:B:C:CH:D:E:F:FF:G:H:I:J:L:LL:M:N:O:P:PH:R:RH:S:T:TH:U:W:Y"},
            //            /* Taita*/  {"dav", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"},
            //            /* Embu*/   {"ebu", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
            //            /* Ewe*/    {"ee", "A:B:C:D:\u0189:E:\u0190:F:\u0191:G:\u0194:H:I:J:K:L:M:N:\u014A:O:\u0186:P:Q:R:S:T:U:V:\u01B2:W:X:Y:Z"},
            //            /* Esperanto*/  {"eo", "A:B:C:\u0108:D:E:F:G:\u011C:H:\u0124:I:J:\u0134:K:L:M:N:O:P:R:S:\u015C:T:U:\u016C:V:Z"},
            //            /* Fulah*/  {"ff", "A:B:\u0181:C:D:\u018A:E:F:G:H:I:J:K:L:M:N:\u014A:O:P:R:S:T:U:W:Y:\u01B3"},
            //            /* Faroese*/    {"fo", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u00C6:\u00D8"},
            //            /* Gusii*/  {"guz", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"},
            //            /* Hausa*/  {"ha", "A:B:\u0181:C:D:\u018A:E:F:G:H:I:J:K:\u0198:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
            //            /* Igbo*/   {"ig", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
            //            /* Machame*/    {"jmc", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"},
            //            /* Kabyle*/ {"kab", "A:B:C:D:E:\u0190:F:G:\u0194:H:I:J:K:L:M:N:P:Q:R:S:T:U:W:X:Y:Z"},
            //            /* Kamba*/  {"kam", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
            //            /* Makonde*/    {"kde", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
            //            /* Kabuverdianu*/   {"kea", "A:B:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:X:Z"},
            //            /* Koyra Chiini*/   {"khq", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:\u019D:\u014A:O:P:Q:R:S:T:U:W:X:Y:Z"},
            //            /* Kikuyu*/ {"ki", "A:B:C:D:E:G:H:I:J:K:M:N:O:R:T:U:W:Y"},
            //            /* Kalenjin*/   {"kln", "A:B:C:D:E:G:H:I:J:K:L:M:N:O:P:R:S:T:U:W:Y"},
            //            /* Langi*/  {"lag", "A:B:C:D:E:F:G:H:I:\u0197:J:K:L:M:N:O:P:Q:R:S:T:U:\u0244:V:W:X:Y:Z"},
            //            /* Ganda*/  {"lg", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
            //            /* Luo*/    {"luo", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y"},
            //            /* Luyia*/  {"luy", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
            //            /* Masai*/  {"mas", "A:B:C:D:E:\u0190:G:H:I:\u0197:J:K:L:M:N:\u014A:O:\u0186:P:R:S:T:U:\u0244:W:Y"},
            //            /* Meru*/   {"mer", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
            //            /* Morisyen*/   {"mfe", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:X:Y:Z"},
            //            /* Malagasy*/   {"mg", "A:B:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:V:Y:Z"},
            // This should be the correct data.  Commented till it is fixed in CLDR collation data.
            // {"mk", "\u0410:\u0411:\u0412:\u0413:\u0403:\u0414:\u0415:\u0416:\u0417:\u0405:\u0418:\u0408:\u041A:\u040C:\u041B:\u0409:\u041C:\u041D:\u040A:\u041E:\u041F:\u0420:\u0421:\u0422:\u0423:\u0424:\u0425:\u0426:\u0427:\u040F:\u0428"},
            //            /* Macedonian*/ {"mk", "\u0410:\u0411:\u0412:\u0413:\u0414:\u0403:\u0415:\u0416:\u0417:\u0405:\u0418:\u0408:\u041A:\u041B:\u0409:\u041C:\u041D:\u040A:\u041E:\u041F:\u0420:\u0421:\u0422:\u040C:\u0423:\u0424:\u0425:\u0426:\u0427:\u040F:\u0428"},
            // This should be the correct data.  Commented till it is fixed in CLDR collation data.
            // {"mt", "A:B:C:\u010A:D:E:F:\u0120:G:G\u0126:H:\u0126:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:\u017B:Z"},
            //            /* Maltese*/    {"mt", "A:B:\u010A:C:D:E:F:\u0120:G:G\u0126:H:\u0126:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:\u017B:Z"},
            //            /* Nama*/   {"naq", "A:B:C:D:E:F:G:H:I:K:M:N:O:P:Q:R:S:T:U:W:X:Y:Z"},
            //            /* North Ndebele*/  {"nd", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:S:T:U:V:W:X:Y:Z"},
            //            /* Norwegian Nynorsk*/  {"nn", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u00C6:\u00D8:\u00C5"},
            //            /* Nyankole*/   {"nyn", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
            //            /* Oromo*/  {"om", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
            //            /* Romansh*/    {"rm", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
            //            /* Rombo*/  {"rof", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"},
            //            /* Kinyarwanda*/    {"rw", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
            //            /* Rwa*/    {"rwk", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"},
            //            /* Samburu*/    {"saq", "A:B:C:D:E:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y"},
            //            /* Sena*/   {"seh", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
            //            /* Koyraboro Senni*/    {"ses", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:\u019D:\u014A:O:P:Q:R:S:T:U:W:X:Y:Z"},
            //            /* Sango*/  {"sg", "A:B:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"},
            //            /* Tachelhit*/  {"shi", "A:B:C:D:E:\u0190:F:G:\u0194:H:I:J:K:L:M:N:Q:R:S:T:U:W:X:Y:Z"},
            //            /* Tachelhit (Tifinagh)*/   {"shi_Tfng", "\u2D30:\u2D31:\u2D33:\u2D37:\u2D39:\u2D3B:\u2D3C:\u2D3D:\u2D40:\u2D43:\u2D44:\u2D45:\u2D47:\u2D49:\u2D4A:\u2D4D:\u2D4E:\u2D4F:\u2D53:\u2D54:\u2D55:\u2D56:\u2D59:\u2D5A:\u2D5B:\u2D5C:\u2D5F:\u2D61:\u2D62:\u2D63:\u2D65"},
            //            /* Shona*/  {"sn", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"},
            //            /* Teso*/   {"teo", "A:B:C:D:E:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:X:Y"},
            //            /* Tonga*/  {"to", "A:B:C:D:E:F:G:H:\u02BB:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
            //            /* Central Morocco Tamazight*/  {"tzm", "A:B:C:D:E:\u0190:F:G:\u0194:H:I:J:K:L:M:N:Q:R:S:T:U:W:X:Y:Z"},
            //            /* Uzbek (Latin)*/  {"uz_Latn", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u02BF"},
            //            /* Vunjo*/  {"vun", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"},
            //            /* Soga*/   {"xog", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
            //            /* Yoruba*/ {"yo", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},

    };
    168 
    169 //    public void TestAAKeyword() {
    170 //    ICUResourceBundle rb = (ICUResourceBundle) UResourceBundle.getBundleInstance(
    171 //            ICUResourceBundle.ICU_COLLATION_BASE_NAME, "zh");
    172 //    showBundle(rb, 0);
    173 //        String[] keywords = Collator.getKeywords();
    174 //        System.out.println(Arrays.asList(keywords));
    175 //        String locale = "zh";
    176 //        ULocale ulocale = new ULocale(locale);
    177 //        for (String keyword : keywords) {
    178 //            List<String> values = Arrays.asList(Collator.getKeywordValuesForLocale(keyword, ulocale, false));
    179 //            List<String> allValues = Arrays.asList(Collator.getKeywordValues(keyword));
    180 //            for (String value : allValues) {
    181 //                System.out.println(keyword + "=" + value);
    182 //                checkKeyword(locale, value, values.contains(value));
    183 //            }
    184 //        }
    185 //    }
    186 //
    187 //    private void checkKeyword(String locale, String collationValue, boolean shouldExist) {
    188 //        final ULocale base = new ULocale(locale);
    189 //        final ULocale desired = new ULocale(locale + "@collation=" + collationValue);
    190 //        Collator foo = Collator.getInstance(desired);
    191 //        ULocale actual = foo.getLocale(ULocale.ACTUAL_LOCALE);
    192 //        if (shouldExist) {
    193 //            assertEquals("actual should match desired", desired, actual);
    194 //        } else {
    195 //            assertEquals("actual should match base", base, actual);
    196 //        }
    197 //        int comp = foo.compare("a", "");
    198 //        assertEquals("should fall back to default for zh", -1, comp);
    199 //    }
    200 //
    201 //    /**
    202 //     * @param rb
    203 //     * @param i
    204 //     */
    205 //    private static void showBundle(UResourceBundle rb, int i) {
    206 //        for (String key : rb.keySet()) {
    207 //            System.out.print("\n" + Utility.repeat("  ", i) + key);
    208 //            UResourceBundle rb2 = rb.get(key);
    209 //            showBundle(rb2, i+1);
    210 //        }
    211 //    }
    212 
    213 
    214     @Test
    215     public void TestA() {
    216         String[][] tests = {{"zh_Hant", "", "12"},
    217                 {"zh", "", "D"}
    218                 /*, "zh@collation=unihan", "ja@collation=unihan", "ko@collation=unihan"*/
    219                 };
    220         for (String[] test : tests) {
    221             AlphabeticIndex<Integer> alphabeticIndex = new AlphabeticIndex<Integer>(new ULocale(test[0]));
    222             final String probe = test[1];
    223             final String expectedLabel = test[2];
    224             alphabeticIndex.addRecord(probe, 1);
    225             List labels = alphabeticIndex.getBucketLabels();
    226             logln(labels.toString());
    227             Bucket<Integer> bucket = find(alphabeticIndex, probe);
    228             assertEquals("locale " + test[0] + " name=" + probe + " in bucket",
    229                     expectedLabel, bucket.getLabel());
    230         }
    231     }
    232 
    233     private Bucket<Integer> find(AlphabeticIndex<Integer> alphabeticIndex, final String probe) {
    234         for (Bucket<Integer> bucket : alphabeticIndex) {
    235             for (Record<Integer> record : bucket) {
    236                 if (record.getName().equals(probe)) {
    237                     return bucket;
    238                 }
    239             }
    240         }
    241         return null;
    242     }
    243 
    244     @Test
    245     public void TestFirstCharacters() {
    246 
    247         AlphabeticIndex alphabeticIndex = new AlphabeticIndex(Locale.ENGLISH);
    248         RuleBasedCollator collator = alphabeticIndex.getCollator();
    249         collator.setStrength(Collator.IDENTICAL);
    250         Collection<String> firsts = alphabeticIndex.getFirstCharactersInScripts();
    251         // Verify that each script is represented exactly once.
    252         // Exclude pseudo-scripts like Common (no letters).
    253         // Exclude scripts like Braille and Sutton SignWriting
    254         // because they only have symbols, not letters.
    255         UnicodeSet missingScripts = new UnicodeSet(
    256                 "[^[:inherited:][:unknown:][:common:][:Braille:][:SignWriting:]]");
    257         String last = "";
    258         for (String index : firsts) {
    259             if (collator.compare(last,index) >= 0) {
    260                 errln("Characters not in order: " + last + " !< " + index);
    261             }
    262             int script = getFirstRealScript(index);
    263             if (script == UScript.UNKNOWN) { continue; }
    264             UnicodeSet s = new UnicodeSet().applyIntPropertyValue(UProperty.SCRIPT, script);
    265             if (missingScripts.containsNone(s)) {
    266                 errln("2nd character in script: " + index + "\t" + new UnicodeSet(missingScripts).retainAll(s).toPattern(false));
    267             }
    268             missingScripts.removeAll(s);
    269         }
    270         if (missingScripts.size() != 0) {
    271             String missingScriptNames = "";
    272             UnicodeSet missingChars = new UnicodeSet(missingScripts);
    273             for(;;) {
    274                 int c = missingChars.charAt(0);
    275                 if (c < 0) {
    276                     break;
    277                 }
    278                 int script = UScript.getScript(c);
    279                 missingScriptNames += " " +
    280                         UCharacter.getPropertyValueName(
    281                                 UProperty.SCRIPT, script, UProperty.NameChoice.SHORT);
    282                 missingChars.removeAll(new UnicodeSet().applyIntPropertyValue(UProperty.SCRIPT, script));
    283             }
    284             errln("Missing character from:" + missingScriptNames + " -- " + missingScripts);
    285         }
    286     }
    287 
    288     private static final int getFirstRealScript(CharSequence s) {
    289         for (int i = 0; i < s.length();) {
    290             int c = Character.codePointAt(s, i);
    291             int script = UScript.getScript(c);
    292             if (script != UScript.UNKNOWN && script != UScript.INHERITED && script != UScript.COMMON) {
    293                 return script;
    294             }
    295             i += Character.charCount(c);
    296         }
    297         return UScript.UNKNOWN;
    298     }
    299 
    300     @Test
    301     public void TestBuckets() {
    302         ULocale additionalLocale = ULocale.ENGLISH;
    303 
    304         for (String[] pair : localeAndIndexCharactersLists) {
    305             checkBuckets(pair[0], SimpleTests, additionalLocale, "E", "edgar", "Effron", "Effron");
    306         }
    307     }
    308 
    309     @Test
    310     public void TestEmpty() {
    311         // just verify that it doesn't blow up.
    312         Set<ULocale> locales = new LinkedHashSet<ULocale>();
    313         locales.add(ULocale.ROOT);
    314         locales.addAll(Arrays.asList(ULocale.getAvailableLocales()));
    315         for (ULocale locale : locales) {
    316             try {
    317                 AlphabeticIndex<String> alphabeticIndex = new AlphabeticIndex(locale);
    318                 alphabeticIndex.addRecord("hi", "HI");
    319                 for (Bucket<String> bucket : alphabeticIndex) {
    320                     @SuppressWarnings("unused")
    321                     LabelType labelType = bucket.getLabelType();
    322                 }
    323             } catch (Exception e) {
    324                 errln("Exception when creating AlphabeticIndex for:\t" + locale.toLanguageTag());
    325                 errln(e.toString());
    326             }
    327         }
    328     }
    329 
    330     @Test
    331     public void TestSetGetSpecialLabels() {
    332         AlphabeticIndex index = new AlphabeticIndex(Locale.GERMAN).addLabels(new Locale("ru"));
    333         index.setUnderflowLabel("__");
    334         index.setInflowLabel("--");
    335         index.setOverflowLabel("^^");
    336         assertEquals("underflow label", "__", index.getUnderflowLabel());
    337         assertEquals("inflow label", "--", index.getInflowLabel());
    338         assertEquals("overflow label", "^^", index.getOverflowLabel());
    339 
    340         ImmutableIndex ii = index.buildImmutableIndex();
    341         assertEquals("0 -> underflow", "__", ii.getBucket(ii.getBucketIndex("0")).getLabel());
    342         assertEquals(" -> inflow", "--", ii.getBucket(ii.getBucketIndex("")).getLabel());
    343         assertEquals(" -> overflow", "^^", ii.getBucket(ii.getBucketIndex("")).getLabel());
    344     }
    345 
    346     @Test
    347     public void TestInflow() {
    348         Object[][] tests = {
    349                 {0, ULocale.ENGLISH},
    350                 {0, ULocale.ENGLISH, new ULocale("el")},
    351                 {1, ULocale.ENGLISH, new ULocale("ru")},
    352                 {0, ULocale.ENGLISH, new ULocale("el"), new UnicodeSet("[\u2C80]"), new ULocale("ru")},
    353                 {0, ULocale.ENGLISH},
    354                 {2, ULocale.ENGLISH, new ULocale("ru"), ULocale.JAPANESE},
    355         };
    356         for (Object[] test : tests) {
    357             int expected = (Integer) test[0];
    358             AlphabeticIndex<Double> alphabeticIndex = new AlphabeticIndex((ULocale)test[1]);
    359             for (int i = 2; i < test.length; ++i) {
    360                 if (test[i] instanceof ULocale) {
    361                     alphabeticIndex.addLabels((ULocale)test[i]);
    362                 } else {
    363                     alphabeticIndex.addLabels((UnicodeSet)test[i]);
    364                 }
    365             }
    366             Counter<AlphabeticIndex.Bucket.LabelType> counter = new Counter();
    367             for (Bucket<Double> bucket : alphabeticIndex) {
    368                 LabelType labelType = bucket.getLabelType();
    369                 counter.add(labelType, 1);
    370             }
    371             String printList = Arrays.asList(test).toString();
    372             assertEquals(LabelType.UNDERFLOW + "\t" + printList, 1, counter.get(LabelType.UNDERFLOW));
    373             assertEquals(LabelType.INFLOW + "\t" + printList, expected, counter.get(LabelType.INFLOW));
    374             if (expected != counter.get(LabelType.INFLOW)) {
    375                 // for debugging
    376                 AlphabeticIndex<Double> indexCharacters2 = new AlphabeticIndex((ULocale)test[1]);
    377                 for (int i = 2; i < test.length; ++i) {
    378                     if (test[i] instanceof ULocale) {
    379                         indexCharacters2.addLabels((ULocale)test[i]);
    380                     } else {
    381                         indexCharacters2.addLabels((UnicodeSet)test[i]);
    382                     }
    383                 }
    384                 List<Bucket<Double>> buckets = CollectionUtilities.addAll(alphabeticIndex.iterator(), new ArrayList<Bucket<Double>>());
    385                 logln(buckets.toString());
    386             }
    387             assertEquals(LabelType.OVERFLOW + "\t" + printList, 1, counter.get(LabelType.OVERFLOW));
    388         }
    389     }
    390 
    391     private void checkBuckets(String localeString, String[] test, ULocale additionalLocale, String testBucket, String... items) {
    392         StringBuilder UI = new StringBuilder();
    393         ULocale desiredLocale = new ULocale(localeString);
    394 
    395         // Create a simple index where the values for the strings are Integers, and add the strings
    396         AlphabeticIndex<Integer> index = new AlphabeticIndex<Integer>(desiredLocale).addLabels(additionalLocale);
    397         int counter = 0;
    398         Counter<String> itemCount = new Counter();
    399         for (String item : test) {
    400             index.addRecord(item, counter++);
    401             itemCount.add(item, 1);
    402         }
    403         assertEquals("getRecordCount()", (int)itemCount.getTotal(), index.getRecordCount());  // code coverage
    404 
    405         List<String> labels = index.getBucketLabels();
    406         ImmutableIndex<Integer> immIndex = index.buildImmutableIndex();
    407 
    408         logln(desiredLocale + "\t" + desiredLocale.getDisplayName(ULocale.ENGLISH) + " - " + desiredLocale.getDisplayName(desiredLocale) + "\t"
    409                 + index.getCollator().getLocale(ULocale.ACTUAL_LOCALE));
    410         UI.setLength(0);
    411         UI.append(desiredLocale + "\t");
    412         boolean showAll = true;
    413 
    414         // Show index at top. We could skip or gray out empty buckets
    415         for (AlphabeticIndex.Bucket<Integer> bucket : index) {
    416             if (showAll || bucket.size() != 0) {
    417                 showLabelAtTop(UI, bucket.getLabel());
    418             }
    419         }
    420         logln(UI.toString());
    421 
    422         // Show the buckets with their contents, skipping empty buckets
    423         int bucketIndex = 0;
    424         for (Bucket<Integer> bucket : index) {
    425             assertEquals("bucket label vs. iterator",
    426                     labels.get(bucketIndex), bucket.getLabel());
    427             assertEquals("bucket label vs. immutable",
    428                     labels.get(bucketIndex), immIndex.getBucket(bucketIndex).getLabel());
    429             assertEquals("bucket label type vs. immutable",
    430                     bucket.getLabelType(), immIndex.getBucket(bucketIndex).getLabelType());
    431             for (Record<Integer> r : bucket) {
    432                 CharSequence name = r.getName();
    433                 assertEquals("getBucketIndex(" + name + ")",
    434                         bucketIndex, index.getBucketIndex(name));
    435                 assertEquals("immutable getBucketIndex(" + name + ")",
    436                         bucketIndex, immIndex.getBucketIndex(name));
    437             }
    438             if (bucket.getLabel().equals(testBucket)) {
    439                 Counter<String> keys = getKeys(bucket);
    440                 for (String item : items) {
    441                     long globalCount = itemCount.get(item);
    442                     long localeCount = keys.get(item);
    443                     if (globalCount != localeCount) {
    444                         errln("Error: in " + "'" + testBucket + "', '" + item + "' should have count "
    445                                 + globalCount + " but has count " + localeCount);
    446                     }
    447 
    448                 }
    449             }
    450 
    451             if (bucket.size() != 0) {
    452                 showLabelInList(UI, bucket.getLabel());
    453                 for (AlphabeticIndex.Record<Integer> item : bucket) {
    454                     showIndexedItem(UI, item.getName(), item.getData());
    455                 }
    456                 logln(UI.toString());
    457             }
    458             ++bucketIndex;
    459         }
    460         assertEquals("getBucketCount()", bucketIndex, index.getBucketCount());
    461         assertEquals("immutable getBucketCount()", bucketIndex, immIndex.getBucketCount());
    462 
    463         assertNull("immutable getBucket(-1)", immIndex.getBucket(-1));
    464         assertNull("immutable getBucket(count)", immIndex.getBucket(bucketIndex));
    465 
    466         for (Bucket<Integer> bucket : immIndex) {
    467             assertEquals("immutable bucket size", 0, bucket.size());
    468             assertFalse("immutable bucket iterator.hasNext()", bucket.iterator().hasNext());
    469         }
    470     }
    471 
    472     public <T> void showIndex(AlphabeticIndex<T> index, boolean showEmpty) {
    473         logln("Actual");
    474         StringBuilder UI = new StringBuilder();
    475         for (Bucket<T> bucket : index) {
    476             if (showEmpty || bucket.size() != 0) {
    477                 showLabelInList(UI, bucket.getLabel());
    478                 for (Record<T> item : bucket) {
    479                     showIndexedItem(UI, item.getName(), item.getData());
    480                 }
    481                 logln(UI.toString());
    482             }
    483         }
    484     }
    485 
    486     /**
    487      * @param myBucketLabels
    488      * @param myBucketContents
    489      * @param b
    490      */
    491     private void showIndex(List<String> myBucketLabels, ArrayList<Set<R4<RawCollationKey, String, Integer, Double>>> myBucketContents, boolean showEmpty) {
    492         logln("Alternative");
    493         StringBuilder UI = new StringBuilder();
    494 
    495         for (int i = 0; i < myBucketLabels.size(); ++i) {
    496             Set<R4<RawCollationKey, String, Integer, Double>> bucket = myBucketContents.get(i);
    497             if (!showEmpty && bucket.size() == 0) {
    498                 continue;
    499             }
    500             UI.setLength(0);
    501             UI.append("*").append(myBucketLabels.get(i));
    502             for (R4<RawCollationKey, String, Integer, Double> item : bucket) {
    503                 UI.append("\t ").append(item.get1().toString()).append(ARROW).append(item.get3().toString());
    504             }
    505             logln(UI.toString());
    506         }
    507     }
    508 
    509     private void showLabelAtTop(StringBuilder buffer, String label) {
    510         buffer.append(label + " ");
    511     }
    512 
    513     private <T> void showIndexedItem(StringBuilder buffer, CharSequence key, T value) {
    514         buffer.append("\t " + key + ARROW + value);
    515     }
    516 
    517     private void showLabelInList(StringBuilder buffer, String label) {
    518         buffer.setLength(0);
    519         buffer.append(label);
    520     }
    521 
    522     private Counter<String> getKeys(AlphabeticIndex.Bucket<Integer> entry) {
    523         Counter<String> keys = new Counter<String>();
    524         for (AlphabeticIndex.Record x : entry) {
    525             String key = x.getName().toString();
    526             keys.add(key, 1);
    527         }
    528         return keys;
    529     }
    530 
    531     @Test
    532     public void TestIndexCharactersList() {
    533         for (String[] localeAndIndexCharacters : localeAndIndexCharactersLists) {
    534             ULocale locale = new ULocale(localeAndIndexCharacters[0]);
    535             String expectedIndexCharacters = "\u2026:" + localeAndIndexCharacters[1] + ":\u2026";
    536             Collection<String> alphabeticIndex = new AlphabeticIndex(locale).getBucketLabels();
    537 
    538             // Join the elements of the list to a string with delimiter ":"
    539             StringBuilder sb = new StringBuilder();
    540             Iterator<String> iter = alphabeticIndex.iterator();
    541             while (iter.hasNext()) {
    542                 sb.append(iter.next());
    543                 if (!iter.hasNext()) {
    544                     break;
    545                 }
    546                 sb.append(":");
    547             }
    548             String actualIndexCharacters = sb.toString();
    549             if (!expectedIndexCharacters.equals(actualIndexCharacters)) {
    550                 errln("Test failed for locale " + localeAndIndexCharacters[0] +
    551                         "\n  Expected = |" + expectedIndexCharacters + "|\n  actual   = |" + actualIndexCharacters + "|");
    552             }
    553         }
    554     }
    555 
    556     @Test
    557     public void TestBasics() {
    558         ULocale[] list = ULocale.getAvailableLocales();
    559         // get keywords combinations
    560         // don't bother with multiple combinations at this point
    561         List keywords = new ArrayList();
    562         keywords.add("");
    563 
    564         String[] collationValues = Collator.getKeywordValues("collation");
    565         for (int j = 0; j < collationValues.length; ++j) {
    566             keywords.add("@collation=" + collationValues[j]);
    567         }
    568 
    569         for (int i = 0; i < list.length; ++i) {
    570             for (Iterator it = keywords.iterator(); it.hasNext();) {
    571                 String collationValue = (String) it.next();
    572                 String localeString = list[i].toString();
    573                 if (!KEY_LOCALES.contains(localeString)) continue; // TODO change in exhaustive
    574                 ULocale locale = new ULocale(localeString + collationValue);
    575                 if (collationValue.length() > 0 && !Collator.getFunctionalEquivalent("collation", locale).equals(locale)) {
    576                     //logln("Skipping " + locale);
    577                     continue;
    578                 }
    579 
    580                 if (locale.getCountry().length() != 0) {
    581                     continue;
    582                 }
    583                 boolean isUnihan = collationValue.contains("unihan");
    584                 AlphabeticIndex alphabeticIndex = new AlphabeticIndex(locale);
    585                 if (isUnihan) {
    586                     // Unihan tailorings have a label per radical, and there are at least 214,
    587                     // if not more when simplified radicals are distinguished.
    588                     alphabeticIndex.setMaxLabelCount(500);
    589                 }
    590                 final Collection mainChars = alphabeticIndex.getBucketLabels();
    591                 String mainCharString = mainChars.toString();
    592                 if (mainCharString.length() > 500) {
    593                     mainCharString = mainCharString.substring(0,500) + "...";
    594                 }
    595                 logln(mainChars.size() + "\t" + locale + "\t" + locale.getDisplayName(ULocale.ENGLISH));
    596                 logln("Index:\t" + mainCharString);
    597                 if (!isUnihan && mainChars.size() > 100) {
    598                     errln("Index character set too large: " +
    599                             locale + " [" + mainChars.size() + "]:\n    " + mainChars);
    600                 }
    601             }
    602         }
    603     }
    604 
    605     @Test
    606     public void TestClientSupport() {
    607         for (String localeString : new String[] {"zh"}) { // KEY_LOCALES, new String[] {"zh"}
    608             ULocale ulocale = new ULocale(localeString);
    609             AlphabeticIndex<Double> alphabeticIndex = new AlphabeticIndex<Double>(ulocale).addLabels(Locale.ENGLISH);
    610             RuleBasedCollator collator = alphabeticIndex.getCollator();
    611             String [][] tests;
    612 
    613             if (!localeString.equals("zh") ) {
    614                 tests = new String[][] {SimpleTests};
    615             } else {
    616                 tests = new String[][] {SimpleTests, hackPinyin, simplifiedNames};
    617             }
    618 
    619             for (String [] shortTest : tests) {
    620                 double testValue = 100;
    621                 alphabeticIndex.clearRecords();
    622                 for (String name : shortTest) {
    623                     alphabeticIndex.addRecord(name, testValue++);
    624                 }
    625 
    626                 if (DEBUG) showIndex(alphabeticIndex, false);
    627 
    628                 // make my own copy
    629                 testValue = 100;
    630                 List<String> myBucketLabels = alphabeticIndex.getBucketLabels();
    631                 ArrayList<Set<R4<RawCollationKey, String, Integer, Double>>> myBucketContents = new ArrayList<Set<R4<RawCollationKey, String, Integer, Double>>>(myBucketLabels.size());
    632                 for (int i = 0; i < myBucketLabels.size(); ++i) {
    633                     myBucketContents.add(new TreeSet<R4<RawCollationKey, String, Integer, Double>>());
    634                 }
    635                 for (String name : shortTest) {
    636                     int bucketIndex = alphabeticIndex.getBucketIndex(name);
    637                     if (bucketIndex > myBucketContents.size()) {
    638                         alphabeticIndex.getBucketIndex(name); // call again for debugging
    639                     }
    640                     Set<R4<RawCollationKey, String, Integer, Double>> myBucket = myBucketContents.get(bucketIndex);
    641                     RawCollationKey rawCollationKey = collator.getRawCollationKey(name, null);
    642                     R4<RawCollationKey, String, Integer, Double> row = Row.of(rawCollationKey, name, name.length(), testValue++);
    643                     myBucket.add(row);
    644                 }
    645                 if (DEBUG) showIndex(myBucketLabels, myBucketContents, false);
    646 
    647                 // now compare
    648                 int index = 0;
    649                 boolean gotError = false;
    650                 for (AlphabeticIndex.Bucket<Double> bucket : alphabeticIndex) {
    651                     String bucketLabel = bucket.getLabel();
    652                     String myLabel = myBucketLabels.get(index);
    653                     if (!bucketLabel.equals(myLabel)) {
    654                         gotError |= !assertEquals(ulocale + "\tBucket Labels (" + index + ")", bucketLabel, myLabel);
    655                     }
    656                     Set<R4<RawCollationKey, String, Integer, Double>> myBucket = myBucketContents.get(index);
    657                     Iterator<R4<RawCollationKey, String, Integer, Double>> myBucketIterator = myBucket.iterator();
    658                     int recordIndex = 0;
    659                     for (Record<Double> record : bucket) {
    660                         String myName = null;
    661                         if (myBucketIterator.hasNext()) {
    662                             R4<RawCollationKey, String, Integer, Double> myRecord = myBucketIterator.next();
    663                             myName = myRecord.get1();
    664                         }
    665                         if (!record.getName().equals(myName)) {
    666                             gotError |= !assertEquals(ulocale + "\t" + bucketLabel + "\t" + "Record Names (" + index + "." + recordIndex++ + ")", record.getName(), myName);
    667                         }
    668                     }
    669                     while (myBucketIterator.hasNext()) {
    670                         R4<RawCollationKey, String, Integer, Double> myRecord = myBucketIterator.next();
    671                         String myName = myRecord.get1();
    672                         gotError |= !assertEquals(ulocale + "\t" + bucketLabel + "\t" + "Record Names (" + index + "." + recordIndex++ + ")", null, myName);
    673                     }
    674                     index++;
    675                 }
    676                 if (gotError) {
    677                     showIndex(myBucketLabels, myBucketContents, false);
    678                     showIndex(alphabeticIndex, false);
    679                 }
    680             }
    681         }
    682     }
    683 
    684     @Test
    685     public void TestFirstScriptCharacters() {
    686         Collection<String> firstCharacters =
    687                 new AlphabeticIndex(ULocale.ENGLISH).getFirstCharactersInScripts();
    688         Collection<String> expectedFirstCharacters = firstStringsInScript((RuleBasedCollator) Collator.getInstance(ULocale.ROOT));
    689         Collection<String> diff = new TreeSet<String>(firstCharacters);
    690         diff.removeAll(expectedFirstCharacters);
    691         assertTrue("First Characters contains unexpected ones: " + diff, diff.isEmpty());
    692         diff.clear();
    693         diff.addAll(expectedFirstCharacters);
    694         diff.removeAll(firstCharacters);
    695         assertTrue("First Characters missing expected ones: " + diff, diff.isEmpty());
    696     }
    697 
    // Candidate strings iterated by firstStringsInScript(). Per the pattern, the set holds
    // everything whose NFC_Quick_Check value is not "No", minus the Common, Inherited and
    // Unknown scripts. The set is frozen after construction.
    698     private static final UnicodeSet TO_TRY = new UnicodeSet("[[:^nfcqc=no:]-[:sc=Common:]-[:sc=Inherited:]-[:sc=Unknown:]]").freeze();
    699 
    700     /**
    701      * Returns a collection of all the "First" characters of scripts, according to the collation.
    702      */
    703     private static Collection<String> firstStringsInScript(RuleBasedCollator ruleBasedCollator) {
    704         String[] results = new String[UScript.CODE_LIMIT];
    705         for (String current : TO_TRY) {
    706             if (ruleBasedCollator.compare(current, "a") < 0) { // we only want "real" script characters, not symbols.
    707                 continue;
    708             }
    709             int script = UScript.getScript(current.codePointAt(0));
    710             if (results[script] == null) {
    711                 results[script] = current;
    712             } else if (ruleBasedCollator.compare(current, results[script]) < 0) {
    713                 results[script] = current;
    714             }
    715         }
    716 
    717         try {
    718             UnicodeSet extras = new UnicodeSet();
    719             UnicodeSet expansions = new UnicodeSet();
    720             ruleBasedCollator.getContractionsAndExpansions(extras, expansions, true);
    721             extras.addAll(expansions).removeAll(TO_TRY);
    722             if (extras.size() != 0) {
    723                 Normalizer2 normalizer = Normalizer2.getNFKCInstance();
    724                 for (String current : extras) {
    725                     if (!normalizer.isNormalized(current) || ruleBasedCollator.compare(current, "9") <= 0) {
    726                         continue;
    727                     }
    728                     int script = getFirstRealScript(current);
    729                     if (script == UScript.UNKNOWN && !isUnassignedBoundary(current)) { continue; }
    730                     if (results[script] == null) {
    731                         results[script] = current;
    732                     } else if (ruleBasedCollator.compare(current, results[script]) < 0) {
    733                         results[script] = current;
    734                     }
    735                 }
    736             }
    737         } catch (Exception e) {
    738         } // why have a checked exception???
    739 
    740         // TODO: We should not test that we get the same strings, but that we
    741         // get strings that sort primary-equal to those from the implementation.
    742 
    743         Collection<String> result = new ArrayList<String>();
    744         for (int i = 0; i < results.length; ++i) {
    745             if (results[i] != null) {
    746                 result.add(results[i]);
    747             }
    748         }
    749         return result;
    750     }
    751 
    752     private static final boolean isUnassignedBoundary(CharSequence s) {
    753         // The root collator provides a script-first-primary boundary contraction
    754         // for the unassigned-implicit range.
    755         return s.charAt(0) == 0xfdd1 &&
    756                 UScript.getScript(Character.codePointAt(s, 1)) == UScript.UNKNOWN;
    757     }
    758 
    /**
     * Placeholder test: every statement in the body is commented out, so it performs no checks.
     */
    759     @Test
    760     public void TestZZZ() {
    761         //            int x = 3;
    762         //            AlphabeticIndex index = new AlphabeticIndex(ULocale.ENGLISH);
    763         //            UnicodeSet additions = new UnicodeSet();
    764         //            additions.add(0x410).add(0x415);  // Cyrillic
    765         //            // additions.add(0x391).add(0x393);     // Greek
    766         //            index.addLabels(additions);
    767         //            int lc = index.getLabels().size();
    768         //            List  labels = index.getLabels();
    769         //            System.out.println("Label Count = " + lc + "\t" + labels);
    770         //            System.out.println("Bucket Count =" + index.getBucketCount());
    771     }
    772 
    773     @Test
    774     public void TestSimplified() {
    775         checkBuckets("zh", simplifiedNames, ULocale.ENGLISH, "W", "\u897f");
    776     }
    777 
    778     @Test
    779     public void TestTraditional() {
    780         checkBuckets("zh_Hant", traditionalNames, ULocale.ENGLISH, "\u4e9f", "\u5357\u9580");
    781     }
    782 
    // Mixed sample names used by TestClientSupport: the empty string, symbols, digit strings,
    // Latin-script names (several with non-ASCII letters), Greek strings and CJK ideograph
    // strings. Some entries appear more than once. Two further data lines are commented out.
    783     static final String[] SimpleTests = {
    784         "",
    785         "\u1f2d\u03c1\u03b1",
    786         "$", "\u00a3", "12", "2",
    787         "Davis", "Davis", "Abbot", "\u1D05avis", "Zach", "\u1D05avis", "\u01b5", "\u0130stanbul", "Istanbul", "istanbul", "\u0131stanbul",
    788         "\u00deor", "\u00c5berg", "\u00d6stlund",
    789         "\u1f2d\u03c1\u03b1", "\u1f08\u03b8\u03b7\u03bd\u1fb6",
    790         "\u0396\u03b5\u03cd\u03c2", "\u03a0\u03bf\u03c3\u03b5\u03b9\u03b4\u1f63\u03bd", "\u1f0d\u03b9\u03b4\u03b7\u03c2", "\u0394\u03b7\u03bc\u03ae\u03c4\u03b7\u03c1", "\u1f19\u03c3\u03c4\u03b9\u03ac",
    791         //"\u1f08\u03c0\u03cc\u03bb\u03bb\u03c9\u03bd", "\u1f0c\u03c1\u03c4\u03b5\u03bc\u03b9\u03c2", "\u1f19\u03c1\u03bc\u1f23\u03c2", "\u1f0c\u03c1\u03b7\u03c2", "\u1f08\u03c6\u03c1\u03bf\u03b4\u03af\u03c4\u03b7", "\u1f2d\u03c6\u03b1\u03b9\u03c3\u03c4\u03bf\u03c2", "\u0394\u03b9\u03cc\u03bd\u03c5\u03c3\u03bf\u03c2",
    792         "\u6589\u85e4", "\u4f50\u85e4", "\u9234\u6728", "\u9ad8\u6a4b", "\u7530\u4e2d", "\u6e21\u8fba", "\u4f0a\u85e4", "\u5c71\u672c", "\u4e2d\u6751", "\u5c0f\u6797", "\u658e\u85e4", "\u52a0\u85e4",
    793         //"\u5409\u7530", "\u5c71\u7530", "\u4f50\u3005\u6728", "\u5c71\u53e3", "\u677e\u672c", "\u4e95\u4e0a", "\u6728\u6751", "\u6797", "\u6e05\u6c34"
    794     };
    795 
    // Test names used by TestClientSupport for the "zh" locale. Each row starts with a lowercase
    // Latin letter followed by two or three single Han characters.
    // NOTE(review): presumably the Han characters in a row have pinyin readings that start with
    // the row's letter - confirm.
    796     static final String[] hackPinyin = {
    797         "a", "\u5416", "\u58ba", //
    798         "b", "\u516b", "\u62d4", "\u8500", //
    799         "c", "\u5693", "\u7938", "\u9e7e", //
    800         "d", "\u5491", "\u8fcf", "\u964a", //
    801         "e","\u59b8", "\u92e8", "\u834b", //
    802         "f", "\u53d1", "\u9197", "\u99a5", //
    803         "g", "\u7324", "\u91d3", "\u8142", //
    804         "h", "\u598e", "\u927f", "\u593b", //
    805         "j", "\u4e0c", "\u6785", "\u9d58", //
    806         "k", "\u5494", "\u958b", "\u7a52", //
    807         "l", "\u5783", "\u62c9", "\u9ba5", //
    808         "m", "\u5638", "\u9ebb", "\u65c0", //
    809         "n", "\u62ff", "\u80ad", "\u685b", //
    810         "o", "\u5662", "\u6bee", "\u8bb4", //
    811         "p", "\u5991", "\u8019", "\u8c31", //
    812         "q", "\u4e03", "\u6053", "\u7f56", //
    813         "r", "\u5465", "\u72aa", "\u6e03", //
    814         "s", "\u4ee8", "\u9491", "\u93c1", //
    815         "t", "\u4ed6", "\u9248", "\u67dd", //
    816         "w", "\u5c72", "\u5558", "\u5a7a", //
    817         "x", "\u5915", "\u5438", "\u6bbe", //
    818         "y", "\u4e2b", "\u82bd", "\u8574", //
    819         "z", "\u5e00", "\u707d", "\u5c0a"
    820     };
    821 
    // Test names used by TestSimplified (locale "zh") and by TestClientSupport. The list starts
    // with four Latin-script names, followed by Han strings of one or two characters.
    // NOTE(review): presumably these are Chinese surnames in simplified form - confirm.
    822     static final String[] simplifiedNames = {
    823         "Abbot", "Morton", "Zachary", "Williams", "\u8d75", "\u94b1", "\u5b59", "\u674e", "\u5468", "\u5434", "\u90d1", "\u738b", "\u51af", "\u9648", "\u696e", "\u536b", "\u848b", "\u6c88",
    824         "\u97e9", "\u6768", "\u6731", "\u79e6", "\u5c24", "\u8bb8", "\u4f55", "\u5415", "\u65bd", "\u5f20", "\u5b54", "\u66f9", "\u4e25", "\u534e", "\u91d1", "\u9b4f", "\u9676", "\u59dc", "\u621a", "\u8c22", "\u90b9",
    825         "\u55bb", "\u67cf", "\u6c34", "\u7aa6", "\u7ae0", "\u4e91", "\u82cf", "\u6f58", "\u845b", "\u595a", "\u8303", "\u5f6d", "\u90ce", "\u9c81", "\u97e6", "\u660c", "\u9a6c", "\u82d7", "\u51e4", "\u82b1", "\u65b9",
    826         "\u4fde", "\u4efb", "\u8881", "\u67f3", "\u9146", "\u9c8d", "\u53f2", "\u5510", "\u8d39", "\u5ec9", "\u5c91", "\u859b", "\u96f7", "\u8d3a", "\u502a", "\u6c64", "\u6ed5", "\u6bb7", "\u7f57", "\u6bd5", "\u90dd",
    827         "\u90ac", "\u5b89", "\u5e38", "\u4e50", "\u4e8e", "\u65f6", "\u5085", "\u76ae", "\u535e", "\u9f50", "\u5eb7", "\u4f0d", "\u4f59", "\u5143", "\u535c", "\u987e", "\u5b5f", "\u5e73", "\u9ec4", "\u548c", "\u7a46",
    828         "\u8427", "\u5c39", "\u59da", "\u90b5", "\u6e5b", "\u6c6a", "\u7941", "\u6bdb", "\u79b9", "\u72c4", "\u7c73", "\u8d1d", "\u660e", "\u81e7", "\u8ba1", "\u4f0f", "\u6210", "\u6234", "\u8c08", "\u5b8b", "\u8305",
    829         "\u5e9e", "\u718a", "\u7eaa", "\u8212", "\u5c48", "\u9879", "\u795d", "\u8463", "\u6881", "\u675c", "\u962e", "\u84dd", "\u95fd", "\u5e2d", "\u5b63", "\u9ebb", "\u5f3a", "\u8d3e", "\u8def", "\u5a04", "\u5371",
    830         "\u6c5f", "\u7ae5", "\u989c", "\u90ed", "\u6885", "\u76db", "\u6797", "\u5201", "\u953a", "\u5f90", "\u4e18", "\u9a86", "\u9ad8", "\u590f", "\u8521", "\u7530", "\u6a0a", "\u80e1", "\u51cc", "\u970d", "\u865e",
    831         "\u4e07", "\u652f", "\u67ef", "\u661d", "\u7ba1", "\u5362", "\u83ab", "\u7ecf", "\u623f", "\u88d8", "\u7f2a", "\u5e72", "\u89e3", "\u5e94", "\u5b97", "\u4e01", "\u5ba3", "\u8d32", "\u9093", "\u90c1", "\u5355",
    832         "\u676d", "\u6d2a", "\u5305", "\u8bf8", "\u5de6", "\u77f3", "\u5d14", "\u5409", "\u94ae", "\u9f9a", "\u7a0b", "\u5d47", "\u90a2", "\u6ed1", "\u88f4", "\u9646", "\u8363", "\u7fc1", "\u8340", "\u7f8a", "\u65bc",
    833         "\u60e0", "\u7504", "\u9eb9", "\u5bb6", "\u5c01", "\u82ae", "\u7fbf", "\u50a8", "\u9773", "\u6c72", "\u90b4", "\u7cdc", "\u677e", "\u4e95", "\u6bb5", "\u5bcc", "\u5deb", "\u4e4c", "\u7126", "\u5df4", "\u5f13",
    834         "\u7267", "\u9697", "\u5c71", "\u8c37", "\u8f66", "\u4faf", "\u5b93", "\u84ec", "\u5168", "\u90d7", "\u73ed", "\u4ef0", "\u79cb", "\u4ef2", "\u4f0a", "\u5bab", "\u5b81", "\u4ec7", "\u683e", "\u66b4", "\u7518",
    835         "\u659c", "\u5389", "\u620e", "\u7956", "\u6b66", "\u7b26", "\u5218", "\u666f", "\u8a79", "\u675f", "\u9f99", "\u53f6", "\u5e78", "\u53f8", "\u97f6", "\u90dc", "\u9ece", "\u84df", "\u8584", "\u5370", "\u5bbf",
    836         "\u767d", "\u6000", "\u84b2", "\u90b0", "\u4ece", "\u9102", "\u7d22", "\u54b8", "\u7c4d", "\u8d56", "\u5353", "\u853a", "\u5c60", "\u8499", "\u6c60", "\u4e54", "\u9634", "\u90c1", "\u80e5", "\u80fd", "\u82cd",
    837         "\u53cc", "\u95fb", "\u8398", "\u515a", "\u7fdf", "\u8c2d", "\u8d21", "\u52b3", "\u9004", "\u59ec", "\u7533", "\u6276", "\u5835", "\u5189", "\u5bb0", "\u90e6", "\u96cd", "\u90e4", "\u74a9", "\u6851", "\u6842",
    838         "\u6fee", "\u725b", "\u5bff", "\u901a", "\u8fb9", "\u6248", "\u71d5", "\u5180", "\u90cf", "\u6d66", "\u5c1a", "\u519c", "\u6e29", "\u522b", "\u5e84", "\u664f", "\u67f4", "\u77bf", "\u960e", "\u5145", "\u6155",
    839         "\u8fde", "\u8339", "\u4e60", "\u5ba6", "\u827e", "\u9c7c", "\u5bb9", "\u5411", "\u53e4", "\u6613", "\u614e", "\u6208", "\u5ed6", "\u5ebe", "\u7ec8", "\u66a8", "\u5c45", "\u8861", "\u6b65", "\u90fd", "\u803f",
    840         "\u6ee1", "\u5f18", "\u5321", "\u56fd", "\u6587", "\u5bc7", "\u5e7f", "\u7984", "\u9619", "\u4e1c", "\u6b27", "\u6bb3", "\u6c83", "\u5229", "\u851a", "\u8d8a", "\u5914", "\u9686", "\u5e08", "\u5de9", "\u538d",
    841         "\u8042", "\u6641", "\u52fe", "\u6556", "\u878d", "\u51b7", "\u8a3e", "\u8f9b", "\u961a", "\u90a3", "\u7b80", "\u9976", "\u7a7a", "\u66fe", "\u6bcb", "\u6c99", "\u4e5c", "\u517b", "\u97a0", "\u987b", "\u4e30",
    842         "\u5de2", "\u5173", "\u84af", "\u76f8", "\u67e5", "\u540e", "\u8346", "\u7ea2", "\u6e38", "\u7afa", "\u6743", "\u9011", "\u76d6", "\u76ca", "\u6853", "\u516c", "\u4e07\u4fdf", "\u53f8\u9a6c", "\u4e0a\u5b98", "\u6b27\u9633",
    843         "\u590f\u4faf", "\u8bf8\u845b", "\u95fb\u4eba", "\u4e1c\u65b9", "\u8d6b\u8fde", "\u7687\u752b", "\u5c09\u8fdf", "\u516c\u7f8a", "\u6fb9\u53f0", "\u516c\u51b6", "\u5b97\u653f", "\u6fee\u9633", "\u6df3\u4e8e", "\u5355\u4e8e", "\u592a\u53d4", "\u7533\u5c60", "\u516c\u5b59", "\u4ef2\u5b59",
    844         "\u8f69\u8f95", "\u4ee4\u72d0", "\u953a\u79bb", "\u5b87\u6587", "\u957f\u5b59", "\u6155\u5bb9", "\u9c9c\u4e8e", "\u95fe\u4e18", "\u53f8\u5f92", "\u53f8\u7a7a", "\u4e0c\u5b98", "\u53f8\u5bc7", "\u4ec9", "\u7763", "\u5b50\u8f66", "\u989b\u5b59", "\u7aef\u6728", "\u5deb\u9a6c",
    845         "\u516c\u897f", "\u6f06\u96d5", "\u4e50\u6b63", "\u58e4\u9a77", "\u516c\u826f", "\u62d3\u62d4", "\u5939\u8c37", "\u5bb0\u7236", "\u8c37\u6881", "\u664b", "\u695a", "\u960e", "\u6cd5", "\u6c5d", "\u9122", "\u6d82", "\u94a6", "\u6bb5\u5e72", "\u767e\u91cc",
    846         "\u4e1c\u90ed", "\u5357\u95e8", "\u547c\u5ef6", "\u5f52", "\u6d77", "\u7f8a\u820c", "\u5fae\u751f", "\u5cb3", "\u5e05", "\u7f11", "\u4ea2", "\u51b5", "\u540e", "\u6709", "\u7434", "\u6881\u4e18", "\u5de6\u4e18", "\u4e1c\u95e8", "\u897f\u95e8",
    847         "\u5546", "\u725f", "\u4f58", "\u4f74", "\u4f2f", "\u8d4f", "\u5357\u5bab", "\u58a8", "\u54c8", "\u8c2f", "\u7b2a", "\u5e74", "\u7231", "\u9633", "\u4f5f"
    848     };
    849 
    850     static final String[] traditionalNames = { "", "Abbot", "Morton", "Zachary", "Williams", "\u8d99", "\u9322", "\u5b6b",
    851             "\u674e", "\u5468", "\u5433", "\u912d", "\u738b", "\u99ae", "\u9673", "\u696e", "\u885b", "\u8523",
    852             "\u6c88", "\u97d3", "\u694a", "\u6731", "\u79e6", "\u5c24", "\u8a31", "\u4f55", "\u5442", "\u65bd",
    853             "\u5f35", "\u5b54", "\u66f9", "\u56b4", "\u83ef", "\u91d1", "\u9b4f", "\u9676", "\u59dc", "\u621a",
    854             "\u8b1d", "\u9112", "\u55bb", "\u67cf", "\u6c34", "\u7ac7", "\u7ae0", "\u96f2", "\u8607", "\u6f58",
    855             "\u845b", "\u595a", "\u7bc4", "\u5f6d", "\u90ce", "\u9b6f", "\u97cb", "\u660c", "\u99ac", "\u82d7",
    856             "\u9cf3", "\u82b1", "\u65b9", "\u4fde", "\u4efb", "\u8881", "\u67f3", "\u9146", "\u9b91", "\u53f2",
    857             "\u5510", "\u8cbb", "\u5ec9", "\u5c91", "\u859b", "\u96f7", "\u8cc0", "\u502a", "\u6e6f", "\u6ed5",
    858             "\u6bb7", "\u7f85", "\u7562", "\u90dd", "\u9114", "\u5b89", "\u5e38", "\u6a02", "\u65bc", "\u6642",
    859             "\u5085", "\u76ae", "\u535e", "\u9f4a", "\u5eb7", "\u4f0d", "\u9918", "\u5143", "\u535c", "\u9867",
    860             "\u5b5f", "\u5e73", "\u9ec3", "\u548c", "\u7a46", "\u856d", "\u5c39", "\u59da", "\u90b5", "\u6e5b",
    861             "\u6c6a", "\u7941", "\u6bdb", "\u79b9", "\u72c4", "\u7c73", "\u8c9d", "\u660e", "\u81e7", "\u8a08",
    862             "\u4f0f", "\u6210", "\u6234", "\u8ac7", "\u5b8b", "\u8305", "\u9f90", "\u718a", "\u7d00", "\u8212",
    863             "\u5c48", "\u9805", "\u795d", "\u8463", "\u6881", "\u675c", "\u962e", "\u85cd", "\u95a9", "\u5e2d",
    864             "\u5b63", "\u9ebb", "\u5f37", "\u8cc8", "\u8def", "\u5a41", "\u5371", "\u6c5f", "\u7ae5", "\u984f",
    865             "\u90ed", "\u6885", "\u76db", "\u6797", "\u5201", "\u937e", "\u5f90", "\u4e18", "\u99f1", "\u9ad8",
    866             "\u590f", "\u8521", "\u7530", "\u6a0a", "\u80e1", "\u51cc", "\u970d", "\u865e", "\u842c", "\u652f",
    867             "\u67ef", "\u661d", "\u7ba1", "\u76e7", "\u83ab", "\u7d93", "\u623f", "\u88d8", "\u7e46", "\u5e79",
    868             "\u89e3", "\u61c9", "\u5b97", "\u4e01", "\u5ba3", "\u8cc1", "\u9127", "\u9b31", "\u55ae", "\u676d",
    869             "\u6d2a", "\u5305", "\u8af8", "\u5de6", "\u77f3", "\u5d14", "\u5409", "\u9215", "\u9f94", "\u7a0b",
    870             "\u5d47", "\u90a2", "\u6ed1", "\u88f4", "\u9678", "\u69ae", "\u7fc1", "\u8340", "\u7f8a", "\u65bc",
    871             "\u60e0", "\u7504", "\u9eb4", "\u5bb6", "\u5c01", "\u82ae", "\u7fbf", "\u5132", "\u9773", "\u6c72",
    872             "\u90b4", "\u7cdc", "\u677e", "\u4e95", "\u6bb5", "\u5bcc", "\u5deb", "\u70cf", "\u7126", "\u5df4",
    873             "\u5f13", "\u7267", "\u9697", "\u5c71", "\u8c37", "\u8eca", "\u4faf", "\u5b93", "\u84ec", "\u5168",
    874             "\u90d7", "\u73ed", "\u4ef0", "\u79cb", "\u4ef2", "\u4f0a", "\u5bae", "\u5be7", "\u4ec7", "\u6b12",
    875             "\u66b4", "\u7518", "\u659c", "\u53b2", "\u620e", "\u7956", "\u6b66", "\u7b26", "\u5289", "\u666f",
    876             "\u8a79", "\u675f", "\u9f8d", "\u8449", "\u5e78", "\u53f8", "\u97f6", "\u90dc", "\u9ece", "\u858a",
    877             "\u8584", "\u5370", "\u5bbf", "\u767d", "\u61f7", "\u84b2", "\u90b0", "\u5f9e", "\u9102", "\u7d22",
    878             "\u54b8", "\u7c4d", "\u8cf4", "\u5353", "\u85fa", "\u5c60", "\u8499", "\u6c60", "\u55ac", "\u9670",
    879             "\u9b31", "\u80e5", "\u80fd", "\u84bc", "\u96d9", "\u805e", "\u8398", "\u9ee8", "\u7fdf", "\u8b5a",
    880             "\u8ca2", "\u52de", "\u9004", "\u59ec", "\u7533", "\u6276", "\u5835", "\u5189", "\u5bb0", "\u9148",
    881             "\u96cd", "\u90e4", "\u74a9", "\u6851", "\u6842", "\u6fee", "\u725b", "\u58fd", "\u901a", "\u908a",
    882             "\u6248", "\u71d5", "\u5180", "\u90df", "\u6d66", "\u5c1a", "\u8fb2", "\u6eab", "\u5225", "\u838a",
    883             "\u664f", "\u67f4", "\u77bf", "\u95bb", "\u5145", "\u6155", "\u9023", "\u8339", "\u7fd2", "\u5ba6",
    884             "\u827e", "\u9b5a", "\u5bb9", "\u5411", "\u53e4", "\u6613", "\u614e", "\u6208", "\u5ed6", "\u5ebe",
    885             "\u7d42", "\u66a8", "\u5c45", "\u8861", "\u6b65", "\u90fd", "\u803f", "\u6eff", "\u5f18", "\u5321",
    886             "\u570b", "\u6587", "\u5bc7", "\u5ee3", "\u797f", "\u95d5", "\u6771", "\u6b50", "\u6bb3", "\u6c83",
    887             "\u5229", "\u851a", "\u8d8a", "\u5914", "\u9686", "\u5e2b", "\u978f", "\u5399", "\u8076", "\u6641",
    888             "\u52fe", "\u6556", "\u878d", "\u51b7", "\u8a3e", "\u8f9b", "\u95de", "\u90a3", "\u7c21", "\u9952",
    889             "\u7a7a", "\u66fe", "\u6bcb", "\u6c99", "\u4e5c", "\u990a", "\u97a0", "\u9808", "\u8c50", "\u5de2",
    890             "\u95dc", "\u84af", "\u76f8", "\u67e5", "\u5f8c", "\u834a", "\u7d05", "\u904a", "\u7afa", "\u6b0a",
    891             "\u9011", "\u84cb", "\u76ca", "\u6853", "\u516c", "\u4e07\u4fdf", "\u53f8\u99ac", "\u4e0a\u5b98",
    892             "\u6b50\u967d", "\u590f\u4faf", "\u8af8\u845b", "\u805e\u4eba", "\u6771\u65b9", "\u8d6b\u9023",
    893             "\u7687\u752b", "\u5c09\u9072", "\u516c\u7f8a", "\u6fb9\u53f0", "\u516c\u51b6", "\u5b97\u653f",
    894             "\u6fee\u967d", "\u6df3\u4e8e", "\u55ae\u4e8e", "\u592a\u53d4", "\u7533\u5c60", "\u516c\u5b6b",
    895             "\u4ef2\u5b6b", "\u8ed2\u8f45", "\u4ee4\u72d0", "\u937e\u96e2", "\u5b87\u6587", "\u9577\u5b6b",
    896             "\u6155\u5bb9", "\u9bae\u4e8e", "\u95ad\u4e18", "\u53f8\u5f92", "\u53f8\u7a7a", "\u4e0c\u5b98",
    897             "\u53f8\u5bc7", "\u4ec9", "\u7763", "\u5b50\u8eca", "\u9853\u5b6b", "\u7aef\u6728", "\u5deb\u99ac",
    898             "\u516c\u897f", "\u6f06\u96d5", "\u6a02\u6b63", "\u58e4\u99df", "\u516c\u826f", "\u62d3\u62d4",
    899             "\u593e\u8c37", "\u5bb0\u7236", "\u7a40\u6881", "\u6649", "\u695a", "\u95bb", "\u6cd5", "\u6c5d", "\u9122",
    900             "\u5857", "\u6b3d", "\u6bb5\u5e72", "\u767e\u91cc", "\u6771\u90ed", "\u5357\u9580", "\u547c\u5ef6",
    901             "\u6b78", "\u6d77", "\u7f8a\u820c", "\u5fae\u751f", "\u5cb3", "\u5e25", "\u7df1", "\u4ea2", "\u6cc1",
    902             "\u5f8c", "\u6709", "\u7434", "\u6881\u4e18", "\u5de6\u4e18", "\u6771\u9580", "\u897f\u9580", "\u5546",
    903             "\u725f", "\u4f58", "\u4f74", "\u4f2f", "\u8cde", "\u5357\u5bae", "\u58a8", "\u54c8", "\u8b59", "\u7b2a",
    904             "\u5e74", "\u611b", "\u967d", "\u4f5f", "\u3401", "\u3422", "\u3426", "\u3493", "\u34A5", "\u34A7",
    905             "\u34AA", "\u3536", "\u4A3B", "\u4E00", "\u4E01", "\u4E07", "\u4E0D", "\u4E17", "\u4E23", "\u4E26",
    906             "\u4E34", "\u4E82", "\u4EB8", "\u4EB9", "\u511F", "\u512D", "\u513D", "\u513E", "\u53B5", "\u56D4",
    907             "\u56D6", "\u7065", "\u7069", "\u706A", "\u7E9E", "\u9750", "\u9F49", "\u9F7E", "\u9F98", "\uD840\uDC35",
    908             "\uD840\uDC3D", "\uD840\uDC3E", "\uD840\uDC41", "\uD840\uDC46", "\uD840\uDC4C", "\uD840\uDC4E",
    909             "\uD840\uDC53", "\uD840\uDC55", "\uD840\uDC56", "\uD840\uDC5F", "\uD840\uDC60", "\uD840\uDC7A",
    910             "\uD840\uDC7B", "\uD840\uDCC8", "\uD840\uDD9E", "\uD840\uDD9F", "\uD840\uDDA0", "\uD840\uDDA1",
    911             "\uD841\uDD3B", "\uD842\uDCCA", "\uD842\uDCCB", "\uD842\uDD6C", "\uD842\uDE0B", "\uD842\uDE0C",
    912             "\uD842\uDED1", "\uD844\uDD9F", "\uD845\uDD19", "\uD845\uDD1A", "\uD846\uDD3B", "\uD84C\uDF5C",
    913             "\uD85A\uDDC4", "\uD85A\uDDC5", "\uD85C\uDD98", "\uD85E\uDCB1", "\uD861\uDC04", "\uD864\uDDD3",
    914             "\uD865\uDE63", "\uD869\uDCCA", "\uD86B\uDE9A", };
    915 
    916     /**
    917      * Test AlphabeticIndex vs. root with script reordering.
    918      */
    919     @Test
    920     public void TestHaniFirst() {
    921         RuleBasedCollator coll = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT);
    922         coll.setReorderCodes(UScript.HAN);
    923         AlphabeticIndex index = new AlphabeticIndex(coll);
    924         assertEquals("getBucketCount()", 1, index.getBucketCount());   // ... (underflow only)
    925         index.addLabels(Locale.ENGLISH);
    926         assertEquals("getBucketCount()", 28, index.getBucketCount());  // ... A-Z ...
    927         int bucketIndex = index.getBucketIndex("\u897f");
    928         assertEquals("getBucketIndex(U+897F)", 0, bucketIndex);  // underflow bucket
    929         bucketIndex = index.getBucketIndex("i");
    930         assertEquals("getBucketIndex(i)", 9, bucketIndex);
    931         bucketIndex = index.getBucketIndex("\u03B1");
    932         assertEquals("getBucketIndex(Greek alpha)", 27, bucketIndex);
    933         // U+50005 is an unassigned code point which sorts at the end, independent of the Hani group.
    934         bucketIndex = index.getBucketIndex(UTF16.valueOf(0x50005));
    935         assertEquals("getBucketIndex(U+50005)", 27, bucketIndex);
    936         bucketIndex = index.getBucketIndex("\uFFFF");
    937         assertEquals("getBucketIndex(U+FFFF)", 27, bucketIndex);
    938     }
    939 
    940     /**
    941      * Test AlphabeticIndex vs. Pinyin with script reordering.
    942      */
    943     @Test
    944     public void TestPinyinFirst() {
    945         RuleBasedCollator coll = (RuleBasedCollator) Collator.getInstance(ULocale.CHINESE);
    946         coll.setReorderCodes(UScript.HAN);
    947         AlphabeticIndex index = new AlphabeticIndex(coll);
    948         assertEquals("getBucketCount()", 28, index.getBucketCount());   // ... A-Z ...
    949         index.addLabels(Locale.CHINESE);
    950         assertEquals("getBucketCount()", 28, index.getBucketCount());  // ... A-Z ...
    951         int bucketIndex = index.getBucketIndex("\u897f");
    952         assertEquals("getBucketIndex(U+897F)", 'X' - 'A' + 1, bucketIndex);
    953         bucketIndex = index.getBucketIndex("i");
    954         assertEquals("getBucketIndex(i)", 9, bucketIndex);
    955         bucketIndex = index.getBucketIndex("\u03B1");
    956         assertEquals("getBucketIndex(Greek alpha)", 27, bucketIndex);
    957         // U+50005 is an unassigned code point which sorts at the end, independent of the Hani group.
    958         bucketIndex = index.getBucketIndex(UTF16.valueOf(0x50005));
    959         assertEquals("getBucketIndex(U+50005)", 27, bucketIndex);
    960         bucketIndex = index.getBucketIndex("\uFFFF");
    961         assertEquals("getBucketIndex(U+FFFF)", 27, bucketIndex);
    962     }
    963 
    964     /**
    965      * Test labels with multiple primary weights.
    966      */
    967     @Test
    968     public void TestSchSt() {
    969         AlphabeticIndex index = new AlphabeticIndex(ULocale.GERMAN);
    970         index.addLabels(new UnicodeSet("[{Sch*}{St*}]"));
    971         // ... A  B-R S Sch St T-Z ...
    972         ImmutableIndex immIndex = index.buildImmutableIndex();
    973         assertEquals("getBucketCount()", 31, index.getBucketCount());
    974         assertEquals("immutable getBucketCount()", 31, immIndex.getBucketCount());
    975         String[][] testCases = new String[][] {
    976             // name, bucket index, bucket label
    977             { "Adelbert", "1", "A" },
    978             { "Afrika", "1", "A" },
    979             { "sculap", "2", "" },
    980             { "Aesthet", "2", "" },
    981             { "Berlin", "3", "B" },
    982             { "Rilke", "19", "R" },
    983             { "Sacher", "20", "S" },
    984             { "Seiler", "20", "S" },
    985             { "Sultan", "20", "S" },
    986             { "Schiller", "21", "Sch" },
    987             { "Steiff", "22", "St" },
    988             { "Thomas", "23", "T" }
    989         };
    990         List<String> labels = index.getBucketLabels();
    991         for (String[] testCase : testCases) {
    992             String name = testCase[0];
    993             int bucketIndex = Integer.valueOf(testCase[1]);
    994             String label = testCase[2];
    995             String msg = "getBucketIndex(" + name + ")";
    996             assertEquals(msg, bucketIndex, index.getBucketIndex(name));
    997             msg = "immutable " + msg;
    998             assertEquals(msg, bucketIndex, immIndex.getBucketIndex(name));
    999             msg = "bucket label (" + name + ")";
   1000             assertEquals(msg, label, labels.get(index.getBucketIndex(name)));
   1001             msg = "immutable " + msg;
   1002             assertEquals(msg, label, immIndex.getBucket(bucketIndex).getLabel());
   1003         }
   1004     }
   1005 
   1006     /**
   1007      * With no real labels, there should be only the underflow label.
   1008      */
   1009     @Test
   1010     public void TestNoLabels() {
   1011         RuleBasedCollator coll = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT);
   1012         AlphabeticIndex<Integer> index = new AlphabeticIndex<Integer>(coll);
   1013         index.addRecord("\u897f", 0);
   1014         index.addRecord("i", 0);
   1015         index.addRecord("\u03B1", 0);
   1016         assertEquals("getRecordCount()", 3, index.getRecordCount());  // code coverage
   1017         assertEquals("getBucketCount()", 1, index.getBucketCount());  // ...
   1018         Bucket<Integer> bucket = index.iterator().next();
   1019         assertEquals("underflow label type", LabelType.UNDERFLOW, bucket.getLabelType());
   1020         assertEquals("all records in the underflow bucket", 3, bucket.size());
   1021     }
   1022 
   1023     /**
   1024      * Test with the Bopomofo-phonetic tailoring.
   1025      */
   1026     @Test
   1027     public void TestChineseZhuyin() {
   1028         AlphabeticIndex index = new AlphabeticIndex(ULocale.forLanguageTag("zh-u-co-zhuyin"));
   1029         ImmutableIndex immIndex = index.buildImmutableIndex();
   1030         assertEquals("getBucketCount()", 38, immIndex.getBucketCount());  // ...      --  ...
   1031         assertEquals("label 1", "", immIndex.getBucket(1).getLabel());
   1032         assertEquals("label 2", "", immIndex.getBucket(2).getLabel());
   1033         assertEquals("label 3", "", immIndex.getBucket(3).getLabel());
   1034         assertEquals("label 4", "", immIndex.getBucket(4).getLabel());
   1035         assertEquals("label 5", "", immIndex.getBucket(5).getLabel());
   1036     }
   1037 
   1038     @Test
   1039     public void TestJapaneseKanji() {
   1040         AlphabeticIndex index = new AlphabeticIndex(ULocale.JAPANESE);
   1041         AlphabeticIndex.ImmutableIndex immIndex = index.buildImmutableIndex();
   1042         // There are no index characters for Kanji in the Japanese standard collator.
   1043         // They should all go into the overflow bucket.
   1044         final int[] kanji = { 0x4E9C, 0x95C7, 0x4E00, 0x58F1 };
   1045         int overflowIndex = immIndex.getBucketCount() - 1;
   1046         for(int i = 0; i < kanji.length; ++i) {
   1047             String msg = String.format("kanji[%d]=U+%04X in overflow bucket", i, kanji[i]);
   1048             assertEquals(msg, overflowIndex, immIndex.getBucketIndex(UTF16.valueOf(kanji[i])));
   1049         }
   1050     }
   1051 
   1052     @Test
   1053     public void TestFrozenCollator() {
   1054         // Ticket #9472
   1055         RuleBasedCollator coll = (RuleBasedCollator) Collator.getInstance(new ULocale("da"));
   1056         coll.setStrength(Collator.IDENTICAL);
   1057         coll.freeze();
   1058         // The AlphabeticIndex constructor used to throw an exception
   1059         // because it cloned the collator (which preserves frozenness)
   1060         // and set the clone's strength to PRIMARY.
   1061         AlphabeticIndex index = new AlphabeticIndex(coll);
   1062         assertEquals("same strength as input Collator",
   1063                 Collator.IDENTICAL, index.getCollator().getStrength());
   1064     }
   1065 
   1066     @Test
   1067     public void TestChineseUnihan() {
   1068         AlphabeticIndex index = new AlphabeticIndex(new ULocale("zh-u-co-unihan"));
   1069         index.setMaxLabelCount(500);  // ICU 54 default is 99.
   1070         assertEquals("getMaxLabelCount()", 500, index.getMaxLabelCount());  // code coverage
   1071         AlphabeticIndex.ImmutableIndex immIndex = index.buildImmutableIndex();
   1072         int bucketCount = immIndex.getBucketCount();
   1073         if(bucketCount < 216) {
   1074             // There should be at least an underflow and overflow label,
   1075             // and one for each of 214 radicals,
   1076             // and maybe additional labels for simplified radicals.
   1077             // (ICU4C: dataerrln(), prints only a warning if the data is missing)
   1078             errln("too few buckets/labels for Chinese/unihan: " + bucketCount +
   1079                     " (is zh/unihan data available?)");
   1080             return;
   1081         } else {
   1082             logln("Chinese/unihan has " + bucketCount + " buckets/labels");
   1083         }
   1084         // bucketIndex = radical number, adjusted for simplified radicals in lower buckets.
   1085         int bucketIndex = index.getBucketIndex("\u4e5d");
   1086         assertEquals("getBucketIndex(U+4E5D)", 5, bucketIndex);
   1087         // radical 100, and there is a 90' since Unicode 8
   1088         bucketIndex = index.getBucketIndex("\u7527");
   1089         assertEquals("getBucketIndex(U+7527)", 101, bucketIndex);
   1090     }
   1091 
   1092     @Test
   1093     public void testAddLabels_Locale() {
   1094         AlphabeticIndex<?> ulocaleIndex = new AlphabeticIndex<String>(ULocale.CANADA);
   1095         AlphabeticIndex<?> localeIndex = new AlphabeticIndex<String>(Locale.CANADA);
   1096         ulocaleIndex.addLabels(ULocale.SIMPLIFIED_CHINESE);
   1097         localeIndex.addLabels(Locale.SIMPLIFIED_CHINESE);
   1098         assertEquals("getBucketLables() results of ulocaleIndex and localeIndex differ",
   1099                 ulocaleIndex.getBucketLabels(), localeIndex.getBucketLabels());
   1100     }
   1101 
   1102     @Test
   1103     public void testGetRecordCount_empty() {
   1104         assertEquals("Record count of empty index not 0", 0,
   1105                 new AlphabeticIndex<String>(ULocale.CANADA).getRecordCount());
   1106     }
   1107 
   1108     @Test
   1109     public void testGetRecordCount_withRecords() {
   1110         assertEquals("Record count of index with one record not 1", 1,
   1111                 new AlphabeticIndex<String>(ULocale.CANADA).addRecord("foo", null).getRecordCount());
   1112     }
   1113 
   1114     /**
   1115      * Check that setUnderflowLabel/setOverflowLabel/setInflowLabel correctly influence the name of
   1116      * generated labels.
   1117      */
   1118     @Test
   1119     public void testFlowLabels() {
   1120         AlphabeticIndex<?> index = new AlphabeticIndex<String>(ULocale.ENGLISH)
   1121                 .addLabels(ULocale.forLanguageTag("ru"));
   1122         index.setUnderflowLabel("underflow");
   1123         index.setOverflowLabel("overflow");
   1124         index.setInflowLabel("inflow");
   1125         index.addRecord("!", null);
   1126         index.addRecord("\u03B1", null); // GREEK SMALL LETTER ALPHA
   1127         index.addRecord("\uab70", null); // CHEROKEE SMALL LETTER A
   1128         AlphabeticIndex.Bucket<?> underflowBucket = null;
   1129         AlphabeticIndex.Bucket<?> overflowBucket = null;
   1130         AlphabeticIndex.Bucket<?> inflowBucket = null;
   1131         for (AlphabeticIndex.Bucket<?> bucket : index) {
   1132             switch (bucket.getLabelType()) {
   1133                 case UNDERFLOW:
   1134                     assertNull("LabelType not null", underflowBucket);
   1135                     underflowBucket = bucket;
   1136                     break;
   1137                 case OVERFLOW:
   1138                     assertNull("LabelType not null", overflowBucket);
   1139                     overflowBucket = bucket;
   1140                     break;
   1141                 case INFLOW:
   1142                     assertNull("LabelType not null", inflowBucket);
   1143                     inflowBucket = bucket;
   1144                     break;
   1145             }
   1146         }
   1147         assertNotNull("No bucket 'underflow'", underflowBucket);
   1148         assertEquals("Wrong bucket label", "underflow", underflowBucket.getLabel());
   1149         assertEquals("Wrong bucket label", "underflow", index.getUnderflowLabel());
   1150         assertEquals("Bucket size not 1", 1, underflowBucket.size());
   1151         assertNotNull("No bucket 'overflow'", overflowBucket);
   1152         assertEquals("Wrong bucket label", "overflow", overflowBucket.getLabel());
   1153         assertEquals("Wrong bucket label", "overflow", index.getOverflowLabel());
   1154         assertEquals("Bucket size not 1", 1, overflowBucket.size());
   1155         assertNotNull("No bucket 'inflow'", inflowBucket);
   1156         assertEquals("Wrong bucket label", "inflow", inflowBucket.getLabel());
   1157         assertEquals("Wrong bucket label", "inflow", index.getInflowLabel());
   1158         assertEquals("Bucket size not 1", 1, inflowBucket.size());
   1159     }
   1160 }
   1161