Home | History | Annotate | Download | only in collator
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /********************************************************************
      4  * Copyright (c) 2002-2014, International Business Machines Corporation and
      5  * others. All Rights Reserved.
      6  ********************************************************************/
      7 
      8 /**
      9  * UCAConformanceTest performs conformance tests defined in the data
 * files. ICU ships with stub data files, as the full test files are too
 * long. To run the whole test, download the full test files.
     12  */
     13 
     14 package com.ibm.icu.dev.test.collator;
     15 
     16 import java.io.BufferedReader;
     17 import java.io.IOException;
     18 
     19 import org.junit.Before;
     20 import org.junit.Test;
     21 import org.junit.runner.RunWith;
     22 import org.junit.runners.JUnit4;
     23 
     24 import com.ibm.icu.dev.test.TestFmwk;
     25 import com.ibm.icu.dev.test.TestUtil;
     26 import com.ibm.icu.lang.UCharacter;
     27 import com.ibm.icu.text.Collator;
     28 import com.ibm.icu.text.RawCollationKey;
     29 import com.ibm.icu.text.RuleBasedCollator;
     30 import com.ibm.icu.text.UTF16;
     31 import com.ibm.icu.util.ULocale;
     32 import com.ibm.icu.util.VersionInfo;
     33 
     34 @RunWith(JUnit4.class)
     35 public class UCAConformanceTest extends TestFmwk {
     36 
     37     public UCAConformanceTest() {
     38     }
     39 
     40     @Before
     41     public void init() throws Exception {
     42         UCA = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT);
     43         comparer = new UTF16.StringComparator(true, false, UTF16.StringComparator.FOLD_CASE_DEFAULT);
     44     }
     45 
     46     private RuleBasedCollator UCA;
     47     private RuleBasedCollator rbUCA;
     48     private UTF16.StringComparator comparer;
     49     private boolean isAtLeastUCA62 = UCharacter.getUnicodeVersion().compareTo(VersionInfo.UNICODE_6_2) >= 0;
     50 
     51     @Test
     52     public void TestTableNonIgnorable() {
     53         setCollNonIgnorable(UCA);
     54         openTestFile("NON_IGNORABLE");
     55         conformanceTest(UCA);
     56     }
     57 
     58     @Test
     59     public void TestTableShifted() {
     60         setCollShifted(UCA);
     61         openTestFile("SHIFTED");
     62         conformanceTest(UCA);
     63     }
     64 
     65     @Test
     66     public void TestRulesNonIgnorable() {
     67         if (logKnownIssue("cldrbug:6745", "UCARules.txt has problems")) {
     68             return;
     69         }
     70         initRbUCA();
     71         if (rbUCA == null) {
     72             return;
     73         }
     74 
     75         setCollNonIgnorable(rbUCA);
     76         openTestFile("NON_IGNORABLE");
     77         conformanceTest(rbUCA);
     78     }
     79 
     80     @Test
     81     public void TestRulesShifted() {
     82         logln("This test is currently disabled, as it is impossible to "
     83                 + "wholly represent fractional UCA using tailoring rules.");
     84         return;
     85         /*
     86          * initRbUCA(); if(rbUCA == null) { return; }
     87          *
     88          * setCollShifted(rbUCA); openTestFile("SHIFTED"); testConformance(rbUCA);
     89          */
     90     }
     91 
     92     BufferedReader in;
     93 
     94     private void openTestFile(String type) {
     95         String collationTest = "CollationTest_";
     96         String ext = ".txt";
     97         try {
     98             in = TestUtil.getDataReader(collationTest + type + "_SHORT" + ext);
     99         } catch (Exception e) {
    100             try {
    101                 in = TestUtil.getDataReader(collationTest + type + ext);
    102             } catch (Exception e1) {
    103                 try {
    104                     in = TestUtil.getDataReader(collationTest + type + "_STUB" + ext);
    105                     logln("INFO: Working with the stub file.\n" + "If you need the full conformance test, please\n"
    106                             + "download the appropriate data files from:\n"
    107                             + "http://unicode.org/cldr/trac/browser/trunk/common/uca");
    108                 } catch (Exception e11) {
    109                     errln("ERROR: Could not find any of the test files");
    110                 }
    111             }
    112         }
    113     }
    114 
    115     private void setCollNonIgnorable(RuleBasedCollator coll) {
    116         if (coll != null) {
    117             coll.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
    118             coll.setLowerCaseFirst(false);
    119             coll.setCaseLevel(false);
    120             coll.setStrength(isAtLeastUCA62 ? Collator.IDENTICAL : Collator.TERTIARY);
    121             coll.setAlternateHandlingShifted(false);
    122         }
    123     }
    124 
    125     private void setCollShifted(RuleBasedCollator coll) {
    126         if (coll != null) {
    127             coll.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
    128             coll.setLowerCaseFirst(false);
    129             coll.setCaseLevel(false);
    130             coll.setStrength(isAtLeastUCA62 ? Collator.IDENTICAL : Collator.QUATERNARY);
    131             coll.setAlternateHandlingShifted(true);
    132         }
    133     }
    134 
    135     private void initRbUCA() {
    136         if (rbUCA == null) {
    137             String ucarules = UCA.getRules(true);
    138             try {
    139                 rbUCA = new RuleBasedCollator(ucarules);
    140             } catch (Exception e) {
    141                 errln("Failure creating UCA rule-based collator: " + e);
    142             }
    143         }
    144     }
    145 
    146     private String parseString(String line) {
    147         int i = 0, value;
    148         StringBuilder result = new StringBuilder(), buffer = new StringBuilder();
    149 
    150         for (;;) {
    151             while (i < line.length() && Character.isWhitespace(line.charAt(i))) {
    152                 i++;
    153             }
    154             while (i < line.length() && Character.isLetterOrDigit(line.charAt(i))) {
    155                 buffer.append(line.charAt(i));
    156                 i++;
    157             }
    158             if (buffer.length() == 0) {
    159                 // We hit something that was not whitespace/letter/digit.
    160                 // Should be ';' or end of string.
    161                 return result.toString();
    162             }
    163             /* read one code point */
    164             value = Integer.parseInt(buffer.toString(), 16);
    165             buffer.setLength(0);
    166             result.appendCodePoint(value);
    167         }
    168 
    169     }
    170 
    171     private static final int IS_SHIFTED = 1;
    172     private static final int FROM_RULES = 2;
    173 
    174     private static boolean skipLineBecauseOfBug(String s, int flags) {
    175         // Add temporary exceptions here if there are ICU bugs, until we can fix them.
    176         // For examples see the ICU 52 version of this file.
    177         return false;
    178     }
    179 
    180     private static int normalizeResult(int result) {
    181         return result < 0 ? -1 : result == 0 ? 0 : 1;
    182     }
    183 
    184     private void conformanceTest(RuleBasedCollator coll) {
    185         if (in == null || coll == null) {
    186             return;
    187         }
    188         int skipFlags = 0;
    189         if (coll.isAlternateHandlingShifted()) {
    190             skipFlags |= IS_SHIFTED;
    191         }
    192         if (coll == rbUCA) {
    193             skipFlags |= FROM_RULES;
    194         }
    195 
    196         logln("-prop:ucaconfnosortkeys=1 turns off getSortKey() in UCAConformanceTest");
    197         boolean withSortKeys = getProperty("ucaconfnosortkeys") == null;
    198 
    199         int lineNo = 0;
    200 
    201         String line = null, oldLine = null, buffer = null, oldB = null;
    202         RawCollationKey sk1 = new RawCollationKey(), sk2 = new RawCollationKey();
    203         RawCollationKey oldSk = null, newSk = sk1;
    204 
    205         try {
    206             while ((line = in.readLine()) != null) {
    207                 lineNo++;
    208                 if (line.length() == 0 || line.charAt(0) == '#') {
    209                     continue;
    210                 }
    211                 buffer = parseString(line);
    212 
    213                 if (skipLineBecauseOfBug(buffer, skipFlags)) {
    214                     logln("Skipping line " + lineNo + " because of a known bug");
    215                     continue;
    216                 }
    217 
    218                 if (withSortKeys) {
    219                     coll.getRawCollationKey(buffer, newSk);
    220                 }
    221                 if (oldSk != null) {
    222                     boolean ok = true;
    223                     int skres = withSortKeys ? oldSk.compareTo(newSk) : 0;
    224                     int cmpres = coll.compare(oldB, buffer);
    225                     int cmpres2 = coll.compare(buffer, oldB);
    226 
    227                     if (cmpres != -cmpres2) {
    228                         errln(String.format(
    229                                 "Compare result not symmetrical on line %d: "
    230                                         + "previous vs. current (%d) / current vs. previous (%d)",
    231                                 lineNo, cmpres, cmpres2));
    232                         ok = false;
    233                     }
    234 
    235                     // TODO: Compare with normalization turned off if the input passes the FCD test.
    236 
    237                     if (withSortKeys && cmpres != normalizeResult(skres)) {
    238                         errln("Difference between coll.compare (" + cmpres + ") and sortkey compare (" + skres
    239                                 + ") on line " + lineNo);
    240                         ok = false;
    241                     }
    242 
    243                     int res = cmpres;
    244                     if (res == 0 && !isAtLeastUCA62) {
    245                         // Up to UCA 6.1, the collation test files use a custom tie-breaker,
    246                         // comparing the raw input strings.
    247                         res = comparer.compare(oldB, buffer);
    248                         // Starting with UCA 6.2, the collation test files use the standard UCA tie-breaker,
    249                         // comparing the NFD versions of the input strings,
    250                         // which we do via setting strength=identical.
    251                     }
    252                     if (res > 0) {
    253                         errln("Line " + lineNo + " is not greater or equal than previous line");
    254                         ok = false;
    255                     }
    256 
    257                     if (!ok) {
    258                         errln("  Previous data line " + oldLine);
    259                         errln("  Current data line  " + line);
    260                         if (withSortKeys) {
    261                             errln("  Previous key: " + CollationTest.prettify(oldSk));
    262                             errln("  Current key:  " + CollationTest.prettify(newSk));
    263                         }
    264                     }
    265                 }
    266 
    267                 oldSk = newSk;
    268                 oldB = buffer;
    269                 oldLine = line;
    270                 if (oldSk == sk1) {
    271                     newSk = sk2;
    272                 } else {
    273                     newSk = sk1;
    274                 }
    275             }
    276         } catch (Exception e) {
    277             errln("Unexpected exception " + e);
    278         } finally {
    279             try {
    280                 in.close();
    281             } catch (IOException ignored) {
    282             }
    283             in = null;
    284         }
    285     }
    286 }
    287