Home | History | Annotate | Download | only in translit
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /*
      4  *******************************************************************************
      5  * Copyright (C) 2009-2014, Google, International Business Machines Corporation and
      6  * others. All Rights Reserved.
      7  *******************************************************************************
      8  */
      9 package com.ibm.icu.dev.test.translit;
     10 
     11 import org.junit.Test;
     12 import org.junit.runner.RunWith;
     13 import org.junit.runners.JUnit4;
     14 
     15 import com.ibm.icu.dev.test.TestFmwk;
     16 import com.ibm.icu.lang.UScript;
     17 import com.ibm.icu.text.Transliterator;
     18 import com.ibm.icu.text.UTF16;
     19 import com.ibm.icu.text.UnicodeSet;
     20 import com.ibm.icu.text.UnicodeSetIterator;
     21 import com.ibm.icu.util.ULocale;
     22 
     23 /**
     24  * @author markdavis
     25  *
     26  */
     27 @RunWith(JUnit4.class)
     28 public class AnyScriptTest extends TestFmwk {
     29     @Test
     30     public void TestContext() {
     31         Transliterator t = Transliterator.createFromRules("foo", "::[bc]; a{b}d > B;", Transliterator.FORWARD);
     32         String sample = "abd abc b";
     33         assertEquals("context works", "aBd abc b", t.transform(sample));
     34     }
     35 
     36     @Test
     37     public void TestScripts(){
     38         // get a couple of characters of each script for testing
     39 
     40         StringBuffer testBuffer = new StringBuffer();
     41         for (int script = 0; script < UScript.CODE_LIMIT; ++script) {
     42             UnicodeSet test = new UnicodeSet().applyPropertyAlias("script", UScript.getName(script));
     43             int count = Math.min(20, test.size());
     44             for (int i = 0; i < count; ++i){
     45                 testBuffer.append(UTF16.valueOf(test.charAt(i)));
     46             }
     47         }
     48         String test = testBuffer.toString();
     49         logln("Test line: " + test);
     50 
     51         int inclusion = TestFmwk.getExhaustiveness();
     52         boolean testedUnavailableScript = false;
     53 
     54         for (int script = 0; script < UScript.CODE_LIMIT; ++script) {
     55             if (script == UScript.COMMON || script == UScript.INHERITED) {
     56                 continue;
     57             }
     58             // if the inclusion rate is not 10, skip all but a small number of items.
     59             // Make sure, however, that we test at least one unavailable script
     60             if (inclusion < 10 && script != UScript.LATIN
     61                     && script != UScript.HAN
     62                     && script != UScript.HIRAGANA
     63                     && testedUnavailableScript
     64                     ) {
     65                 continue;
     66             }
     67 
     68             String scriptName = UScript.getName(script);  // long name
     69             ULocale locale = new ULocale(scriptName);
     70             if (locale.getLanguage().equals("new") || locale.getLanguage().equals("pau")) {
     71                 if (logKnownIssue("11171",
     72                         "long script name loosely looks like a locale ID with a known likely script")) {
     73                     continue;
     74                 }
     75             }
     76             Transliterator t;
     77             try {
     78                 t = Transliterator.getInstance("any-" + scriptName);
     79             } catch (Exception e) {
     80                 testedUnavailableScript = true;
     81                 logln("Skipping unavailable: " + scriptName);
     82                 continue; // we don't handle all scripts
     83             }
     84             logln("Checking: " + scriptName);
     85             if (t != null) {
     86                 t.transform(test); // just verify we don't crash
     87             }
     88             String shortScriptName = UScript.getShortName(script);  // 4-letter script code
     89             try {
     90                 t = Transliterator.getInstance("any-" + shortScriptName);
     91             } catch (Exception e) {
     92                 errln("Transliterator.getInstance() worked for \"any-" + scriptName +
     93                         "\" but not for \"any-" + shortScriptName + '\"');
     94             }
     95             t.transform(test); // just verify we don't crash
     96         }
     97     }
     98 
     99     /**
    100      * Check to make sure that wide characters are converted when going to narrow scripts.
    101      */
    102     @Test
    103     public void TestForWidth(){
    104         Transliterator widen = Transliterator.getInstance("halfwidth-fullwidth");
    105         Transliterator narrow = Transliterator.getInstance("fullwidth-halfwidth");
    106         UnicodeSet ASCII = new UnicodeSet("[:ascii:]");
    107         String lettersAndSpace = "abc def";
    108         final String punctOnly = "( )";
    109 
    110         String wideLettersAndSpace = widen.transform(lettersAndSpace);
    111         String widePunctOnly = widen.transform(punctOnly);
    112         assertContainsNone("Should be wide", ASCII, wideLettersAndSpace);
    113         assertContainsNone("Should be wide", ASCII, widePunctOnly);
    114 
    115         String back;
    116         back = narrow.transform(wideLettersAndSpace);
    117         assertEquals("Should be narrow", lettersAndSpace, back);
    118         back = narrow.transform(widePunctOnly);
    119         assertEquals("Should be narrow", punctOnly, back);
    120 
    121         Transliterator latin = Transliterator.getInstance("any-Latn");
    122         back = latin.transform(wideLettersAndSpace);
    123         assertEquals("Should be ascii", lettersAndSpace, back);
    124 
    125         back = latin.transform(widePunctOnly);
    126         assertEquals("Should be ascii", punctOnly, back);
    127 
    128         // Han-Latin is now forward-only per CLDR ticket #5630
    129         //Transliterator t2 = Transliterator.getInstance("any-Han");
    130         //back = t2.transform(widePunctOnly);
    131         //assertEquals("Should be same", widePunctOnly, back);
    132 
    133 
    134     }
    135 
    136     @Test
    137     public void TestCommonDigits() {
    138         UnicodeSet westernDigitSet = new UnicodeSet("[0-9]");
    139         UnicodeSet westernDigitSetAndMarks = new UnicodeSet("[[0-9][:Mn:]]");
    140         UnicodeSet arabicDigitSet = new UnicodeSet("[[:Nd:]&[:block=Arabic:]]");
    141         Transliterator latin = Transliterator.getInstance("Any-Latn");
    142         Transliterator arabic = Transliterator.getInstance("Any-Arabic");
    143         String westernDigits = getList(westernDigitSet);
    144         String arabicDigits = getList(arabicDigitSet);
    145 
    146         String fromArabic = latin.transform(arabicDigits);
    147         assertContainsAll("Any-Latin transforms Arabic digits", westernDigitSetAndMarks, fromArabic);
    148         if (false) { // we don't require conversion to Arabic digits
    149             String fromLatin = arabic.transform(westernDigits);
    150             assertContainsAll("Any-Arabic transforms Western digits", arabicDigitSet, fromLatin);
    151         }
    152     }
    153 
    154     // might want to add to TestFmwk
    155     private void assertContainsAll(String message, UnicodeSet set, String string) {
    156         handleAssert(set.containsAll(string), message, set, string, "contains all of", false);
    157     }
    158 
    159     private void assertContainsNone(String message, UnicodeSet set, String string) {
    160         handleAssert(set.containsNone(string), message, set, string, "contains none of", false);
    161     }
    162 
    163     // might want to add to UnicodeSet
    164     private String getList(UnicodeSet set) {
    165         StringBuffer result = new StringBuffer();
    166         for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) {
    167             result.append(it.getString());
    168         }
    169         return result.toString();
    170     }
    171 }
    172