1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2009-2014, Google, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 */ 9 package com.ibm.icu.dev.test.translit; 10 11 import org.junit.Test; 12 import org.junit.runner.RunWith; 13 import org.junit.runners.JUnit4; 14 15 import com.ibm.icu.dev.test.TestFmwk; 16 import com.ibm.icu.lang.UScript; 17 import com.ibm.icu.text.Transliterator; 18 import com.ibm.icu.text.UTF16; 19 import com.ibm.icu.text.UnicodeSet; 20 import com.ibm.icu.text.UnicodeSetIterator; 21 import com.ibm.icu.util.ULocale; 22 23 /** 24 * @author markdavis 25 * 26 */ 27 @RunWith(JUnit4.class) 28 public class AnyScriptTest extends TestFmwk { 29 @Test 30 public void TestContext() { 31 Transliterator t = Transliterator.createFromRules("foo", "::[bc]; a{b}d > B;", Transliterator.FORWARD); 32 String sample = "abd abc b"; 33 assertEquals("context works", "aBd abc b", t.transform(sample)); 34 } 35 36 @Test 37 public void TestScripts(){ 38 // get a couple of characters of each script for testing 39 40 StringBuffer testBuffer = new StringBuffer(); 41 for (int script = 0; script < UScript.CODE_LIMIT; ++script) { 42 UnicodeSet test = new UnicodeSet().applyPropertyAlias("script", UScript.getName(script)); 43 int count = Math.min(20, test.size()); 44 for (int i = 0; i < count; ++i){ 45 testBuffer.append(UTF16.valueOf(test.charAt(i))); 46 } 47 } 48 String test = testBuffer.toString(); 49 logln("Test line: " + test); 50 51 int inclusion = TestFmwk.getExhaustiveness(); 52 boolean testedUnavailableScript = false; 53 54 for (int script = 0; script < UScript.CODE_LIMIT; ++script) { 55 if (script == UScript.COMMON || script == UScript.INHERITED) { 56 continue; 57 } 58 // if the inclusion rate is not 10, skip all but a small number of items. 59 // Make sure, however, that we test at least one unavailable script 60 if (inclusion < 10 && script != UScript.LATIN 61 && script != UScript.HAN 62 && script != UScript.HIRAGANA 63 && testedUnavailableScript 64 ) { 65 continue; 66 } 67 68 String scriptName = UScript.getName(script); // long name 69 ULocale locale = new ULocale(scriptName); 70 if (locale.getLanguage().equals("new") || locale.getLanguage().equals("pau")) { 71 if (logKnownIssue("11171", 72 "long script name loosely looks like a locale ID with a known likely script")) { 73 continue; 74 } 75 } 76 Transliterator t; 77 try { 78 t = Transliterator.getInstance("any-" + scriptName); 79 } catch (Exception e) { 80 testedUnavailableScript = true; 81 logln("Skipping unavailable: " + scriptName); 82 continue; // we don't handle all scripts 83 } 84 logln("Checking: " + scriptName); 85 if (t != null) { 86 t.transform(test); // just verify we don't crash 87 } 88 String shortScriptName = UScript.getShortName(script); // 4-letter script code 89 try { 90 t = Transliterator.getInstance("any-" + shortScriptName); 91 } catch (Exception e) { 92 errln("Transliterator.getInstance() worked for \"any-" + scriptName + 93 "\" but not for \"any-" + shortScriptName + '\"'); 94 } 95 t.transform(test); // just verify we don't crash 96 } 97 } 98 99 /** 100 * Check to make sure that wide characters are converted when going to narrow scripts. 101 */ 102 @Test 103 public void TestForWidth(){ 104 Transliterator widen = Transliterator.getInstance("halfwidth-fullwidth"); 105 Transliterator narrow = Transliterator.getInstance("fullwidth-halfwidth"); 106 UnicodeSet ASCII = new UnicodeSet("[:ascii:]"); 107 String lettersAndSpace = "abc def"; 108 final String punctOnly = "( )"; 109 110 String wideLettersAndSpace = widen.transform(lettersAndSpace); 111 String widePunctOnly = widen.transform(punctOnly); 112 assertContainsNone("Should be wide", ASCII, wideLettersAndSpace); 113 assertContainsNone("Should be wide", ASCII, widePunctOnly); 114 115 String back; 116 back = narrow.transform(wideLettersAndSpace); 117 assertEquals("Should be narrow", lettersAndSpace, back); 118 back = narrow.transform(widePunctOnly); 119 assertEquals("Should be narrow", punctOnly, back); 120 121 Transliterator latin = Transliterator.getInstance("any-Latn"); 122 back = latin.transform(wideLettersAndSpace); 123 assertEquals("Should be ascii", lettersAndSpace, back); 124 125 back = latin.transform(widePunctOnly); 126 assertEquals("Should be ascii", punctOnly, back); 127 128 // Han-Latin is now forward-only per CLDR ticket #5630 129 //Transliterator t2 = Transliterator.getInstance("any-Han"); 130 //back = t2.transform(widePunctOnly); 131 //assertEquals("Should be same", widePunctOnly, back); 132 133 134 } 135 136 @Test 137 public void TestCommonDigits() { 138 UnicodeSet westernDigitSet = new UnicodeSet("[0-9]"); 139 UnicodeSet westernDigitSetAndMarks = new UnicodeSet("[[0-9][:Mn:]]"); 140 UnicodeSet arabicDigitSet = new UnicodeSet("[[:Nd:]&[:block=Arabic:]]"); 141 Transliterator latin = Transliterator.getInstance("Any-Latn"); 142 Transliterator arabic = Transliterator.getInstance("Any-Arabic"); 143 String westernDigits = getList(westernDigitSet); 144 String arabicDigits = getList(arabicDigitSet); 145 146 String fromArabic = latin.transform(arabicDigits); 147 assertContainsAll("Any-Latin transforms Arabic digits", westernDigitSetAndMarks, fromArabic); 148 if (false) { // we don't require conversion to Arabic digits 149 String fromLatin = arabic.transform(westernDigits); 150 assertContainsAll("Any-Arabic transforms Western digits", arabicDigitSet, fromLatin); 151 } 152 } 153 154 // might want to add to TestFmwk 155 private void assertContainsAll(String message, UnicodeSet set, String string) { 156 handleAssert(set.containsAll(string), message, set, string, "contains all of", false); 157 } 158 159 private void assertContainsNone(String message, UnicodeSet set, String string) { 160 handleAssert(set.containsNone(string), message, set, string, "contains none of", false); 161 } 162 163 // might want to add to UnicodeSet 164 private String getList(UnicodeSet set) { 165 StringBuffer result = new StringBuffer(); 166 for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) { 167 result.append(it.getString()); 168 } 169 return result.toString(); 170 } 171 } 172