Home | History | Annotate | Download | only in translit
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 // © 2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5  *******************************************************************************
      6  * Copyright (C) 2009-2015, International Business Machines Corporation and
      7  * others. All Rights Reserved.
      8  *******************************************************************************
      9  */
     10 package android.icu.dev.test.translit;
     11 
     12 import java.util.ArrayList;
     13 import java.util.List;
     14 import java.util.regex.Matcher;
     15 import java.util.regex.Pattern;
     16 
     17 import org.junit.Test;
     18 import org.junit.runner.RunWith;
     19 import org.junit.runners.JUnit4;
     20 
     21 import android.icu.dev.test.TestFmwk;
     22 import android.icu.impl.UnicodeRegex;
     23 import android.icu.lang.UCharacter;
     24 import android.icu.lang.UProperty;
     25 import android.icu.lang.UProperty.NameChoice;
     26 import android.icu.text.Transliterator;
     27 import android.icu.text.UTF16;
     28 import android.icu.text.UnicodeSet;
     29 import android.icu.testsharding.MainTestShard;
     30 
     31 /**
     32  * @author markdavis
     33  */
     34 @MainTestShard
     35 @RunWith(JUnit4.class)
     36 public class RegexUtilitiesTest extends TestFmwk {
     37     /**
     38      * Check basic construction.
     39      */
     40     @Test
     41     public void TestConstruction() {
     42         String[][] tests = {
     43                 {"a"},
     44                 {"a[a-z]b"},
     45                 {"[ba-z]", "[a-z]"},
     46                 {"q[ba-z]", "q[a-z]"},
     47                 {"[ba-z]q", "[a-z]q"},
     48                 {"a\\p{joincontrol}b", "a[\u200C\u200D]b"},
     49                 {"a\\P{joincontrol}b", "a[^\u200C\u200D]b"},
     50                 {"a[[:whitespace:]&[:Zl:]]b", "a[\\\u2028]b"},
     51                 {"a [[:bc=cs:]&[:wspace:]] b", "a [\u00A0\u202F] b"},
     52         };
     53         for (int i = 0; i < tests.length; ++i) {
     54             final String source = tests[i][0];
     55             String expected = tests[i].length == 1 ? source : tests[i][1];
     56             String actual = UnicodeRegex.fix(source);
     57             assertEquals(source, expected, actual);
     58         }
     59     }
     60 
     61     Transliterator hex = Transliterator.getInstance("hex");
     62 
     63     /**
     64      * Perform an exhaustive test on all Unicode characters to make sure that the UnicodeSet with each
     65      * character works.
     66      */
     67     @Test
     68     public void TestCharacters() {
     69         UnicodeSet requiresQuote = new UnicodeSet("[\\$\\&\\-\\:\\[\\\\\\]\\^\\{\\}[:pattern_whitespace:]]");
     70         boolean skip = TestFmwk.getExhaustiveness() < 10;
     71         for (int cp = 0; cp < 0x110000; ++cp) {
     72             if (cp > 0xFF && skip && (cp % 37 != 0)) {
     73                 continue;
     74             }
     75             String cpString = UTF16.valueOf(cp);
     76             String s = requiresQuote.contains(cp) ? "\\" + cpString : cpString;
     77             String pattern = null;
     78             final String rawPattern = "[" + s + s + "]";
     79             try {
     80                 pattern = UnicodeRegex.fix(rawPattern);
     81             } catch (Exception e) {
     82                 errln(e.getMessage());
     83                 continue;
     84             }
     85             final String expected = "[" + s + "]";
     86             assertEquals("Doubled character works" + hex.transform(s), expected, pattern);
     87 
     88             // verify that we can create a regex pattern and use as expected
     89             String shouldNotMatch = UTF16.valueOf((cp + 1) % 0x110000);
     90             checkCharPattern(Pattern.compile(pattern), pattern, cpString, shouldNotMatch);
     91 
     92             // verify that the Pattern.compile works
     93             checkCharPattern(UnicodeRegex.compile(rawPattern), pattern, cpString, shouldNotMatch);
     94         }
     95     }
     96 
     97     /**
     98      * Check all integer Unicode properties to make sure they work.
     99      */
    100     @Test
    101     public void TestUnicodeProperties() {
    102         final boolean skip = TestFmwk.getExhaustiveness() < 10;
    103         UnicodeSet temp = new UnicodeSet();
    104         for (int propNum = UProperty.INT_START; propNum < UProperty.INT_LIMIT; ++propNum) {
    105             if (skip && (propNum % 5 != 0)) {
    106                 continue;
    107             }
    108             String propName = UCharacter.getPropertyName(propNum, NameChoice.LONG);
    109             final int intPropertyMinValue = UCharacter.getIntPropertyMinValue(propNum);
    110             int intPropertyMaxValue = UCharacter.getIntPropertyMaxValue(propNum);
    111             if (skip) { // only test first if not exhaustive
    112                 intPropertyMaxValue = intPropertyMinValue;
    113             }
    114             for (int valueNum = intPropertyMinValue; valueNum <= intPropertyMaxValue; ++valueNum) {
    115                 // hack for getting property value name
    116                 String valueName = UCharacter.getPropertyValueName(propNum, valueNum, NameChoice.LONG);
    117                 if (valueName == null) {
    118                     valueName = UCharacter.getPropertyValueName(propNum, valueNum, NameChoice.SHORT);
    119                     if (valueName == null) {
    120                         valueName = Integer.toString(valueNum);
    121                     }
    122                 }
    123                 temp.applyIntPropertyValue(propNum, valueNum);
    124                 if (temp.size() == 0) {
    125                     continue;
    126                 }
    127                 final String prefix = "a";
    128                 final String suffix = "b";
    129                 String shouldMatch = prefix + UTF16.valueOf(temp.charAt(0)) + suffix;
    130                 temp.complement();
    131                 String shouldNotMatch = prefix + UTF16.valueOf(temp.charAt(0)) + suffix;
    132 
    133                 // posix style pattern
    134                 String rawPattern = prefix + "[:" + propName + "=" + valueName + ":]" + suffix;
    135                 String rawNegativePattern = prefix + "[:^" + propName + "=" + valueName + ":]" + suffix;
    136                 checkCharPattern(UnicodeRegex.compile(rawPattern), rawPattern, shouldMatch, shouldNotMatch);
    137                 checkCharPattern(UnicodeRegex.compile(rawNegativePattern), rawNegativePattern, shouldNotMatch, shouldMatch);
    138 
    139                 // perl style pattern
    140                 rawPattern = prefix + "\\p{" + propName + "=" + valueName + "}" + suffix;
    141                 rawNegativePattern = prefix + "\\P{" + propName + "=" + valueName + "}" + suffix;
    142                 checkCharPattern(UnicodeRegex.compile(rawPattern), rawPattern, shouldMatch, shouldNotMatch);
    143                 checkCharPattern(UnicodeRegex.compile(rawNegativePattern), rawNegativePattern, shouldNotMatch, shouldMatch);
    144             }
    145         }
    146     }
    147 
    148     @Test
    149     public void TestBnf() {
    150         UnicodeRegex regex = new UnicodeRegex();
    151         final String[][] tests = {
    152                 {
    153                     "c = a wq;\n" +
    154                     "a = xyz;\n" +
    155                     "b = a a c;\n"
    156                 },
    157                 {
    158                     "c = a b;\n" +
    159                     "a = xyz;\n" +
    160                     "b = a a c;\n",
    161                     "Exception"
    162                 },
    163                 {
    164                     "uri = (?: (scheme) \\:)? (host) (?: \\? (query))? (?: \\u0023 (fragment))?;\n" +
    165                     "scheme = reserved+;\n" +
    166                     "host = // reserved+;\n" +
    167                     "query = [\\=reserved]+;\n" +
    168                     "fragment = reserved+;\n" +
    169                     "reserved = [[:ascii:][:sc=grek:]&[:alphabetic:]];\n",
    170                 "http://\u03B1\u03B2\u03B3?huh=hi#there"},
    171                 {
    172                     "langtagRegex.txt"
    173                 }
    174         };
    175         for (int i = 0; i < tests.length; ++i) {
    176             String test = tests[i][0];
    177             final boolean expectException = tests[i].length < 2 ? false : tests[i][1].equals("Exception");
    178             try {
    179                 String result;
    180                 if (test.endsWith(".txt")) {
    181                     java.io.InputStream is = RegexUtilitiesTest.class.getResourceAsStream(test);
    182                     List lines;
    183                     try {
    184                         lines = UnicodeRegex.appendLines(new ArrayList(), is, "UTF-8");
    185                     } finally {
    186                         is.close();
    187                     }
    188                     result = regex.compileBnf(lines);
    189                 } else {
    190                     result = regex.compileBnf(test);
    191                 }
    192                 if (expectException) {
    193                     errln("Expected exception for " + test);
    194                     continue;
    195                 }
    196                 result = result.replaceAll("[0-9]+%", ""); // just so we can use the language subtag stuff
    197                 String resolved = regex.transform(result);
    198                 logln(resolved);
    199                 Matcher m = Pattern.compile(resolved, Pattern.COMMENTS).matcher("");
    200                 String checks = "";
    201                 for (int j = 1; j < tests[i].length; ++j) {
    202                     String check = tests[i][j];
    203                     if (!m.reset(check).matches()) {
    204                         checks = checks + "Fails " + check + "\n";
    205                     } else {
    206                         for (int k = 1; k <= m.groupCount(); ++k) {
    207                             checks += "(" + m.group(k) + ")";
    208                         }
    209                         checks += "\n";
    210                     }
    211                 }
    212                 logln("Result: " + result + "\n" + checks + "\n" + test);
    213             } catch (Exception e) {
    214                 if (!expectException) {
    215                     errln(e.getClass().getName() + ": " + e.getMessage());
    216                 }
    217                 continue;
    218             }
    219         }
    220     }
    221 
    222     /**
    223      * Utility for checking patterns
    224      */
    225     private void checkCharPattern(Pattern pat, String matchTitle, String shouldMatch, String shouldNotMatch) {
    226         Matcher matcher = pat.matcher(shouldMatch);
    227         assertTrue(matchTitle + " and " + shouldMatch, matcher.matches());
    228         matcher.reset(shouldNotMatch);
    229         assertFalse(matchTitle + " and " + shouldNotMatch, matcher.matches());
    230     }
    231 }
    232