Home | History | Annotate | Download | only in util
      1 /*
      2  *******************************************************************************
      3  * Copyright (C) 2002-2012, International Business Machines Corporation and    *
      4  * others. All Rights Reserved.                                                *
      5  *******************************************************************************
      6  */
      7 package com.ibm.icu.dev.test.util;
      8 
      9 import java.util.Random;
     10 
     11 import com.ibm.icu.dev.util.BNF;
     12 import com.ibm.icu.dev.util.Pick;
     13 import com.ibm.icu.dev.util.Quoter;
     14 import com.ibm.icu.dev.util.Tokenizer;
     15 import com.ibm.icu.text.UnicodeSet;
     16 
     17 public class TestBNF {
     18 
     19     static final String[] testRules = {
     20         "$root = [ab]{3};",
     21 
     22         "$root = [ab]{3,};",
     23 
     24         "$root = [ab]{3,5};",
     25 
     26         "$root = [ab]*;",
     27 
     28         "$root = [ab]?;",
     29 
     30         "$root = [ab]+;",
     31 
     32         "$us = [a-z];" +
     33         "$root = [0-9$us];",
     34 
     35         "$root = a $foo b? 25% end 30% | $foo 50%;\r\n" +
     36         "$foo = c{1,5} 20%;",
     37 
     38         "$root = [a-z]{1,5}~;",
     39 
     40         "$root = [a-z]{5}~;",
     41 
     42         "$root = '\\' (u | U0010 | U000 $hex) $hex{4} ;\r\n" +
     43         "$hex = [0-9A-Fa-f];",
     44     };
     45 
     46     static String unicodeSetBNF = "" +
     47     "$root = $leaf | '[' $s $root2 $s ']' ;\r\n" +
     48     "$root2 = $leaf | '[' $s $root3 $s ']' | ($root3 $s ($op $root3 $s){0,3}) ;\r\n" +
     49     "$root3 = $leaf | '[' $s $root4 $s ']' | ($root4 $s ($op $root4 $s){0,3}) ;\r\n" +
     50     "$root4 = $leaf | ($leaf $s ($op $leaf $s){0,3}) ;\r\n" +
     51     "$op = (('&' | '-') $s)? 70%;" +
     52     "$leaf = '[' $s $list $s ']' | $prop;\r\n" +
     53     "$list = ($char $s ('-' $s $char $s)? 30%){1,5} ;\r\n" +
     54     "$prop = '\\' (p | P) '{' $s $propName $s '}' | '[:' '^'? $s $propName $s ':]';\r\n" +
     55     "$needsQuote = [\\-\\][:whitespace:][:control:]] ;\r\n" +
     56     "$char = [[\\u0000-\\U00010FFFF]-$needsQuote] | $quoted ;\r\n" +
     57     "$quoted = '\\' ('u' | 'U0010' | 'U000' $hex) $hex{4} ;\r\n" +
     58     "$hex = [0-9A-Fa-f];\r\n" +
     59     "$s = ' '? 20%;\r\n" +
     60     "$propName = (whitespace | ws) | (uppercase | uc) | (lowercase | lc) | $category;\r\n" +
     61     "$category = ((general | gc) $s '=' $s)? $catvalue;\r\n" +
     62     "$catvalue = (C | Other | Cc | Control | Cf | Format | Cn | Unassigned | L | Letter);\r\n";
     63 
     64     public static void main (String[] args) {
     65         testTokenizer();
     66         for (int i = 0; i < testRules.length; ++i) {
     67             testBNF(testRules[i], null, 20);
     68         }
     69 
     70         testBNF(unicodeSetBNF, null, 20);
     71         //testParser();
     72     }
     73 
     74     static void testBNF(String rules, UnicodeSet chars, int count) {
     75         BNF bnf = new BNF(new Random(0), new Quoter.RuleQuoter())
     76         .addSet("$chars", chars)
     77         .addRules(rules)
     78         .complete();
     79 
     80         System.out.println("====================================");
     81         System.out.println("BNF");
     82         System.out.println(rules);
     83         System.out.println(bnf.getInternal());
     84         for (int i = 0; i < count; ++i) {
     85             System.out.println(i + ": " + bnf.next());
     86         }
     87     }
     88 
     89     /*
     90     public static testManual() {
     91         Pick p = Pick.maybe(75,Pick.unquoted("a"));
     92         testOr(p, 1);
     93         p = Pick.or(new String[]{"", "a", "bb", "ccc"});
     94         testOr(p, 3);
     95         p = Pick.repeat(3, 5, new int[]{20, 30, 20}, "a");
     96         testOr(p, 5);
     97         p = Pick.codePoint("[a-ce]");
     98         testCodePoints(p);
     99         p = Pick.codePoint("[a-ce]");
    100         testCodePoints(p);
    101         p = Pick.string(2, 8, p);
    102         testOr(p,10);
    103 
    104         p = Pick.or(new String[]{"", "a", "bb", "ccc"});
    105         p = Pick.and(p).and2(p).and2("&");
    106         testMatch(p, "abb&");
    107         testMatch(p, "bba");
    108 
    109         // testEnglish();
    110     }
    111     */
    112 
    113     static void testMatch(Pick p, String source) {
    114         Pick.Position pp = new Pick.Position();
    115         boolean value = p.match(source, pp);
    116         System.out.println("Match: " + value + ", " + pp);
    117     }
    118     /*
    119     static void testParser() {
    120         try {
    121             Pick.Target target = new Pick.Target();
    122             for (int i = 0; i < rules.length; ++i) {
    123                 target.addRule(rules[i]);
    124             }
    125         } catch (ParseException e) {
    126             // TODO Auto-generated catch block
    127             e.printStackTrace();
    128         }
    129     }
    130     */
    131 
    132     static class Counts {
    133         int[] counts;
    134         Counts(int max) {
    135             counts = new int[max+1];
    136         }
    137         void inc(int index) {
    138             counts[index]++;
    139         }
    140         void show() {
    141             System.out.println("Printing Counts");
    142             for (int i = 0; i < counts.length; ++i) {
    143                 if (counts[i] == 0) continue;
    144                 System.out.println(i + ": " + counts[i]);
    145             }
    146             System.out.println();
    147         }
    148     }
    149 
    150 /*    static final String[] rules = {
    151         "$s = ' ';",
    152         "$noun = dog | house | government | wall | street | zebra;",
    153         "$adjective = red | glorious | simple | nasty | heavy | clean;",
    154         "$article = quickly | oddly | silently | boldly;",
    155         "$adjectivePhrase = ($adverb $s)? 50% $adjective* 0% 30% 20% 10%;",
    156         "$nounPhrase = $articles $s ($adjectivePhrase $s)? 30% $noun;",
    157         "$verb = goes | fishes | walks | sleeps;",
    158         "$tverb = carries | lifts | overturns | hits | jumps on;",
    159         "$copula = is 30% | seems 10%;",
    160         "$sentence1 = $nounPhrase $s $verb $s ($s $adverb)? 30%;",
    161         "$sentence2 = $nounPhrase $s $tverb $s $nounPhrase ($s $adverb)? 30%;",
    162         "$sentence3 = $nounPhrase $s $copula $s $adjectivePhrase;",
    163         "$conj = but | and | or;",
    164         "$sentence4 = $sentence1 | $sentence2 | $sentence3 20% | $sentence4 $conj $sentence4 20%;",
    165         "$sentence = $sentence4 '.';"};
    166  */
    167     /*
    168     private static void testEnglish() {
    169         Pick s = Pick.unquoted(" ");
    170         Pick verbs = Pick.or(new String[]{"goes", "fishes", "walks", "sleeps"});
    171         Pick transitive = Pick.or(new String[]{"carries", "lifts", "overturns", "hits", "jumps on"});
    172         Pick nouns = Pick.or(new String[]{"dog", "house", "government", "wall", "street", "zebra"});
    173         Pick adjectives = Pick.or(new String[]{"red", "glorious", "simple", "nasty", "heavy", "clean"});
    174         Pick articles = Pick.or(new String[]{"the", "a"});
    175         Pick adverbs = Pick.or(new String[]{"quickly", "oddly", "silently", "boldly"});
    176         Pick adjectivePhrase = Pick.and(0.5, Pick.and(adverbs).and2(s)).and2(adjectives);
    177         Pick nounPhrase = Pick.and(articles).and2(s)
    178             .and2(0.3, Pick.and(adjectivePhrase).and2(s))
    179             .and2(nouns);
    180         Pick copula = Pick.or(new String[]{"is", "seems"});
    181         Pick sentence1 = Pick.and(nounPhrase).and2(s).and2(verbs)
    182             .and2(0.3, Pick.and(s).and2(adverbs)).name("s1");
    183         Pick sentence2 = Pick.and(nounPhrase).and2(s).and2(transitive).and2(s).and2(nounPhrase)
    184             .and2(0.3, Pick.and(s).and2(adverbs)).name("s2");
    185         Pick sentence3 = Pick.and(nounPhrase).and2(s).and2(copula).and2(s).and2(adjectivePhrase).name("s3");
    186         Pick conj = Pick.or(new String[]{", but", ", and", ", or"});
    187         Pick forward = Pick.unquoted("forward");
    188         Pick pair = Pick.and(forward).and2(conj).and2(s).and2(forward).name("part");
    189         Pick sentenceBase = Pick.or(sentence1).or2(sentence2).or2(sentence3).or2(0.6666, pair).name("sentence");
    190         sentenceBase.replace(forward, sentenceBase);
    191         Pick sentence = Pick.and(sentenceBase).and2(Pick.unquoted("."));
    192         Pick.Target target = Pick.Target.make(sentence);
    193         for (int i = 0; i < 50; ++i) {
    194             System.out.println(i + ": " + target.next());
    195         }
    196     }
    197     private static void testOr(Pick p, int count) {
    198         Pick.Target target = Pick.Target.make(p);
    199         Counts counts = new Counts(count + 10);
    200         for (int i = 0; i < 1000; ++i) {
    201             String s = target.next();
    202             counts.inc(s.length());
    203         }
    204         counts.show();
    205     }
    206     private static void testCodePoints(Pick p) {
    207         Pick.Target target = Pick.Target.make(p);
    208         Counts counts = new Counts(128);
    209         for (int i = 0; i < 10000; ++i) {
    210             String s = target.next();
    211             counts.inc(s.charAt(0));
    212         }
    213         counts.show();
    214     }
    215     */
    216     public static void printRandoms() {
    217         BNF bnf = new BNF(new Random(0), new Quoter.RuleQuoter())
    218         .addRules("[a-z]{2,5}").complete();
    219         System.out.println("Start");
    220         for (int i = 0; i < 100; ++i) {
    221             String temp = bnf.next();
    222             System.out.println(i + ")\t" + temp);
    223         }
    224     }
    225 
    226     public static void testTokenizer() {
    227         Tokenizer t = new Tokenizer();
    228 
    229         String[] samples = {"a'b'c d #abc\r e", "'a '123 321",
    230             "\\\\", "a'b", "a'", "abc def%?ghi", "%", "a", "\\ a", "a''''b"};
    231         for (int i = 0; i < samples.length; ++i) {
    232             t.setSource(samples[i]);
    233             System.out.println();
    234             System.out.println("Input: " + t.getSource());
    235             int type = 0;
    236             while (type != Tokenizer.DONE) {
    237                 type = t.next();
    238                 System.out.println(t.toString(type, false));
    239             }
    240         }
    241     }
    242 
    243 }
    244 
    245