1 /* 2 ******************************************************************************* 3 * Copyright (C) 2002-2012, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 */ 7 package com.ibm.icu.dev.test.util; 8 9 import java.util.Random; 10 11 import com.ibm.icu.dev.util.BNF; 12 import com.ibm.icu.dev.util.Pick; 13 import com.ibm.icu.dev.util.Quoter; 14 import com.ibm.icu.dev.util.Tokenizer; 15 import com.ibm.icu.text.UnicodeSet; 16 17 public class TestBNF { 18 19 static final String[] testRules = { 20 "$root = [ab]{3};", 21 22 "$root = [ab]{3,};", 23 24 "$root = [ab]{3,5};", 25 26 "$root = [ab]*;", 27 28 "$root = [ab]?;", 29 30 "$root = [ab]+;", 31 32 "$us = [a-z];" + 33 "$root = [0-9$us];", 34 35 "$root = a $foo b? 25% end 30% | $foo 50%;\r\n" + 36 "$foo = c{1,5} 20%;", 37 38 "$root = [a-z]{1,5}~;", 39 40 "$root = [a-z]{5}~;", 41 42 "$root = '\\' (u | U0010 | U000 $hex) $hex{4} ;\r\n" + 43 "$hex = [0-9A-Fa-f];", 44 }; 45 46 static String unicodeSetBNF = "" + 47 "$root = $leaf | '[' $s $root2 $s ']' ;\r\n" + 48 "$root2 = $leaf | '[' $s $root3 $s ']' | ($root3 $s ($op $root3 $s){0,3}) ;\r\n" + 49 "$root3 = $leaf | '[' $s $root4 $s ']' | ($root4 $s ($op $root4 $s){0,3}) ;\r\n" + 50 "$root4 = $leaf | ($leaf $s ($op $leaf $s){0,3}) ;\r\n" + 51 "$op = (('&' | '-') $s)? 70%;" + 52 "$leaf = '[' $s $list $s ']' | $prop;\r\n" + 53 "$list = ($char $s ('-' $s $char $s)? 30%){1,5} ;\r\n" + 54 "$prop = '\\' (p | P) '{' $s $propName $s '}' | '[:' '^'? $s $propName $s ':]';\r\n" + 55 "$needsQuote = [\\-\\][:whitespace:][:control:]] ;\r\n" + 56 "$char = [[\\u0000-\\U00010FFFF]-$needsQuote] | $quoted ;\r\n" + 57 "$quoted = '\\' ('u' | 'U0010' | 'U000' $hex) $hex{4} ;\r\n" + 58 "$hex = [0-9A-Fa-f];\r\n" + 59 "$s = ' '? 20%;\r\n" + 60 "$propName = (whitespace | ws) | (uppercase | uc) | (lowercase | lc) | $category;\r\n" + 61 "$category = ((general | gc) $s '=' $s)? $catvalue;\r\n" + 62 "$catvalue = (C | Other | Cc | Control | Cf | Format | Cn | Unassigned | L | Letter);\r\n"; 63 64 public static void main (String[] args) { 65 testTokenizer(); 66 for (int i = 0; i < testRules.length; ++i) { 67 testBNF(testRules[i], null, 20); 68 } 69 70 testBNF(unicodeSetBNF, null, 20); 71 //testParser(); 72 } 73 74 static void testBNF(String rules, UnicodeSet chars, int count) { 75 BNF bnf = new BNF(new Random(0), new Quoter.RuleQuoter()) 76 .addSet("$chars", chars) 77 .addRules(rules) 78 .complete(); 79 80 System.out.println("===================================="); 81 System.out.println("BNF"); 82 System.out.println(rules); 83 System.out.println(bnf.getInternal()); 84 for (int i = 0; i < count; ++i) { 85 System.out.println(i + ": " + bnf.next()); 86 } 87 } 88 89 /* 90 public static testManual() { 91 Pick p = Pick.maybe(75,Pick.unquoted("a")); 92 testOr(p, 1); 93 p = Pick.or(new String[]{"", "a", "bb", "ccc"}); 94 testOr(p, 3); 95 p = Pick.repeat(3, 5, new int[]{20, 30, 20}, "a"); 96 testOr(p, 5); 97 p = Pick.codePoint("[a-ce]"); 98 testCodePoints(p); 99 p = Pick.codePoint("[a-ce]"); 100 testCodePoints(p); 101 p = Pick.string(2, 8, p); 102 testOr(p,10); 103 104 p = Pick.or(new String[]{"", "a", "bb", "ccc"}); 105 p = Pick.and(p).and2(p).and2("&"); 106 testMatch(p, "abb&"); 107 testMatch(p, "bba"); 108 109 // testEnglish(); 110 } 111 */ 112 113 static void testMatch(Pick p, String source) { 114 Pick.Position pp = new Pick.Position(); 115 boolean value = p.match(source, pp); 116 System.out.println("Match: " + value + ", " + pp); 117 } 118 /* 119 static void testParser() { 120 try { 121 Pick.Target target = new Pick.Target(); 122 for (int i = 0; i < rules.length; ++i) { 123 target.addRule(rules[i]); 124 } 125 } catch (ParseException e) { 126 // TODO Auto-generated catch block 127 e.printStackTrace(); 128 } 129 } 130 */ 131 132 static class Counts { 133 int[] counts; 134 Counts(int max) { 135 counts = new int[max+1]; 136 } 137 void inc(int index) { 138 counts[index]++; 139 } 140 void show() { 141 System.out.println("Printing Counts"); 142 for (int i = 0; i < counts.length; ++i) { 143 if (counts[i] == 0) continue; 144 System.out.println(i + ": " + counts[i]); 145 } 146 System.out.println(); 147 } 148 } 149 150 /* static final String[] rules = { 151 "$s = ' ';", 152 "$noun = dog | house | government | wall | street | zebra;", 153 "$adjective = red | glorious | simple | nasty | heavy | clean;", 154 "$article = quickly | oddly | silently | boldly;", 155 "$adjectivePhrase = ($adverb $s)? 50% $adjective* 0% 30% 20% 10%;", 156 "$nounPhrase = $articles $s ($adjectivePhrase $s)? 30% $noun;", 157 "$verb = goes | fishes | walks | sleeps;", 158 "$tverb = carries | lifts | overturns | hits | jumps on;", 159 "$copula = is 30% | seems 10%;", 160 "$sentence1 = $nounPhrase $s $verb $s ($s $adverb)? 30%;", 161 "$sentence2 = $nounPhrase $s $tverb $s $nounPhrase ($s $adverb)? 30%;", 162 "$sentence3 = $nounPhrase $s $copula $s $adjectivePhrase;", 163 "$conj = but | and | or;", 164 "$sentence4 = $sentence1 | $sentence2 | $sentence3 20% | $sentence4 $conj $sentence4 20%;", 165 "$sentence = $sentence4 '.';"}; 166 */ 167 /* 168 private static void testEnglish() { 169 Pick s = Pick.unquoted(" "); 170 Pick verbs = Pick.or(new String[]{"goes", "fishes", "walks", "sleeps"}); 171 Pick transitive = Pick.or(new String[]{"carries", "lifts", "overturns", "hits", "jumps on"}); 172 Pick nouns = Pick.or(new String[]{"dog", "house", "government", "wall", "street", "zebra"}); 173 Pick adjectives = Pick.or(new String[]{"red", "glorious", "simple", "nasty", "heavy", "clean"}); 174 Pick articles = Pick.or(new String[]{"the", "a"}); 175 Pick adverbs = Pick.or(new String[]{"quickly", "oddly", "silently", "boldly"}); 176 Pick adjectivePhrase = Pick.and(0.5, Pick.and(adverbs).and2(s)).and2(adjectives); 177 Pick nounPhrase = Pick.and(articles).and2(s) 178 .and2(0.3, Pick.and(adjectivePhrase).and2(s)) 179 .and2(nouns); 180 Pick copula = Pick.or(new String[]{"is", "seems"}); 181 Pick sentence1 = Pick.and(nounPhrase).and2(s).and2(verbs) 182 .and2(0.3, Pick.and(s).and2(adverbs)).name("s1"); 183 Pick sentence2 = Pick.and(nounPhrase).and2(s).and2(transitive).and2(s).and2(nounPhrase) 184 .and2(0.3, Pick.and(s).and2(adverbs)).name("s2"); 185 Pick sentence3 = Pick.and(nounPhrase).and2(s).and2(copula).and2(s).and2(adjectivePhrase).name("s3"); 186 Pick conj = Pick.or(new String[]{", but", ", and", ", or"}); 187 Pick forward = Pick.unquoted("forward"); 188 Pick pair = Pick.and(forward).and2(conj).and2(s).and2(forward).name("part"); 189 Pick sentenceBase = Pick.or(sentence1).or2(sentence2).or2(sentence3).or2(0.6666, pair).name("sentence"); 190 sentenceBase.replace(forward, sentenceBase); 191 Pick sentence = Pick.and(sentenceBase).and2(Pick.unquoted(".")); 192 Pick.Target target = Pick.Target.make(sentence); 193 for (int i = 0; i < 50; ++i) { 194 System.out.println(i + ": " + target.next()); 195 } 196 } 197 private static void testOr(Pick p, int count) { 198 Pick.Target target = Pick.Target.make(p); 199 Counts counts = new Counts(count + 10); 200 for (int i = 0; i < 1000; ++i) { 201 String s = target.next(); 202 counts.inc(s.length()); 203 } 204 counts.show(); 205 } 206 private static void testCodePoints(Pick p) { 207 Pick.Target target = Pick.Target.make(p); 208 Counts counts = new Counts(128); 209 for (int i = 0; i < 10000; ++i) { 210 String s = target.next(); 211 counts.inc(s.charAt(0)); 212 } 213 counts.show(); 214 } 215 */ 216 public static void printRandoms() { 217 BNF bnf = new BNF(new Random(0), new Quoter.RuleQuoter()) 218 .addRules("[a-z]{2,5}").complete(); 219 System.out.println("Start"); 220 for (int i = 0; i < 100; ++i) { 221 String temp = bnf.next(); 222 System.out.println(i + ")\t" + temp); 223 } 224 } 225 226 public static void testTokenizer() { 227 Tokenizer t = new Tokenizer(); 228 229 String[] samples = {"a'b'c d #abc\r e", "'a '123 321", 230 "\\\\", "a'b", "a'", "abc def%?ghi", "%", "a", "\\ a", "a''''b"}; 231 for (int i = 0; i < samples.length; ++i) { 232 t.setSource(samples[i]); 233 System.out.println(); 234 System.out.println("Input: " + t.getSource()); 235 int type = 0; 236 while (type != Tokenizer.DONE) { 237 type = t.next(); 238 System.out.println(t.toString(type, false)); 239 } 240 } 241 } 242 243 } 244 245