/*
******************************************************************************
* Copyright (C) 2005, International Business Machines Corporation and        *
* others. All Rights Reserved.                                               *
******************************************************************************
*/
/*
WBNF, Weighted BNF, is an extended BNF. The main difference between WBNF
and standard BNF is that WBNF accepts weights for its alternation items.
A weight specifies the chance that its item will be selected.

The purpose of WBNF is to help generate a random string from a given grammar
which can be described with standard BNF. The introduction of 'weight'
is to guide the generator to give the specific parts different chances to be
generated.

Usually, the user gives LanguageGenerator the grammar description in WBNF,
then LanguageGenerator will generate a random string on every next() call.
The return code of parseBNF() can help the user to determine the error,
either in the grammar description or in the WBNF parser itself.


The grammar of WBNF itself can be described in standard BNF:

    escaping       = _single character with a leading backslash, either inside or outside quoting_
    quoting        = _quoted with a pair of single quotation marks_
    string         = string alphabet | string digit | string quoting | string escaping |
                     alphabet | quoting | escaping
    alphabet       =
    digit          =
    integer        = integer digit | digit
    weight         = integer %
    weight-list    = weight-list weight | weight
    var            = var alphabet | var digit | $ alphabet

    var-defs       = var-defs var-def | var-def
    var-def        = var '=' definition;

    alternation    = alternation '|' alt-item | alt-item
    alt-item       = sequence | sequence weight

    sequence       = sequence modified | modified

    modified       = core | morph | quote | repeat
    morph          = modified ~
    quote          = modified @
    repeat         = modified quantifier | modified quantifier weight-list
    quantifier     = ?
                     | * | + | { integer , integer} | {integer, } | {integer}

    core           = var | string | '(' definition ')'

    definition     = core | modified | sequence | alternation
    definition     = alternation

Remarks:
o The following characters are literals in the preceding definition
  but are syntax symbols in WBNF

    % $ ~ @ ? * + { } ,

o The following characters are syntax symbols in the preceding definition
    (space)  concatenation operation, or separators to increase readability
    =        definition
    |        selection operation
    ( )      precedence select
    ' '      override special-character to plain character

o the definitions of 'escaping' and 'quoting' in the preceding definition
  are descriptive text, not formal rules
o infinite is actually a predefined value, PSEUDO_INFINIT, defined in this file
o if a weight is not present in 'alt-item' or 'repeat',
  the default weight DEFAULT_WEIGHT defined in this file is used

o * == {0, }
  + == {1, }
  ? == {0, 1}

o the weight-list for repeat assigns the weights for the repeat items one by one

    demo{1,3} 30% 40% 100% == (demo)30% | (demodemo)40% | (demodemodemo)100%

  For more explanation of the weight-list, please see the LIMITATION of the grammar

o but the weight-list for the question mark has a different meaning

    demo ? 30% != demo{0,1} 30% 100%
    demo ? 30% == demo{0,1} 70% 30%

  the 70% is calculated from (DEFAULT_WEIGHT - weight)


Known LIMITATION of the grammar
For 'repeat', the parser will eat up as many weights as possible at one time,
discard superfluous weights if there are too many,
and fill in missing weights with the default weight if there are too few.
95 This behavior means following definitions are equal 96 97 demo{1,3} 30% 40% 100% 98 demo{1,3} 30% 40% 100% 50% 99 demo{1,3} 30% 40% 100 101 This behavior will cause a little confusion when defining an alternation 102 103 demo{1,3} 30% 40% 100% 50% | show 20% 104 105 is interpreted as 106 107 (demo{1,3} 30% 40% 100%) 100% | show 20% 108 109 not 110 111 (demo{1,3} 30% 40% 100%) 50% | show 20% 112 113 to get an expected definition, please use parentheses. 114 115 Known LIMITATION of current implement 116 Due to the well known point alias problem, current Parser will be effectively 117 crashed if the definition looks like 118 119 $a = demo; 120 $b = $a; 121 $c = $a; 122 or 123 $a = demo; 124 $b = $a $a; 125 or 126 $a = demo; 127 $b = $b $a; 128 129 The crash will occur at delete operation in destructor or other memory release code. 130 Several plans are on hard to fix the problem. Use a smart point with reference count, 131 or use a central memory management solution. But now, it works well with collation 132 monkey test, which is the only user for WBNF. 133 */ 134 135 #ifndef _WBNF 136 #define _WBNF 137 138 #include "unicode/utypes.h" 139 140 const int DEFAULT_WEIGHT = 100; 141 const int PSEUDO_INFINIT = 200; 142 143 class LanguageGenerator_impl; 144 145 class LanguageGenerator{ 146 LanguageGenerator_impl * lang_gen; 147 public: 148 enum PARSE_RESULT {OK, BNF_DEF_WRONG, INCOMPLETE, NO_TOP_NODE}; 149 LanguageGenerator(); 150 ~LanguageGenerator(); 151 PARSE_RESULT parseBNF(const char *const bnf_definition /*in*/, const char *const top_node/*in*/, UBool debug=FALSE); 152 const char *next(); /* Return a null-terminated c-string. The buffer is owned by callee. */ 153 }; 154 155 void TestWbnf(void); 156 157 #endif /* _WBNF */ 158