Home | History | Annotate | Download | only in intltest
      1 /*
      2  ******************************************************************************
      3  * Copyright (C) 2005, International Business Machines Corporation and   *
      4  * others. All Rights Reserved.                                               *
      5  ******************************************************************************
      6  */
      7 /*
      8   WBNF, Weighted BNF, is an extended BNF. The main difference between WBNF
      9   and standard BNF is that WBNF accepts a weight for each alternation item.
     10   The weight specifies how likely that item is to be selected.
     11 
     12   The purpose of WBNF is to help generate a random string from a given grammar
     13   which can be described with standard BNF. The introduction of 'weight'
     14   is to guide the generator to give the specific parts different chances to be
     15   generated.
     16 
     17   Usually, the user gives LanguageGenerator the grammar description in WBNF,
     18   then LanguageGenerator will generate a random string on every next() call.
     19   The return code of parseBNF() can help user to determine the error,
     20   either in the grammar description or in the WBNF parser itself.
     21 
     22 
     23   The grammar of WBNF itself can be described in standard BNF,
     24 
     25     escaping        = _single character with a leading back slash, either inside or outside quoting_
     26     quoting         = _quoted with a pair of single quotation marks_
     27     string          = string alphabet | string digit | string quoting | string escaping |
     28                       alphabet | quoting | escaping
     29     alphabet        =
     30     digit           =
     31     integer         = integer digit | digit
     32     weight          = integer %
     33     weight-list     = weight-list weight | weight
     34     var             = var alphabet | var digit | $ alphabet
     35 
     36     var-defs        = var-defs var-def | var-def
     37     var-def         = var '=' definition;
     38 
     39     alternation     = alternation '|' alt-item | alt-item
     40     alt-item        = sequence | sequence weight
     41 
     42     sequence        = sequence modified | modified
     43 
     44     modified        = core | morph | quote | repeat
     45     morph           = modified ~
     46     quote           = modified @
     47     repeat          = modified quantifier | modified quantifier weight-list
     48     quantifier      = ? | * | + | { integer , integer} | {integer, } | {integer}
     49 
     50     core            = var | string | '(' definition ')'
     51 
     52     definition      = core | modified | sequence | alternation
     53     definition      = alternation
     54 
     55     Remarks:
     56     o Following characters are literals in preceding definition
     57       but are syntax symbols in WBNF
     58 
     59       % $ ~ @ ? * + { } ,
     60 
     61     o The following characters are syntax symbols in the preceding definition
     62               (space) concatenation operation, or separators to increase readability
     63       =       definition
     64       |       selection operation
     65       ( )     precedence select
     66       ' '     override special-character to plain character
     67 
     68     o the definitions of 'escaping' and 'quoting' are given as descriptive text in the preceding definition
     69     o infinite is actually a predefined value, PSEUDO_INFINIT, defined in this file
     70     o if a weight is not present in 'alt-item' or 'repeat',
     71       the default weight DEFAULT_WEIGHT defined in this file is used
     72 
     73     o * == {0,  }
     74       + == {1,  }
     75       ? == {0, 1}
     76 
     77     o the weight-list for repeat assigns the weights for repeat items one by one
     78 
     79       demo{1,3} 30% 40% 100%  ==  (demo)30% | (demodemo)40% | (demodemodemo)100%
     80 
     81       For more explanation of the weight-list, please see the LIMITATION of the grammar
     82 
     83     o but the weight-list for question mark has different meaning
     84 
     85       demo ? 30%   != demo{0,1} 30% 100%
     86       demo ? 30%   == demo{0,1} 70% 30%
     87 
     88       the 70% is calculated from (DEFAULT_WEIGHT - weight)
     89 
     90 
     91   Known LIMITATION of the grammar
     92     For 'repeat', the parser will consume as many weights as possible at one time,
     93     discarding superfluous weights if there are too many and
     94     filling in missing weights with the default weight if there are too few.
     95     This behavior means the following definitions are equivalent
     96 
     97         demo{1,3} 30% 40% 100%
     98         demo{1,3} 30% 40% 100% 50%
     99         demo{1,3} 30% 40%
    100 
    101     This behavior will cause a little confusion when defining an alternation
    102 
    103         demo{1,3} 30% 40% 100% 50% | show 20%
    104 
    105     is interpreted as
    106 
    107         (demo{1,3} 30% 40% 100%) 100% | show 20%
    108 
    109     not
    110 
    111         (demo{1,3} 30% 40% 100%) 50% | show 20%
    112 
    113     to get an expected definition, please use parentheses.
    114 
    115   Known LIMITATION of the current implementation
    116     Due to the well-known pointer aliasing problem, the current parser will
    117     effectively crash if the definition looks like
    118 
    119         $a = demo;
    120         $b = $a;
    121         $c = $a;
    122     or
    123         $a = demo;
    124         $b = $a $a;
    125     or
    126         $a = demo;
    127         $b = $b $a;
    128 
    129     The crash will occur at a delete operation in a destructor or other memory release code.
    130     Several plans are at hand to fix the problem: use a smart pointer with a reference count,
    131     or use a central memory management solution. But for now, it works well with the collation
    132     monkey test, which is the only user of WBNF.
    133 */
    134 
    135 #ifndef _WBNF
    136 #define _WBNF
    137 
    138 #include "unicode/utypes.h"
    139 
    140 const int DEFAULT_WEIGHT = 100; /* weight used when an 'alt-item' or 'repeat' carries no explicit weight (see the remarks in the header comment) */
    141 const int PSEUDO_INFINIT = 200; /* predefined value that stands in for "infinite" (see the remarks in the header comment); presumably the upper bound of open-ended quantifiers such as * and + -- confirm in the parser */
    142 
    143 class LanguageGenerator_impl; /* forward declaration only; the class is not defined in this header */
    144 
    145 class LanguageGenerator{ /* Takes a grammar description in WBNF and generates a random string on every next() call (see the header comment). */
    146     LanguageGenerator_impl * lang_gen; /* pointer to the forward-declared implementation object; private because it precedes 'public:' */
            /* NOTE(review): no copy constructor or assignment operator is declared, so a copied
               LanguageGenerator would share lang_gen with the original. If the destructor releases
               lang_gen, copying is unsafe -- confirm in the implementation file. */
    147 public:
            /* Result code of parseBNF(). The meanings are inferred from the enumerator names and
               should be confirmed in the implementation: OK presumably means success,
               BNF_DEF_WRONG an invalid grammar description, INCOMPLETE an unfinished or
               unresolved grammar, NO_TOP_NODE a top_node that could not be found. */
    148     enum PARSE_RESULT {OK, BNF_DEF_WRONG, INCOMPLETE, NO_TOP_NODE};
    149     LanguageGenerator();
    150     ~LanguageGenerator();
            /* Parses a grammar description written in WBNF.
               bnf_definition: the grammar text (input only).
               top_node:       input only; presumably the name of the variable generation starts
                               from -- confirm in the implementation.
               debug:          defaults to FALSE; its effect is not visible in this header.
               Returns a PARSE_RESULT; per the header comment, the return code helps the user
               determine whether an error lies in the grammar description or in the WBNF parser. */
    151     PARSE_RESULT parseBNF(const char *const bnf_definition /*in*/, const char *const top_node/*in*/, UBool debug=FALSE);
            /* Per the header comment, each call generates a new random string from the parsed grammar. */
    152     const char *next(); /* Return a null-terminated c-string. The buffer is owned by callee. */
    153 };
    154 
    155 void TestWbnf(void); /* declared here and defined elsewhere; presumably the test entry point for the WBNF code -- confirm against the caller */
    156 
    157 #endif /* _WBNF */
    158