Home | History | Annotate | Download | only in Parser
      1 /* Parser generator */
      2 
      3 /* For a description, see the comments at end of this file */
      4 
      5 #include "Python.h"
      6 #include "pgenheaders.h"
      7 #include "token.h"
      8 #include "node.h"
      9 #include "grammar.h"
     10 #include "metagrammar.h"
     11 #include "pgen.h"
     12 
     13 extern int Py_DebugFlag;
     14 extern int Py_IgnoreEnvironmentFlag; /* needed by Py_GETENV */
     15 
     16 
     17 /* PART ONE -- CONSTRUCT NFA -- Cf. Algorithm 3.2 from [Aho&Ullman 77] */
     18 
     19 typedef struct _nfaarc {
     20     int         ar_label;
     21     int         ar_arrow;
     22 } nfaarc;
     23 
     24 typedef struct _nfastate {
     25     int         st_narcs;
     26     nfaarc      *st_arc;
     27 } nfastate;
     28 
     29 typedef struct _nfa {
     30     int                 nf_type;
     31     char                *nf_name;
     32     int                 nf_nstates;
     33     nfastate            *nf_state;
     34     int                 nf_start, nf_finish;
     35 } nfa;
     36 
     37 /* Forward */
     38 static void compile_rhs(labellist *ll,
     39                         nfa *nf, node *n, int *pa, int *pb);
     40 static void compile_alt(labellist *ll,
     41                         nfa *nf, node *n, int *pa, int *pb);
     42 static void compile_item(labellist *ll,
     43                          nfa *nf, node *n, int *pa, int *pb);
     44 static void compile_atom(labellist *ll,
     45                          nfa *nf, node *n, int *pa, int *pb);
     46 
     47 static int
     48 addnfastate(nfa *nf)
     49 {
     50     nfastate *st;
     51 
     52     nf->nf_state = (nfastate *)PyObject_REALLOC(nf->nf_state,
     53                                 sizeof(nfastate) * (nf->nf_nstates + 1));
     54     if (nf->nf_state == NULL)
     55         Py_FatalError("out of mem");
     56     st = &nf->nf_state[nf->nf_nstates++];
     57     st->st_narcs = 0;
     58     st->st_arc = NULL;
     59     return st - nf->nf_state;
     60 }
     61 
     62 static void
     63 addnfaarc(nfa *nf, int from, int to, int lbl)
     64 {
     65     nfastate *st;
     66     nfaarc *ar;
     67 
     68     st = &nf->nf_state[from];
     69     st->st_arc = (nfaarc *)PyObject_REALLOC(st->st_arc,
     70                                   sizeof(nfaarc) * (st->st_narcs + 1));
     71     if (st->st_arc == NULL)
     72         Py_FatalError("out of mem");
     73     ar = &st->st_arc[st->st_narcs++];
     74     ar->ar_label = lbl;
     75     ar->ar_arrow = to;
     76 }
     77 
     78 static nfa *
     79 newnfa(char *name)
     80 {
     81     nfa *nf;
     82     static int type = NT_OFFSET; /* All types will be disjunct */
     83 
     84     nf = (nfa *)PyObject_MALLOC(sizeof(nfa));
     85     if (nf == NULL)
     86         Py_FatalError("no mem for new nfa");
     87     nf->nf_type = type++;
     88     nf->nf_name = name; /* XXX strdup(name) ??? */
     89     nf->nf_nstates = 0;
     90     nf->nf_state = NULL;
     91     nf->nf_start = nf->nf_finish = -1;
     92     return nf;
     93 }
     94 
     95 typedef struct _nfagrammar {
     96     int                 gr_nnfas;
     97     nfa                 **gr_nfa;
     98     labellist           gr_ll;
     99 } nfagrammar;
    100 
    101 /* Forward */
    102 static void compile_rule(nfagrammar *gr, node *n);
    103 
    104 static nfagrammar *
    105 newnfagrammar(void)
    106 {
    107     nfagrammar *gr;
    108 
    109     gr = (nfagrammar *)PyObject_MALLOC(sizeof(nfagrammar));
    110     if (gr == NULL)
    111         Py_FatalError("no mem for new nfa grammar");
    112     gr->gr_nnfas = 0;
    113     gr->gr_nfa = NULL;
    114     gr->gr_ll.ll_nlabels = 0;
    115     gr->gr_ll.ll_label = NULL;
    116     addlabel(&gr->gr_ll, ENDMARKER, "EMPTY");
    117     return gr;
    118 }
    119 
    120 static void
    121 freenfagrammar(nfagrammar *gr)
    122 {
    123     int i;
    124     for (i = 0; i < gr->gr_nnfas; i++) {
    125         PyObject_FREE(gr->gr_nfa[i]->nf_state);
    126     }
    127     PyObject_FREE(gr->gr_nfa);
    128     PyObject_FREE(gr);
    129 }
    130 
    131 static nfa *
    132 addnfa(nfagrammar *gr, char *name)
    133 {
    134     nfa *nf;
    135 
    136     nf = newnfa(name);
    137     gr->gr_nfa = (nfa **)PyObject_REALLOC(gr->gr_nfa,
    138                                   sizeof(nfa*) * (gr->gr_nnfas + 1));
    139     if (gr->gr_nfa == NULL)
    140         Py_FatalError("out of mem");
    141     gr->gr_nfa[gr->gr_nnfas++] = nf;
    142     addlabel(&gr->gr_ll, NAME, nf->nf_name);
    143     return nf;
    144 }
    145 
    146 #ifdef Py_DEBUG
    147 
    148 static char REQNFMT[] = "metacompile: less than %d children\n";
    149 
    150 #define REQN(i, count) do { \
    151     if (i < count) { \
    152         fprintf(stderr, REQNFMT, count); \
    153         Py_FatalError("REQN"); \
    154     } \
    155 } while (0)
    156 
    157 #else
    158 #define REQN(i, count)  /* empty */
    159 #endif
    160 
    161 static nfagrammar *
    162 metacompile(node *n)
    163 {
    164     nfagrammar *gr;
    165     int i;
    166 
    167     if (Py_DebugFlag)
    168         printf("Compiling (meta-) parse tree into NFA grammar\n");
    169     gr = newnfagrammar();
    170     REQ(n, MSTART);
    171     i = n->n_nchildren - 1; /* Last child is ENDMARKER */
    172     n = n->n_child;
    173     for (; --i >= 0; n++) {
    174         if (n->n_type != NEWLINE)
    175             compile_rule(gr, n);
    176     }
    177     return gr;
    178 }
    179 
    180 static void
    181 compile_rule(nfagrammar *gr, node *n)
    182 {
    183     nfa *nf;
    184 
    185     REQ(n, RULE);
    186     REQN(n->n_nchildren, 4);
    187     n = n->n_child;
    188     REQ(n, NAME);
    189     nf = addnfa(gr, n->n_str);
    190     n++;
    191     REQ(n, COLON);
    192     n++;
    193     REQ(n, RHS);
    194     compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish);
    195     n++;
    196     REQ(n, NEWLINE);
    197 }
    198 
    199 static void
    200 compile_rhs(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
    201 {
    202     int i;
    203     int a, b;
    204 
    205     REQ(n, RHS);
    206     i = n->n_nchildren;
    207     REQN(i, 1);
    208     n = n->n_child;
    209     REQ(n, ALT);
    210     compile_alt(ll, nf, n, pa, pb);
    211     if (--i <= 0)
    212         return;
    213     n++;
    214     a = *pa;
    215     b = *pb;
    216     *pa = addnfastate(nf);
    217     *pb = addnfastate(nf);
    218     addnfaarc(nf, *pa, a, EMPTY);
    219     addnfaarc(nf, b, *pb, EMPTY);
    220     for (; --i >= 0; n++) {
    221         REQ(n, VBAR);
    222         REQN(i, 1);
    223         --i;
    224         n++;
    225         REQ(n, ALT);
    226         compile_alt(ll, nf, n, &a, &b);
    227         addnfaarc(nf, *pa, a, EMPTY);
    228         addnfaarc(nf, b, *pb, EMPTY);
    229     }
    230 }
    231 
    232 static void
    233 compile_alt(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
    234 {
    235     int i;
    236     int a, b;
    237 
    238     REQ(n, ALT);
    239     i = n->n_nchildren;
    240     REQN(i, 1);
    241     n = n->n_child;
    242     REQ(n, ITEM);
    243     compile_item(ll, nf, n, pa, pb);
    244     --i;
    245     n++;
    246     for (; --i >= 0; n++) {
    247         REQ(n, ITEM);
    248         compile_item(ll, nf, n, &a, &b);
    249         addnfaarc(nf, *pb, a, EMPTY);
    250         *pb = b;
    251     }
    252 }
    253 
    254 static void
    255 compile_item(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
    256 {
    257     int i;
    258     int a, b;
    259 
    260     REQ(n, ITEM);
    261     i = n->n_nchildren;
    262     REQN(i, 1);
    263     n = n->n_child;
    264     if (n->n_type == LSQB) {
    265         REQN(i, 3);
    266         n++;
    267         REQ(n, RHS);
    268         *pa = addnfastate(nf);
    269         *pb = addnfastate(nf);
    270         addnfaarc(nf, *pa, *pb, EMPTY);
    271         compile_rhs(ll, nf, n, &a, &b);
    272         addnfaarc(nf, *pa, a, EMPTY);
    273         addnfaarc(nf, b, *pb, EMPTY);
    274         REQN(i, 1);
    275         n++;
    276         REQ(n, RSQB);
    277     }
    278     else {
    279         compile_atom(ll, nf, n, pa, pb);
    280         if (--i <= 0)
    281             return;
    282         n++;
    283         addnfaarc(nf, *pb, *pa, EMPTY);
    284         if (n->n_type == STAR)
    285             *pb = *pa;
    286         else
    287             REQ(n, PLUS);
    288     }
    289 }
    290 
    291 static void
    292 compile_atom(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
    293 {
    294     int i;
    295 
    296     REQ(n, ATOM);
    297     i = n->n_nchildren;
    298     (void)i; /* Don't warn about set but unused */
    299     REQN(i, 1);
    300     n = n->n_child;
    301     if (n->n_type == LPAR) {
    302         REQN(i, 3);
    303         n++;
    304         REQ(n, RHS);
    305         compile_rhs(ll, nf, n, pa, pb);
    306         n++;
    307         REQ(n, RPAR);
    308     }
    309     else if (n->n_type == NAME || n->n_type == STRING) {
    310         *pa = addnfastate(nf);
    311         *pb = addnfastate(nf);
    312         addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str));
    313     }
    314     else
    315         REQ(n, NAME);
    316 }
    317 
    318 static void
    319 dumpstate(labellist *ll, nfa *nf, int istate)
    320 {
    321     nfastate *st;
    322     int i;
    323     nfaarc *ar;
    324 
    325     printf("%c%2d%c",
    326         istate == nf->nf_start ? '*' : ' ',
    327         istate,
    328         istate == nf->nf_finish ? '.' : ' ');
    329     st = &nf->nf_state[istate];
    330     ar = st->st_arc;
    331     for (i = 0; i < st->st_narcs; i++) {
    332         if (i > 0)
    333             printf("\n    ");
    334         printf("-> %2d  %s", ar->ar_arrow,
    335             PyGrammar_LabelRepr(&ll->ll_label[ar->ar_label]));
    336         ar++;
    337     }
    338     printf("\n");
    339 }
    340 
    341 static void
    342 dumpnfa(labellist *ll, nfa *nf)
    343 {
    344     int i;
    345 
    346     printf("NFA '%s' has %d states; start %d, finish %d\n",
    347         nf->nf_name, nf->nf_nstates, nf->nf_start, nf->nf_finish);
    348     for (i = 0; i < nf->nf_nstates; i++)
    349         dumpstate(ll, nf, i);
    350 }
    351 
    352 
    353 /* PART TWO -- CONSTRUCT DFA -- Algorithm 3.1 from [Aho&Ullman 77] */
    354 
    355 static void
    356 addclosure(bitset ss, nfa *nf, int istate)
    357 {
    358     if (addbit(ss, istate)) {
    359         nfastate *st = &nf->nf_state[istate];
    360         nfaarc *ar = st->st_arc;
    361         int i;
    362 
    363         for (i = st->st_narcs; --i >= 0; ) {
    364             if (ar->ar_label == EMPTY)
    365                 addclosure(ss, nf, ar->ar_arrow);
    366             ar++;
    367         }
    368     }
    369 }
    370 
    371 typedef struct _ss_arc {
    372     bitset      sa_bitset;
    373     int         sa_arrow;
    374     int         sa_label;
    375 } ss_arc;
    376 
    377 typedef struct _ss_state {
    378     bitset      ss_ss;
    379     int         ss_narcs;
    380     struct _ss_arc      *ss_arc;
    381     int         ss_deleted;
    382     int         ss_finish;
    383     int         ss_rename;
    384 } ss_state;
    385 
    386 typedef struct _ss_dfa {
    387     int         sd_nstates;
    388     ss_state *sd_state;
    389 } ss_dfa;
    390 
    391 /* Forward */
    392 static void printssdfa(int xx_nstates, ss_state *xx_state, int nbits,
    393                        labellist *ll, char *msg);
    394 static void simplify(int xx_nstates, ss_state *xx_state);
    395 static void convert(dfa *d, int xx_nstates, ss_state *xx_state);
    396 
    397 static void
    398 makedfa(nfagrammar *gr, nfa *nf, dfa *d)
    399 {
    400     int nbits = nf->nf_nstates;
    401     bitset ss;
    402     int xx_nstates;
    403     ss_state *xx_state, *yy;
    404     ss_arc *zz;
    405     int istate, jstate, iarc, jarc, ibit;
    406     nfastate *st;
    407     nfaarc *ar;
    408     int i, j;
    409 
    410     ss = newbitset(nbits);
    411     addclosure(ss, nf, nf->nf_start);
    412     xx_state = (ss_state *)PyObject_MALLOC(sizeof(ss_state));
    413     if (xx_state == NULL)
    414         Py_FatalError("no mem for xx_state in makedfa");
    415     xx_nstates = 1;
    416     yy = &xx_state[0];
    417     yy->ss_ss = ss;
    418     yy->ss_narcs = 0;
    419     yy->ss_arc = NULL;
    420     yy->ss_deleted = 0;
    421     yy->ss_finish = testbit(ss, nf->nf_finish);
    422     if (yy->ss_finish)
    423         printf("Error: nonterminal '%s' may produce empty.\n",
    424             nf->nf_name);
    425 
    426     /* This algorithm is from a book written before
    427        the invention of structured programming... */
    428 
    429     /* For each unmarked state... */
    430     for (istate = 0; istate < xx_nstates; ++istate) {
    431         size_t size;
    432         yy = &xx_state[istate];
    433         ss = yy->ss_ss;
    434         /* For all its states... */
    435         for (ibit = 0; ibit < nf->nf_nstates; ++ibit) {
    436             if (!testbit(ss, ibit))
    437                 continue;
    438             st = &nf->nf_state[ibit];
    439             /* For all non-empty arcs from this state... */
    440             for (iarc = 0; iarc < st->st_narcs; iarc++) {
    441                 ar = &st->st_arc[iarc];
    442                 if (ar->ar_label == EMPTY)
    443                     continue;
    444                 /* Look up in list of arcs from this state */
    445                 for (jarc = 0; jarc < yy->ss_narcs; ++jarc) {
    446                     zz = &yy->ss_arc[jarc];
    447                     if (ar->ar_label == zz->sa_label)
    448                         goto found;
    449                 }
    450                 /* Add new arc for this state */
    451                 size = sizeof(ss_arc) * (yy->ss_narcs + 1);
    452                 yy->ss_arc = (ss_arc *)PyObject_REALLOC(
    453                                             yy->ss_arc, size);
    454                 if (yy->ss_arc == NULL)
    455                     Py_FatalError("out of mem");
    456                 zz = &yy->ss_arc[yy->ss_narcs++];
    457                 zz->sa_label = ar->ar_label;
    458                 zz->sa_bitset = newbitset(nbits);
    459                 zz->sa_arrow = -1;
    460              found:             ;
    461                 /* Add destination */
    462                 addclosure(zz->sa_bitset, nf, ar->ar_arrow);
    463             }
    464         }
    465         /* Now look up all the arrow states */
    466         for (jarc = 0; jarc < xx_state[istate].ss_narcs; jarc++) {
    467             zz = &xx_state[istate].ss_arc[jarc];
    468             for (jstate = 0; jstate < xx_nstates; jstate++) {
    469                 if (samebitset(zz->sa_bitset,
    470                     xx_state[jstate].ss_ss, nbits)) {
    471                     zz->sa_arrow = jstate;
    472                     goto done;
    473                 }
    474             }
    475             size = sizeof(ss_state) * (xx_nstates + 1);
    476             xx_state = (ss_state *)PyObject_REALLOC(xx_state,
    477                                                         size);
    478             if (xx_state == NULL)
    479                 Py_FatalError("out of mem");
    480             zz->sa_arrow = xx_nstates;
    481             yy = &xx_state[xx_nstates++];
    482             yy->ss_ss = zz->sa_bitset;
    483             yy->ss_narcs = 0;
    484             yy->ss_arc = NULL;
    485             yy->ss_deleted = 0;
    486             yy->ss_finish = testbit(yy->ss_ss, nf->nf_finish);
    487          done:          ;
    488         }
    489     }
    490 
    491     if (Py_DebugFlag)
    492         printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
    493                                         "before minimizing");
    494 
    495     simplify(xx_nstates, xx_state);
    496 
    497     if (Py_DebugFlag)
    498         printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
    499                                         "after minimizing");
    500 
    501     convert(d, xx_nstates, xx_state);
    502 
    503     for (i = 0; i < xx_nstates; i++) {
    504         for (j = 0; j < xx_state[i].ss_narcs; j++)
    505             delbitset(xx_state[i].ss_arc[j].sa_bitset);
    506         PyObject_FREE(xx_state[i].ss_arc);
    507     }
    508     PyObject_FREE(xx_state);
    509 }
    510 
    511 static void
    512 printssdfa(int xx_nstates, ss_state *xx_state, int nbits,
    513            labellist *ll, char *msg)
    514 {
    515     int i, ibit, iarc;
    516     ss_state *yy;
    517     ss_arc *zz;
    518 
    519     printf("Subset DFA %s\n", msg);
    520     for (i = 0; i < xx_nstates; i++) {
    521         yy = &xx_state[i];
    522         if (yy->ss_deleted)
    523             continue;
    524         printf(" Subset %d", i);
    525         if (yy->ss_finish)
    526             printf(" (finish)");
    527         printf(" { ");
    528         for (ibit = 0; ibit < nbits; ibit++) {
    529             if (testbit(yy->ss_ss, ibit))
    530                 printf("%d ", ibit);
    531         }
    532         printf("}\n");
    533         for (iarc = 0; iarc < yy->ss_narcs; iarc++) {
    534             zz = &yy->ss_arc[iarc];
    535             printf("  Arc to state %d, label %s\n",
    536                 zz->sa_arrow,
    537                 PyGrammar_LabelRepr(
    538                     &ll->ll_label[zz->sa_label]));
    539         }
    540     }
    541 }
    542 
    543 
    544 /* PART THREE -- SIMPLIFY DFA */
    545 
    546 /* Simplify the DFA by repeatedly eliminating states that are
    547    equivalent to another oner.  This is NOT Algorithm 3.3 from
    548    [Aho&Ullman 77].  It does not always finds the minimal DFA,
    549    but it does usually make a much smaller one...  (For an example
    550    of sub-optimal behavior, try S: x a b+ | y a b+.)
    551 */
    552 
    553 static int
    554 samestate(ss_state *s1, ss_state *s2)
    555 {
    556     int i;
    557 
    558     if (s1->ss_narcs != s2->ss_narcs || s1->ss_finish != s2->ss_finish)
    559         return 0;
    560     for (i = 0; i < s1->ss_narcs; i++) {
    561         if (s1->ss_arc[i].sa_arrow != s2->ss_arc[i].sa_arrow ||
    562             s1->ss_arc[i].sa_label != s2->ss_arc[i].sa_label)
    563             return 0;
    564     }
    565     return 1;
    566 }
    567 
    568 static void
    569 renamestates(int xx_nstates, ss_state *xx_state, int from, int to)
    570 {
    571     int i, j;
    572 
    573     if (Py_DebugFlag)
    574         printf("Rename state %d to %d.\n", from, to);
    575     for (i = 0; i < xx_nstates; i++) {
    576         if (xx_state[i].ss_deleted)
    577             continue;
    578         for (j = 0; j < xx_state[i].ss_narcs; j++) {
    579             if (xx_state[i].ss_arc[j].sa_arrow == from)
    580                 xx_state[i].ss_arc[j].sa_arrow = to;
    581         }
    582     }
    583 }
    584 
    585 static void
    586 simplify(int xx_nstates, ss_state *xx_state)
    587 {
    588     int changes;
    589     int i, j;
    590 
    591     do {
    592         changes = 0;
    593         for (i = 1; i < xx_nstates; i++) {
    594             if (xx_state[i].ss_deleted)
    595                 continue;
    596             for (j = 0; j < i; j++) {
    597                 if (xx_state[j].ss_deleted)
    598                     continue;
    599                 if (samestate(&xx_state[i], &xx_state[j])) {
    600                     xx_state[i].ss_deleted++;
    601                     renamestates(xx_nstates, xx_state,
    602                                  i, j);
    603                     changes++;
    604                     break;
    605                 }
    606             }
    607         }
    608     } while (changes);
    609 }
    610 
    611 
    612 /* PART FOUR -- GENERATE PARSING TABLES */
    613 
    614 /* Convert the DFA into a grammar that can be used by our parser */
    615 
    616 static void
    617 convert(dfa *d, int xx_nstates, ss_state *xx_state)
    618 {
    619     int i, j;
    620     ss_state *yy;
    621     ss_arc *zz;
    622 
    623     for (i = 0; i < xx_nstates; i++) {
    624         yy = &xx_state[i];
    625         if (yy->ss_deleted)
    626             continue;
    627         yy->ss_rename = addstate(d);
    628     }
    629 
    630     for (i = 0; i < xx_nstates; i++) {
    631         yy = &xx_state[i];
    632         if (yy->ss_deleted)
    633             continue;
    634         for (j = 0; j < yy->ss_narcs; j++) {
    635             zz = &yy->ss_arc[j];
    636             addarc(d, yy->ss_rename,
    637                 xx_state[zz->sa_arrow].ss_rename,
    638                 zz->sa_label);
    639         }
    640         if (yy->ss_finish)
    641             addarc(d, yy->ss_rename, yy->ss_rename, 0);
    642     }
    643 
    644     d->d_initial = 0;
    645 }
    646 
    647 
    648 /* PART FIVE -- GLUE IT ALL TOGETHER */
    649 
    650 static grammar *
    651 maketables(nfagrammar *gr)
    652 {
    653     int i;
    654     nfa *nf;
    655     dfa *d;
    656     grammar *g;
    657 
    658     if (gr->gr_nnfas == 0)
    659         return NULL;
    660     g = newgrammar(gr->gr_nfa[0]->nf_type);
    661                     /* XXX first rule must be start rule */
    662     g->g_ll = gr->gr_ll;
    663 
    664     for (i = 0; i < gr->gr_nnfas; i++) {
    665         nf = gr->gr_nfa[i];
    666         if (Py_DebugFlag) {
    667             printf("Dump of NFA for '%s' ...\n", nf->nf_name);
    668             dumpnfa(&gr->gr_ll, nf);
    669             printf("Making DFA for '%s' ...\n", nf->nf_name);
    670         }
    671         d = adddfa(g, nf->nf_type, nf->nf_name);
    672         makedfa(gr, gr->gr_nfa[i], d);
    673     }
    674 
    675     return g;
    676 }
    677 
    678 grammar *
    679 pgen(node *n)
    680 {
    681     nfagrammar *gr;
    682     grammar *g;
    683 
    684     gr = metacompile(n);
    685     g = maketables(gr);
    686     translatelabels(g);
    687     addfirstsets(g);
    688     freenfagrammar(gr);
    689     return g;
    690 }
    691 
    692 grammar *
    693 Py_pgen(node *n)
    694 {
    695   return pgen(n);
    696 }
    697 
    698 /*
    699 
    700 Description
    701 -----------
    702 
    703 Input is a grammar in extended BNF (using * for repetition, + for
    704 at-least-once repetition, [] for optional parts, | for alternatives and
    705 () for grouping).  This has already been parsed and turned into a parse
    706 tree.
    707 
    708 Each rule is considered as a regular expression in its own right.
    709 It is turned into a Non-deterministic Finite Automaton (NFA), which
    710 is then turned into a Deterministic Finite Automaton (DFA), which is then
    711 optimized to reduce the number of states.  See [Aho&Ullman 77] chapter 3,
    712 or similar compiler books (this technique is more often used for lexical
    713 analyzers).
    714 
    715 The DFA's are used by the parser as parsing tables in a special way
    716 that's probably unique.  Before they are usable, the FIRST sets of all
    717 non-terminals are computed.
    718 
    719 Reference
    720 ---------
    721 
    722 [Aho&Ullman 77]
    723     Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977
    724     (first edition)
    725 
    726 */
    727