Home | History | Annotate | Download | only in Parser
      1 /* Parser generator */
      2 
      3 /* For a description, see the comments at end of this file */
      4 
      5 #include "Python.h"
      6 #include "pgenheaders.h"
      7 #include "token.h"
      8 #include "node.h"
      9 #include "grammar.h"
     10 #include "metagrammar.h"
     11 #include "pgen.h"
     12 
     13 extern int Py_DebugFlag;
     14 extern int Py_IgnoreEnvironmentFlag; /* needed by Py_GETENV */
     15 
     16 
     17 /* PART ONE -- CONSTRUCT NFA -- Cf. Algorithm 3.2 from [Aho&Ullman 77] */
     18 
     19 typedef struct _nfaarc {
     20     int         ar_label;
     21     int         ar_arrow;
     22 } nfaarc;
     23 
     24 typedef struct _nfastate {
     25     int         st_narcs;
     26     nfaarc      *st_arc;
     27 } nfastate;
     28 
     29 typedef struct _nfa {
     30     int                 nf_type;
     31     char                *nf_name;
     32     int                 nf_nstates;
     33     nfastate            *nf_state;
     34     int                 nf_start, nf_finish;
     35 } nfa;
     36 
     37 /* Forward */
     38 static void compile_rhs(labellist *ll,
     39                         nfa *nf, node *n, int *pa, int *pb);
     40 static void compile_alt(labellist *ll,
     41                         nfa *nf, node *n, int *pa, int *pb);
     42 static void compile_item(labellist *ll,
     43                          nfa *nf, node *n, int *pa, int *pb);
     44 static void compile_atom(labellist *ll,
     45                          nfa *nf, node *n, int *pa, int *pb);
     46 
     47 static int
     48 addnfastate(nfa *nf)
     49 {
     50     nfastate *st;
     51 
     52     nf->nf_state = (nfastate *)PyObject_REALLOC(nf->nf_state,
     53                                 sizeof(nfastate) * (nf->nf_nstates + 1));
     54     if (nf->nf_state == NULL)
     55         Py_FatalError("out of mem");
     56     st = &nf->nf_state[nf->nf_nstates++];
     57     st->st_narcs = 0;
     58     st->st_arc = NULL;
     59     return st - nf->nf_state;
     60 }
     61 
     62 static void
     63 addnfaarc(nfa *nf, int from, int to, int lbl)
     64 {
     65     nfastate *st;
     66     nfaarc *ar;
     67 
     68     st = &nf->nf_state[from];
     69     st->st_arc = (nfaarc *)PyObject_REALLOC(st->st_arc,
     70                                   sizeof(nfaarc) * (st->st_narcs + 1));
     71     if (st->st_arc == NULL)
     72         Py_FatalError("out of mem");
     73     ar = &st->st_arc[st->st_narcs++];
     74     ar->ar_label = lbl;
     75     ar->ar_arrow = to;
     76 }
     77 
     78 static nfa *
     79 newnfa(char *name)
     80 {
     81     nfa *nf;
     82     static int type = NT_OFFSET; /* All types will be disjunct */
     83 
     84     nf = (nfa *)PyObject_MALLOC(sizeof(nfa));
     85     if (nf == NULL)
     86         Py_FatalError("no mem for new nfa");
     87     nf->nf_type = type++;
     88     nf->nf_name = name; /* XXX strdup(name) ??? */
     89     nf->nf_nstates = 0;
     90     nf->nf_state = NULL;
     91     nf->nf_start = nf->nf_finish = -1;
     92     return nf;
     93 }
     94 
     95 typedef struct _nfagrammar {
     96     int                 gr_nnfas;
     97     nfa                 **gr_nfa;
     98     labellist           gr_ll;
     99 } nfagrammar;
    100 
    101 /* Forward */
    102 static void compile_rule(nfagrammar *gr, node *n);
    103 
    104 static nfagrammar *
    105 newnfagrammar(void)
    106 {
    107     nfagrammar *gr;
    108 
    109     gr = (nfagrammar *)PyObject_MALLOC(sizeof(nfagrammar));
    110     if (gr == NULL)
    111         Py_FatalError("no mem for new nfa grammar");
    112     gr->gr_nnfas = 0;
    113     gr->gr_nfa = NULL;
    114     gr->gr_ll.ll_nlabels = 0;
    115     gr->gr_ll.ll_label = NULL;
    116     addlabel(&gr->gr_ll, ENDMARKER, "EMPTY");
    117     return gr;
    118 }
    119 
    120 static void
    121 freenfagrammar(nfagrammar *gr)
    122 {
    123     for (int i = 0; i < gr->gr_nnfas; i++) {
    124         PyObject_FREE(gr->gr_nfa[i]->nf_state);
    125     }
    126     PyObject_FREE(gr->gr_nfa);
    127     PyObject_FREE(gr);
    128 }
    129 
    130 static nfa *
    131 addnfa(nfagrammar *gr, char *name)
    132 {
    133     nfa *nf;
    134 
    135     nf = newnfa(name);
    136     gr->gr_nfa = (nfa **)PyObject_REALLOC(gr->gr_nfa,
    137                                   sizeof(nfa*) * (gr->gr_nnfas + 1));
    138     if (gr->gr_nfa == NULL)
    139         Py_FatalError("out of mem");
    140     gr->gr_nfa[gr->gr_nnfas++] = nf;
    141     addlabel(&gr->gr_ll, NAME, nf->nf_name);
    142     return nf;
    143 }
    144 
    145 #ifdef Py_DEBUG
    146 
    147 static const char REQNFMT[] = "metacompile: less than %d children\n";
    148 
    149 #define REQN(i, count) do { \
    150     if (i < count) { \
    151         fprintf(stderr, REQNFMT, count); \
    152         Py_FatalError("REQN"); \
    153     } \
    154 } while (0)
    155 
    156 #else
    157 #define REQN(i, count)  /* empty */
    158 #endif
    159 
    160 static nfagrammar *
    161 metacompile(node *n)
    162 {
    163     nfagrammar *gr;
    164     int i;
    165 
    166     if (Py_DebugFlag)
    167         printf("Compiling (meta-) parse tree into NFA grammar\n");
    168     gr = newnfagrammar();
    169     REQ(n, MSTART);
    170     i = n->n_nchildren - 1; /* Last child is ENDMARKER */
    171     n = n->n_child;
    172     for (; --i >= 0; n++) {
    173         if (n->n_type != NEWLINE)
    174             compile_rule(gr, n);
    175     }
    176     return gr;
    177 }
    178 
    179 static void
    180 compile_rule(nfagrammar *gr, node *n)
    181 {
    182     nfa *nf;
    183 
    184     REQ(n, RULE);
    185     REQN(n->n_nchildren, 4);
    186     n = n->n_child;
    187     REQ(n, NAME);
    188     nf = addnfa(gr, n->n_str);
    189     n++;
    190     REQ(n, COLON);
    191     n++;
    192     REQ(n, RHS);
    193     compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish);
    194     n++;
    195     REQ(n, NEWLINE);
    196 }
    197 
    198 static void
    199 compile_rhs(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
    200 {
    201     int i;
    202     int a, b;
    203 
    204     REQ(n, RHS);
    205     i = n->n_nchildren;
    206     REQN(i, 1);
    207     n = n->n_child;
    208     REQ(n, ALT);
    209     compile_alt(ll, nf, n, pa, pb);
    210     if (--i <= 0)
    211         return;
    212     n++;
    213     a = *pa;
    214     b = *pb;
    215     *pa = addnfastate(nf);
    216     *pb = addnfastate(nf);
    217     addnfaarc(nf, *pa, a, EMPTY);
    218     addnfaarc(nf, b, *pb, EMPTY);
    219     for (; --i >= 0; n++) {
    220         REQ(n, VBAR);
    221         REQN(i, 1);
    222         --i;
    223         n++;
    224         REQ(n, ALT);
    225         compile_alt(ll, nf, n, &a, &b);
    226         addnfaarc(nf, *pa, a, EMPTY);
    227         addnfaarc(nf, b, *pb, EMPTY);
    228     }
    229 }
    230 
    231 static void
    232 compile_alt(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
    233 {
    234     int i;
    235     int a, b;
    236 
    237     REQ(n, ALT);
    238     i = n->n_nchildren;
    239     REQN(i, 1);
    240     n = n->n_child;
    241     REQ(n, ITEM);
    242     compile_item(ll, nf, n, pa, pb);
    243     --i;
    244     n++;
    245     for (; --i >= 0; n++) {
    246         REQ(n, ITEM);
    247         compile_item(ll, nf, n, &a, &b);
    248         addnfaarc(nf, *pb, a, EMPTY);
    249         *pb = b;
    250     }
    251 }
    252 
    253 static void
    254 compile_item(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
    255 {
    256     int i;
    257     int a, b;
    258 
    259     REQ(n, ITEM);
    260     i = n->n_nchildren;
    261     REQN(i, 1);
    262     n = n->n_child;
    263     if (n->n_type == LSQB) {
    264         REQN(i, 3);
    265         n++;
    266         REQ(n, RHS);
    267         *pa = addnfastate(nf);
    268         *pb = addnfastate(nf);
    269         addnfaarc(nf, *pa, *pb, EMPTY);
    270         compile_rhs(ll, nf, n, &a, &b);
    271         addnfaarc(nf, *pa, a, EMPTY);
    272         addnfaarc(nf, b, *pb, EMPTY);
    273         REQN(i, 1);
    274         n++;
    275         REQ(n, RSQB);
    276     }
    277     else {
    278         compile_atom(ll, nf, n, pa, pb);
    279         if (--i <= 0)
    280             return;
    281         n++;
    282         addnfaarc(nf, *pb, *pa, EMPTY);
    283         if (n->n_type == STAR)
    284             *pb = *pa;
    285         else
    286             REQ(n, PLUS);
    287     }
    288 }
    289 
    290 static void
    291 compile_atom(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
    292 {
    293     int i;
    294 
    295     REQ(n, ATOM);
    296     i = n->n_nchildren;
    297     (void)i; /* Don't warn about set but unused */
    298     REQN(i, 1);
    299     n = n->n_child;
    300     if (n->n_type == LPAR) {
    301         REQN(i, 3);
    302         n++;
    303         REQ(n, RHS);
    304         compile_rhs(ll, nf, n, pa, pb);
    305         n++;
    306         REQ(n, RPAR);
    307     }
    308     else if (n->n_type == NAME || n->n_type == STRING) {
    309         *pa = addnfastate(nf);
    310         *pb = addnfastate(nf);
    311         addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str));
    312     }
    313     else
    314         REQ(n, NAME);
    315 }
    316 
    317 static void
    318 dumpstate(labellist *ll, nfa *nf, int istate)
    319 {
    320     nfastate *st;
    321     int i;
    322     nfaarc *ar;
    323 
    324     printf("%c%2d%c",
    325         istate == nf->nf_start ? '*' : ' ',
    326         istate,
    327         istate == nf->nf_finish ? '.' : ' ');
    328     st = &nf->nf_state[istate];
    329     ar = st->st_arc;
    330     for (i = 0; i < st->st_narcs; i++) {
    331         if (i > 0)
    332             printf("\n    ");
    333         printf("-> %2d  %s", ar->ar_arrow,
    334             PyGrammar_LabelRepr(&ll->ll_label[ar->ar_label]));
    335         ar++;
    336     }
    337     printf("\n");
    338 }
    339 
    340 static void
    341 dumpnfa(labellist *ll, nfa *nf)
    342 {
    343     int i;
    344 
    345     printf("NFA '%s' has %d states; start %d, finish %d\n",
    346         nf->nf_name, nf->nf_nstates, nf->nf_start, nf->nf_finish);
    347     for (i = 0; i < nf->nf_nstates; i++)
    348         dumpstate(ll, nf, i);
    349 }
    350 
    351 
    352 /* PART TWO -- CONSTRUCT DFA -- Algorithm 3.1 from [Aho&Ullman 77] */
    353 
    354 static void
    355 addclosure(bitset ss, nfa *nf, int istate)
    356 {
    357     if (addbit(ss, istate)) {
    358         nfastate *st = &nf->nf_state[istate];
    359         nfaarc *ar = st->st_arc;
    360         int i;
    361 
    362         for (i = st->st_narcs; --i >= 0; ) {
    363             if (ar->ar_label == EMPTY)
    364                 addclosure(ss, nf, ar->ar_arrow);
    365             ar++;
    366         }
    367     }
    368 }
    369 
    370 typedef struct _ss_arc {
    371     bitset      sa_bitset;
    372     int         sa_arrow;
    373     int         sa_label;
    374 } ss_arc;
    375 
    376 typedef struct _ss_state {
    377     bitset      ss_ss;
    378     int         ss_narcs;
    379     struct _ss_arc      *ss_arc;
    380     int         ss_deleted;
    381     int         ss_finish;
    382     int         ss_rename;
    383 } ss_state;
    384 
    385 typedef struct _ss_dfa {
    386     int         sd_nstates;
    387     ss_state *sd_state;
    388 } ss_dfa;
    389 
    390 /* Forward */
    391 static void printssdfa(int xx_nstates, ss_state *xx_state, int nbits,
    392                        labellist *ll, const char *msg);
    393 static void simplify(int xx_nstates, ss_state *xx_state);
    394 static void convert(dfa *d, int xx_nstates, ss_state *xx_state);
    395 
    396 static void
    397 makedfa(nfagrammar *gr, nfa *nf, dfa *d)
    398 {
    399     int nbits = nf->nf_nstates;
    400     bitset ss;
    401     int xx_nstates;
    402     ss_state *xx_state, *yy;
    403     ss_arc *zz;
    404     int istate, jstate, iarc, jarc, ibit;
    405     nfastate *st;
    406     nfaarc *ar;
    407 
    408     ss = newbitset(nbits);
    409     addclosure(ss, nf, nf->nf_start);
    410     xx_state = (ss_state *)PyObject_MALLOC(sizeof(ss_state));
    411     if (xx_state == NULL)
    412         Py_FatalError("no mem for xx_state in makedfa");
    413     xx_nstates = 1;
    414     yy = &xx_state[0];
    415     yy->ss_ss = ss;
    416     yy->ss_narcs = 0;
    417     yy->ss_arc = NULL;
    418     yy->ss_deleted = 0;
    419     yy->ss_finish = testbit(ss, nf->nf_finish);
    420     if (yy->ss_finish)
    421         printf("Error: nonterminal '%s' may produce empty.\n",
    422             nf->nf_name);
    423 
    424     /* This algorithm is from a book written before
    425        the invention of structured programming... */
    426 
    427     /* For each unmarked state... */
    428     for (istate = 0; istate < xx_nstates; ++istate) {
    429         size_t size;
    430         yy = &xx_state[istate];
    431         ss = yy->ss_ss;
    432         /* For all its states... */
    433         for (ibit = 0; ibit < nf->nf_nstates; ++ibit) {
    434             if (!testbit(ss, ibit))
    435                 continue;
    436             st = &nf->nf_state[ibit];
    437             /* For all non-empty arcs from this state... */
    438             for (iarc = 0; iarc < st->st_narcs; iarc++) {
    439                 ar = &st->st_arc[iarc];
    440                 if (ar->ar_label == EMPTY)
    441                     continue;
    442                 /* Look up in list of arcs from this state */
    443                 for (jarc = 0; jarc < yy->ss_narcs; ++jarc) {
    444                     zz = &yy->ss_arc[jarc];
    445                     if (ar->ar_label == zz->sa_label)
    446                         goto found;
    447                 }
    448                 /* Add new arc for this state */
    449                 size = sizeof(ss_arc) * (yy->ss_narcs + 1);
    450                 yy->ss_arc = (ss_arc *)PyObject_REALLOC(
    451                                             yy->ss_arc, size);
    452                 if (yy->ss_arc == NULL)
    453                     Py_FatalError("out of mem");
    454                 zz = &yy->ss_arc[yy->ss_narcs++];
    455                 zz->sa_label = ar->ar_label;
    456                 zz->sa_bitset = newbitset(nbits);
    457                 zz->sa_arrow = -1;
    458              found:             ;
    459                 /* Add destination */
    460                 addclosure(zz->sa_bitset, nf, ar->ar_arrow);
    461             }
    462         }
    463         /* Now look up all the arrow states */
    464         for (jarc = 0; jarc < xx_state[istate].ss_narcs; jarc++) {
    465             zz = &xx_state[istate].ss_arc[jarc];
    466             for (jstate = 0; jstate < xx_nstates; jstate++) {
    467                 if (samebitset(zz->sa_bitset,
    468                     xx_state[jstate].ss_ss, nbits)) {
    469                     zz->sa_arrow = jstate;
    470                     goto done;
    471                 }
    472             }
    473             size = sizeof(ss_state) * (xx_nstates + 1);
    474             xx_state = (ss_state *)PyObject_REALLOC(xx_state,
    475                                                         size);
    476             if (xx_state == NULL)
    477                 Py_FatalError("out of mem");
    478             zz->sa_arrow = xx_nstates;
    479             yy = &xx_state[xx_nstates++];
    480             yy->ss_ss = zz->sa_bitset;
    481             yy->ss_narcs = 0;
    482             yy->ss_arc = NULL;
    483             yy->ss_deleted = 0;
    484             yy->ss_finish = testbit(yy->ss_ss, nf->nf_finish);
    485          done:          ;
    486         }
    487     }
    488 
    489     if (Py_DebugFlag)
    490         printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
    491                                         "before minimizing");
    492 
    493     simplify(xx_nstates, xx_state);
    494 
    495     if (Py_DebugFlag)
    496         printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
    497                                         "after minimizing");
    498 
    499     convert(d, xx_nstates, xx_state);
    500 
    501     for (int i = 0; i < xx_nstates; i++) {
    502         for (int j = 0; j < xx_state[i].ss_narcs; j++)
    503             delbitset(xx_state[i].ss_arc[j].sa_bitset);
    504         PyObject_FREE(xx_state[i].ss_arc);
    505     }
    506     PyObject_FREE(xx_state);
    507 }
    508 
    509 static void
    510 printssdfa(int xx_nstates, ss_state *xx_state, int nbits,
    511            labellist *ll, const char *msg)
    512 {
    513     int i, ibit, iarc;
    514     ss_state *yy;
    515     ss_arc *zz;
    516 
    517     printf("Subset DFA %s\n", msg);
    518     for (i = 0; i < xx_nstates; i++) {
    519         yy = &xx_state[i];
    520         if (yy->ss_deleted)
    521             continue;
    522         printf(" Subset %d", i);
    523         if (yy->ss_finish)
    524             printf(" (finish)");
    525         printf(" { ");
    526         for (ibit = 0; ibit < nbits; ibit++) {
    527             if (testbit(yy->ss_ss, ibit))
    528                 printf("%d ", ibit);
    529         }
    530         printf("}\n");
    531         for (iarc = 0; iarc < yy->ss_narcs; iarc++) {
    532             zz = &yy->ss_arc[iarc];
    533             printf("  Arc to state %d, label %s\n",
    534                 zz->sa_arrow,
    535                 PyGrammar_LabelRepr(
    536                     &ll->ll_label[zz->sa_label]));
    537         }
    538     }
    539 }
    540 
    541 
    542 /* PART THREE -- SIMPLIFY DFA */
    543 
    544 /* Simplify the DFA by repeatedly eliminating states that are
    545    equivalent to another oner.  This is NOT Algorithm 3.3 from
    546    [Aho&Ullman 77].  It does not always finds the minimal DFA,
    547    but it does usually make a much smaller one...  (For an example
    548    of sub-optimal behavior, try S: x a b+ | y a b+.)
    549 */
    550 
    551 static int
    552 samestate(ss_state *s1, ss_state *s2)
    553 {
    554     int i;
    555 
    556     if (s1->ss_narcs != s2->ss_narcs || s1->ss_finish != s2->ss_finish)
    557         return 0;
    558     for (i = 0; i < s1->ss_narcs; i++) {
    559         if (s1->ss_arc[i].sa_arrow != s2->ss_arc[i].sa_arrow ||
    560             s1->ss_arc[i].sa_label != s2->ss_arc[i].sa_label)
    561             return 0;
    562     }
    563     return 1;
    564 }
    565 
    566 static void
    567 renamestates(int xx_nstates, ss_state *xx_state, int from, int to)
    568 {
    569     int i, j;
    570 
    571     if (Py_DebugFlag)
    572         printf("Rename state %d to %d.\n", from, to);
    573     for (i = 0; i < xx_nstates; i++) {
    574         if (xx_state[i].ss_deleted)
    575             continue;
    576         for (j = 0; j < xx_state[i].ss_narcs; j++) {
    577             if (xx_state[i].ss_arc[j].sa_arrow == from)
    578                 xx_state[i].ss_arc[j].sa_arrow = to;
    579         }
    580     }
    581 }
    582 
    583 static void
    584 simplify(int xx_nstates, ss_state *xx_state)
    585 {
    586     int changes;
    587     int i, j;
    588 
    589     do {
    590         changes = 0;
    591         for (i = 1; i < xx_nstates; i++) {
    592             if (xx_state[i].ss_deleted)
    593                 continue;
    594             for (j = 0; j < i; j++) {
    595                 if (xx_state[j].ss_deleted)
    596                     continue;
    597                 if (samestate(&xx_state[i], &xx_state[j])) {
    598                     xx_state[i].ss_deleted++;
    599                     renamestates(xx_nstates, xx_state,
    600                                  i, j);
    601                     changes++;
    602                     break;
    603                 }
    604             }
    605         }
    606     } while (changes);
    607 }
    608 
    609 
    610 /* PART FOUR -- GENERATE PARSING TABLES */
    611 
    612 /* Convert the DFA into a grammar that can be used by our parser */
    613 
    614 static void
    615 convert(dfa *d, int xx_nstates, ss_state *xx_state)
    616 {
    617     int i, j;
    618     ss_state *yy;
    619     ss_arc *zz;
    620 
    621     for (i = 0; i < xx_nstates; i++) {
    622         yy = &xx_state[i];
    623         if (yy->ss_deleted)
    624             continue;
    625         yy->ss_rename = addstate(d);
    626     }
    627 
    628     for (i = 0; i < xx_nstates; i++) {
    629         yy = &xx_state[i];
    630         if (yy->ss_deleted)
    631             continue;
    632         for (j = 0; j < yy->ss_narcs; j++) {
    633             zz = &yy->ss_arc[j];
    634             addarc(d, yy->ss_rename,
    635                 xx_state[zz->sa_arrow].ss_rename,
    636                 zz->sa_label);
    637         }
    638         if (yy->ss_finish)
    639             addarc(d, yy->ss_rename, yy->ss_rename, 0);
    640     }
    641 
    642     d->d_initial = 0;
    643 }
    644 
    645 
    646 /* PART FIVE -- GLUE IT ALL TOGETHER */
    647 
    648 static grammar *
    649 maketables(nfagrammar *gr)
    650 {
    651     int i;
    652     nfa *nf;
    653     dfa *d;
    654     grammar *g;
    655 
    656     if (gr->gr_nnfas == 0)
    657         return NULL;
    658     g = newgrammar(gr->gr_nfa[0]->nf_type);
    659                     /* XXX first rule must be start rule */
    660     g->g_ll = gr->gr_ll;
    661 
    662     for (i = 0; i < gr->gr_nnfas; i++) {
    663         nf = gr->gr_nfa[i];
    664         if (Py_DebugFlag) {
    665             printf("Dump of NFA for '%s' ...\n", nf->nf_name);
    666             dumpnfa(&gr->gr_ll, nf);
    667             printf("Making DFA for '%s' ...\n", nf->nf_name);
    668         }
    669         d = adddfa(g, nf->nf_type, nf->nf_name);
    670         makedfa(gr, gr->gr_nfa[i], d);
    671     }
    672 
    673     return g;
    674 }
    675 
    676 grammar *
    677 pgen(node *n)
    678 {
    679     nfagrammar *gr;
    680     grammar *g;
    681 
    682     gr = metacompile(n);
    683     g = maketables(gr);
    684     translatelabels(g);
    685     addfirstsets(g);
    686     freenfagrammar(gr);
    687     return g;
    688 }
    689 
    690 grammar *
    691 Py_pgen(node *n)
    692 {
    693   return pgen(n);
    694 }
    695 
    696 /*
    697 
    698 Description
    699 -----------
    700 
    701 Input is a grammar in extended BNF (using * for repetition, + for
    702 at-least-once repetition, [] for optional parts, | for alternatives and
    703 () for grouping).  This has already been parsed and turned into a parse
    704 tree.
    705 
    706 Each rule is considered as a regular expression in its own right.
    707 It is turned into a Non-deterministic Finite Automaton (NFA), which
    708 is then turned into a Deterministic Finite Automaton (DFA), which is then
    709 optimized to reduce the number of states.  See [Aho&Ullman 77] chapter 3,
    710 or similar compiler books (this technique is more often used for lexical
    711 analyzers).
    712 
    713 The DFA's are used by the parser as parsing tables in a special way
    714 that's probably unique.  Before they are usable, the FIRST sets of all
    715 non-terminals are computed.
    716 
    717 Reference
    718 ---------
    719 
    720 [Aho&Ullman 77]
    721     Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977
    722     (first edition)
    723 
    724 */
    725