Home | History | Annotate | Download | only in re2c
      1 #include <stdlib.h>
      2 #include <ctype.h>
      3 #include <string.h>
      4 #include "tools/re2c/globals.h"
      5 #include "tools/re2c/substr.h"
      6 #include "tools/re2c/dfa.h"
      7 
      8 #define octCh(c) ('0' + c%8)
      9 
     10 void prtCh(FILE *o, unsigned char c){
     11     unsigned char oc = talx[c];
     12     switch(oc){
     13     case '\'': fputs("\\'", o); break;
     14     case '\n': fputs("\\n", o); break;
     15     case '\t': fputs("\\t", o); break;
     16     case '\v': fputs("\\v", o); break;
     17     case '\b': fputs("\\b", o); break;
     18     case '\r': fputs("\\r", o); break;
     19     case '\f': fputs("\\f", o); break;
     20     case '\a': fputs("\\a", o); break;
     21     case '\\': fputs("\\\\", o); break;
     22     default:
     23 	if(isprint(oc))
     24 	    fputc(oc, o);
     25 	else
     26 	    fprintf(o, "\\%c%c%c", octCh(c/64), octCh(c/8), octCh(c));
     27     }
     28 }
     29 
     30 void printSpan(FILE *o, unsigned int lb, unsigned int ub){
     31     if(lb > ub)
     32 	fputc('*', o);
     33     fputc('[', o);
     34     if((ub - lb) == 1){
     35 	prtCh(o, lb);
     36     } else {
     37 	prtCh(o, lb);
     38 	fputc('-', o);
     39 	prtCh(o, ub-1);
     40     }
     41     fputc(']', o);
     42 }
     43 
     44 unsigned int
     45 Span_show(Span *s, FILE *o, unsigned int lb)
     46 {
     47     if(s->to){
     48 	printSpan(o, lb, s->ub);
     49 	fprintf(o, " %u; ", s->to->label);
     50     }
     51     return s->ub;
     52 }
     53 
     54 void
     55 State_out(FILE *o, const State *s){
     56     unsigned int lb, i;
     57     fprintf(o, "state %u", s->label);
     58     if(s->rule)
     59 	fprintf(o, " accepts %u", s->rule->d.RuleOp.accept);
     60     fputs("\n", o); oline++;
     61     lb = 0;
     62     for(i = 0; i < s->go.nSpans; ++i)
     63 	lb = Span_show(&s->go.span[i], o, lb);
     64 }
     65 
     66 void
     67 DFA_out(FILE *o, const DFA *dfa){
     68     State *s;
     69     for(s = dfa->head; s; s = s->next) {
     70 	State_out(o, s);
     71 	fputs("\n\n", o); oline+=2;
     72     }
     73 }
     74 
     75 State *
     76 State_new(void)
     77 {
     78     State *s = malloc(sizeof(State));
     79     s->label = 0;
     80     s->rule = NULL;
     81     s->next = NULL;
     82     s->link = NULL;
     83     s->depth = 0;
     84     s->kCount = 0;
     85     s->kernel = NULL;
     86     s->isBase = 0;
     87     s->action = NULL;
     88     s->go.nSpans = 0;
     89     s->go.span = NULL;
     90     return s;
     91 }
     92 
     93 void
     94 State_delete(State *s)
     95 {
     96     if (s->kernel)
     97 	free(s->kernel);
     98     if (s->go.span)
     99 	free(s->go.span);
    100     free(s);
    101 }
    102 
    103 static Ins **closure(Ins **cP, Ins *i){
    104     while(!isMarked(i)){
    105 	mark(i);
    106 	*(cP++) = i;
    107 	if(i->i.tag == FORK){
    108 	    cP = closure(cP, i + 1);
    109 	    i = (Ins*) i->i.link;
    110 	} else if(i->i.tag == GOTO){
    111 	    i = (Ins*) i->i.link;
    112 	} else
    113 	    break;
    114     }
    115     return cP;
    116 }
    117 
    118 typedef struct GoTo {
    119     Char	ch;
    120     void	*to;
    121 } GoTo;
    122 
    123 DFA *
    124 DFA_new(Ins *ins, unsigned int ni, unsigned int lb, unsigned int ub, Char *rep)
    125 {
    126     DFA *d = malloc(sizeof(DFA));
    127     Ins **work = malloc(sizeof(Ins*)*(ni+1));
    128     unsigned int nc = ub - lb;
    129     GoTo *goTo = malloc(sizeof(GoTo)*nc);
    130     Span *span = malloc(sizeof(Span)*nc);
    131 
    132     d->lbChar = lb;
    133     d->ubChar = ub;
    134     memset((char*) goTo, 0, nc*sizeof(GoTo));
    135     d->tail = &d->head;
    136     d->head = NULL;
    137     d->nStates = 0;
    138     d->toDo = NULL;
    139     DFA_findState(d, work, closure(work, &ins[0]) - work);
    140     while(d->toDo){
    141 	State *s = d->toDo;
    142 
    143 	Ins **cP, **iP, *i;
    144 	unsigned int nGoTos = 0;
    145 	unsigned int j;
    146 
    147 	d->toDo = s->link;
    148 	s->rule = NULL;
    149 	for(iP = s->kernel; (i = *iP); ++iP){
    150 	    if(i->i.tag == CHAR){
    151 		Ins *j2;
    152 		for(j2 = i + 1; j2 < (Ins*) i->i.link; ++j2){
    153 		    if(!(j2->c.link = goTo[j2->c.value - lb].to))
    154 			goTo[nGoTos++].ch = j2->c.value;
    155 		    goTo[j2->c.value - lb].to = j2;
    156 		}
    157 	    } else if(i->i.tag == TERM){
    158 		if(!s->rule || ((RegExp *)i->i.link)->d.RuleOp.accept < s->rule->d.RuleOp.accept)
    159 		    s->rule = (RegExp *)i->i.link;
    160 	    }
    161 	}
    162 
    163 	for(j = 0; j < nGoTos; ++j){
    164 	    GoTo *go = &goTo[goTo[j].ch - lb];
    165 	    i = (Ins*) go->to;
    166 	    for(cP = work; i; i = (Ins*) i->c.link)
    167 		cP = closure(cP, i + i->c.bump);
    168 	    go->to = DFA_findState(d, work, cP - work);
    169 	}
    170 
    171 	s->go.nSpans = 0;
    172 	for(j = 0; j < nc;){
    173 	    State *to = (State*) goTo[rep[j]].to;
    174 	    while(++j < nc && goTo[rep[j]].to == to);
    175 	    span[s->go.nSpans].ub = lb + j;
    176 	    span[s->go.nSpans].to = to;
    177 	    s->go.nSpans++;
    178 	}
    179 
    180 	for(j = nGoTos; j-- > 0;)
    181 	    goTo[goTo[j].ch - lb].to = NULL;
    182 
    183 	s->go.span = malloc(sizeof(Span)*s->go.nSpans);
    184 	memcpy((char*) s->go.span, (char*) span, s->go.nSpans*sizeof(Span));
    185 
    186 	Action_new_Match(s);
    187 
    188     }
    189     free(work);
    190     free(goTo);
    191     free(span);
    192 
    193     return d;
    194 }
    195 
    196 void
    197 DFA_delete(DFA *d){
    198     State *s;
    199     while((s = d->head)){
    200 	d->head = s->next;
    201 	State_delete(s);
    202     }
    203 }
    204 
    205 void DFA_addState(DFA *d, State **a, State *s){
    206     s->label = d->nStates++;
    207     s->next = *a;
    208     *a = s;
    209     if(a == d->tail)
    210 	d->tail = &s->next;
    211 }
    212 
    213 State *DFA_findState(DFA *d, Ins **kernel, unsigned int kCount){
    214     Ins **cP, **iP, *i;
    215     State *s;
    216 
    217     kernel[kCount] = NULL;
    218 
    219     cP = kernel;
    220     for(iP = kernel; (i = *iP); ++iP){
    221 	 if(i->i.tag == CHAR || i->i.tag == TERM){
    222 	     *cP++ = i;
    223 	} else {
    224 	     unmark(i);
    225 	}
    226     }
    227     kCount = cP - kernel;
    228     kernel[kCount] = NULL;
    229 
    230     for(s = d->head; s; s = s->next){
    231 	 if(s->kCount == kCount){
    232 	     for(iP = s->kernel; (i = *iP); ++iP)
    233 		 if(!isMarked(i))
    234 		     goto nextState;
    235 	     goto unmarkAll;
    236 	 }
    237 	 nextState:;
    238     }
    239 
    240     s = State_new();
    241     DFA_addState(d, d->tail, s);
    242     s->kCount = kCount;
    243     s->kernel = malloc(sizeof(Ins*)*(kCount+1));
    244     memcpy(s->kernel, kernel, (kCount+1)*sizeof(Ins*));
    245     s->link = d->toDo;
    246     d->toDo = s;
    247 
    248 unmarkAll:
    249     for(iP = kernel; (i = *iP); ++iP)
    250 	 unmark(i);
    251 
    252     return s;
    253 }
    254