Home | History | Annotate | Download | only in nawk-20071023
      1 /****************************************************************
      2 Copyright (C) Lucent Technologies 1997
      3 All Rights Reserved
      4 
      5 Permission to use, copy, modify, and distribute this software and
      6 its documentation for any purpose and without fee is hereby
      7 granted, provided that the above copyright notice appear in all
      8 copies and that both that the copyright notice and this
      9 permission notice and warranty disclaimer appear in supporting
     10 documentation, and that the name Lucent Technologies or any of
     11 its entities not be used in advertising or publicity pertaining
     12 to distribution of the software without specific, written prior
     13 permission.
     14 
     15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
     16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
     17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
     18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
     19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
     20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
     21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
     22 THIS SOFTWARE.
     23 ****************************************************************/
     24 
     25 /* lasciate ogne speranza, voi ch'intrate. */
     26 
     27 #define	DEBUG
     28 
     29 #include <ctype.h>
     30 #include <stdio.h>
     31 #include <string.h>
     32 #include <stdlib.h>
     33 #include "awk.h"
     34 #include "ytab.h"
     35 
     36 #define	HAT	(NCHARS+2)	/* matches ^ in regular expr */
     37 				/* NCHARS is 2**n */
     38 #define MAXLIN 22
     39 
     40 #define type(v)		(v)->nobj	/* badly overloaded here */
     41 #define info(v)		(v)->ntype	/* badly overloaded here */
     42 #define left(v)		(v)->narg[0]
     43 #define right(v)	(v)->narg[1]
     44 #define parent(v)	(v)->nnext
     45 
     46 #define LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
     47 #define ELEAF	case EMPTYRE:		/* empty string in regexp */
     48 #define UNARY	case STAR: case PLUS: case QUEST:
     49 
     50 /* encoding in tree Nodes:
     51 	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
     52 		left is index, right contains value or pointer to value
     53 	unary (STAR, PLUS, QUEST): left is child, right is null
     54 	binary (CAT, OR): left and right are children
     55 	parent contains pointer to parent
     56 */
     57 
     58 
     59 int	*setvec;
     60 int	*tmpset;
     61 int	maxsetvec = 0;
     62 
     63 int	rtok;		/* next token in current re */
     64 int	rlxval;
     65 static uschar	*rlxstr;
     66 static uschar	*prestr;	/* current position in current re */
     67 static uschar	*lastre;	/* origin of last re */
     68 
     69 static	int setcnt;
     70 static	int poscnt;
     71 
     72 char	*patbeg;
     73 int	patlen;
     74 
     75 #define	NFA	20	/* cache this many dynamic fa's */
     76 fa	*fatab[NFA];
     77 int	nfatab	= 0;	/* entries in fatab */
     78 
     79 fa *makedfa(const char *s, int anchor)	/* returns dfa for reg expr s */
     80 {
     81 	int i, use, nuse;
     82 	fa *pfa;
     83 	static int now = 1;
     84 
     85 	if (setvec == 0) {	/* first time through any RE */
     86 		maxsetvec = MAXLIN;
     87 		setvec = (int *) malloc(maxsetvec * sizeof(int));
     88 		tmpset = (int *) malloc(maxsetvec * sizeof(int));
     89 		if (setvec == 0 || tmpset == 0)
     90 			overflo("out of space initializing makedfa");
     91 	}
     92 
     93 	if (compile_time)	/* a constant for sure */
     94 		return mkdfa(s, anchor);
     95 	for (i = 0; i < nfatab; i++)	/* is it there already? */
     96 		if (fatab[i]->anchor == anchor
     97 		  && strcmp((const char *) fatab[i]->restr, s) == 0) {
     98 			fatab[i]->use = now++;
     99 			return fatab[i];
    100 		}
    101 	pfa = mkdfa(s, anchor);
    102 	if (nfatab < NFA) {	/* room for another */
    103 		fatab[nfatab] = pfa;
    104 		fatab[nfatab]->use = now++;
    105 		nfatab++;
    106 		return pfa;
    107 	}
    108 	use = fatab[0]->use;	/* replace least-recently used */
    109 	nuse = 0;
    110 	for (i = 1; i < nfatab; i++)
    111 		if (fatab[i]->use < use) {
    112 			use = fatab[i]->use;
    113 			nuse = i;
    114 		}
    115 	freefa(fatab[nuse]);
    116 	fatab[nuse] = pfa;
    117 	pfa->use = now++;
    118 	return pfa;
    119 }
    120 
    121 fa *mkdfa(const char *s, int anchor)	/* does the real work of making a dfa */
    122 				/* anchor = 1 for anchored matches, else 0 */
    123 {
    124 	Node *p, *p1;
    125 	fa *f;
    126 
    127 	p = reparse(s);
    128 	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
    129 		/* put ALL STAR in front of reg.  exp. */
    130 	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
    131 		/* put FINAL after reg.  exp. */
    132 
    133 	poscnt = 0;
    134 	penter(p1);	/* enter parent pointers and leaf indices */
    135 	if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
    136 		overflo("out of space for fa");
    137 	f->accept = poscnt-1;	/* penter has computed number of positions in re */
    138 	cfoll(f, p1);	/* set up follow sets */
    139 	freetr(p1);
    140 	if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
    141 			overflo("out of space in makedfa");
    142 	if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
    143 		overflo("out of space in makedfa");
    144 	*f->posns[1] = 0;
    145 	f->initstat = makeinit(f, anchor);
    146 	f->anchor = anchor;
    147 	f->restr = (uschar *) tostring(s);
    148 	return f;
    149 }
    150 
    151 int makeinit(fa *f, int anchor)
    152 {
    153 	int i, k;
    154 
    155 	f->curstat = 2;
    156 	f->out[2] = 0;
    157 	f->reset = 0;
    158 	k = *(f->re[0].lfollow);
    159 	xfree(f->posns[2]);
    160 	if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
    161 		overflo("out of space in makeinit");
    162 	for (i=0; i <= k; i++) {
    163 		(f->posns[2])[i] = (f->re[0].lfollow)[i];
    164 	}
    165 	if ((f->posns[2])[1] == f->accept)
    166 		f->out[2] = 1;
    167 	for (i=0; i < NCHARS; i++)
    168 		f->gototab[2][i] = 0;
    169 	f->curstat = cgoto(f, 2, HAT);
    170 	if (anchor) {
    171 		*f->posns[2] = k-1;	/* leave out position 0 */
    172 		for (i=0; i < k; i++) {
    173 			(f->posns[0])[i] = (f->posns[2])[i];
    174 		}
    175 
    176 		f->out[0] = f->out[2];
    177 		if (f->curstat != 2)
    178 			--(*f->posns[f->curstat]);
    179 	}
    180 	return f->curstat;
    181 }
    182 
    183 void penter(Node *p)	/* set up parent pointers and leaf indices */
    184 {
    185 	switch (type(p)) {
    186 	ELEAF
    187 	LEAF
    188 		info(p) = poscnt;
    189 		poscnt++;
    190 		break;
    191 	UNARY
    192 		penter(left(p));
    193 		parent(left(p)) = p;
    194 		break;
    195 	case CAT:
    196 	case OR:
    197 		penter(left(p));
    198 		penter(right(p));
    199 		parent(left(p)) = p;
    200 		parent(right(p)) = p;
    201 		break;
    202 	default:	/* can't happen */
    203 		FATAL("can't happen: unknown type %d in penter", type(p));
    204 		break;
    205 	}
    206 }
    207 
    208 void freetr(Node *p)	/* free parse tree */
    209 {
    210 	switch (type(p)) {
    211 	ELEAF
    212 	LEAF
    213 		xfree(p);
    214 		break;
    215 	UNARY
    216 		freetr(left(p));
    217 		xfree(p);
    218 		break;
    219 	case CAT:
    220 	case OR:
    221 		freetr(left(p));
    222 		freetr(right(p));
    223 		xfree(p);
    224 		break;
    225 	default:	/* can't happen */
    226 		FATAL("can't happen: unknown type %d in freetr", type(p));
    227 		break;
    228 	}
    229 }
    230 
    231 /* in the parsing of regular expressions, metacharacters like . have */
    232 /* to be seen literally;  \056 is not a metacharacter. */
    233 
    234 int hexstr(char **pp)	/* find and eval hex string at pp, return new p */
    235 {			/* only pick up one 8-bit byte (2 chars) */
    236 	uschar *p;
    237 	int n = 0;
    238 	int i;
    239 
    240 	for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
    241 		if (isdigit(*p))
    242 			n = 16 * n + *p - '0';
    243 		else if (*p >= 'a' && *p <= 'f')
    244 			n = 16 * n + *p - 'a' + 10;
    245 		else if (*p >= 'A' && *p <= 'F')
    246 			n = 16 * n + *p - 'A' + 10;
    247 	}
    248 	*pp = (char *) p;
    249 	return n;
    250 }
    251 
    252 #define isoctdigit(c) ((c) >= '0' && (c) <= '7')	/* multiple use of arg */
    253 
    254 int quoted(char **pp)	/* pick up next thing after a \\ */
    255 			/* and increment *pp */
    256 {
    257 	char *p = *pp;
    258 	int c;
    259 
    260 	if ((c = *p++) == 't')
    261 		c = '\t';
    262 	else if (c == 'n')
    263 		c = '\n';
    264 	else if (c == 'f')
    265 		c = '\f';
    266 	else if (c == 'r')
    267 		c = '\r';
    268 	else if (c == 'b')
    269 		c = '\b';
    270 	else if (c == '\\')
    271 		c = '\\';
    272 	else if (c == 'x') {	/* hexadecimal goo follows */
    273 		c = hexstr(&p);	/* this adds a null if number is invalid */
    274 	} else if (isoctdigit(c)) {	/* \d \dd \ddd */
    275 		int n = c - '0';
    276 		if (isoctdigit(*p)) {
    277 			n = 8 * n + *p++ - '0';
    278 			if (isoctdigit(*p))
    279 				n = 8 * n + *p++ - '0';
    280 		}
    281 		c = n;
    282 	} /* else */
    283 		/* c = c; */
    284 	*pp = p;
    285 	return c;
    286 }
    287 
    288 char *cclenter(const char *argp)	/* add a character class */
    289 {
    290 	int i, c, c2;
    291 	uschar *p = (uschar *) argp;
    292 	uschar *op, *bp;
    293 	static uschar *buf = 0;
    294 	static int bufsz = 100;
    295 
    296 	op = p;
    297 	if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
    298 		FATAL("out of space for character class [%.10s...] 1", p);
    299 	bp = buf;
    300 	for (i = 0; (c = *p++) != 0; ) {
    301 		if (c == '\\') {
    302 			c = quoted((char **) &p);
    303 		} else if (c == '-' && i > 0 && bp[-1] != 0) {
    304 			if (*p != 0) {
    305 				c = bp[-1];
    306 				c2 = *p++;
    307 				if (c2 == '\\')
    308 					c2 = quoted((char **) &p);
    309 				if (c > c2) {	/* empty; ignore */
    310 					bp--;
    311 					i--;
    312 					continue;
    313 				}
    314 				while (c < c2) {
    315 					if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
    316 						FATAL("out of space for character class [%.10s...] 2", p);
    317 					*bp++ = ++c;
    318 					i++;
    319 				}
    320 				continue;
    321 			}
    322 		}
    323 		if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
    324 			FATAL("out of space for character class [%.10s...] 3", p);
    325 		*bp++ = c;
    326 		i++;
    327 	}
    328 	*bp = 0;
    329 	dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
    330 	xfree(op);
    331 	return (char *) tostring((char *) buf);
    332 }
    333 
    334 void overflo(const char *s)
    335 {
    336 	FATAL("regular expression too big: %.30s...", s);
    337 }
    338 
    339 void cfoll(fa *f, Node *v)	/* enter follow set of each leaf of vertex v into lfollow[leaf] */
    340 {
    341 	int i;
    342 	int *p;
    343 
    344 	switch (type(v)) {
    345 	ELEAF
    346 	LEAF
    347 		f->re[info(v)].ltype = type(v);
    348 		f->re[info(v)].lval.np = right(v);
    349 		while (f->accept >= maxsetvec) {	/* guessing here! */
    350 			maxsetvec *= 4;
    351 			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
    352 			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
    353 			if (setvec == 0 || tmpset == 0)
    354 				overflo("out of space in cfoll()");
    355 		}
    356 		for (i = 0; i <= f->accept; i++)
    357 			setvec[i] = 0;
    358 		setcnt = 0;
    359 		follow(v);	/* computes setvec and setcnt */
    360 		if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
    361 			overflo("out of space building follow set");
    362 		f->re[info(v)].lfollow = p;
    363 		*p = setcnt;
    364 		for (i = f->accept; i >= 0; i--)
    365 			if (setvec[i] == 1)
    366 				*++p = i;
    367 		break;
    368 	UNARY
    369 		cfoll(f,left(v));
    370 		break;
    371 	case CAT:
    372 	case OR:
    373 		cfoll(f,left(v));
    374 		cfoll(f,right(v));
    375 		break;
    376 	default:	/* can't happen */
    377 		FATAL("can't happen: unknown type %d in cfoll", type(v));
    378 	}
    379 }
    380 
    381 int first(Node *p)	/* collects initially active leaves of p into setvec */
    382 			/* returns 0 if p matches empty string */
    383 {
    384 	int b, lp;
    385 
    386 	switch (type(p)) {
    387 	ELEAF
    388 	LEAF
    389 		lp = info(p);	/* look for high-water mark of subscripts */
    390 		while (setcnt >= maxsetvec || lp >= maxsetvec) {	/* guessing here! */
    391 			maxsetvec *= 4;
    392 			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
    393 			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
    394 			if (setvec == 0 || tmpset == 0)
    395 				overflo("out of space in first()");
    396 		}
    397 		if (type(p) == EMPTYRE) {
    398 			setvec[lp] = 0;
    399 			return(0);
    400 		}
    401 		if (setvec[lp] != 1) {
    402 			setvec[lp] = 1;
    403 			setcnt++;
    404 		}
    405 		if (type(p) == CCL && (*(char *) right(p)) == '\0')
    406 			return(0);		/* empty CCL */
    407 		else return(1);
    408 	case PLUS:
    409 		if (first(left(p)) == 0) return(0);
    410 		return(1);
    411 	case STAR:
    412 	case QUEST:
    413 		first(left(p));
    414 		return(0);
    415 	case CAT:
    416 		if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
    417 		return(1);
    418 	case OR:
    419 		b = first(right(p));
    420 		if (first(left(p)) == 0 || b == 0) return(0);
    421 		return(1);
    422 	}
    423 	FATAL("can't happen: unknown type %d in first", type(p));	/* can't happen */
    424 	return(-1);
    425 }
    426 
    427 void follow(Node *v)	/* collects leaves that can follow v into setvec */
    428 {
    429 	Node *p;
    430 
    431 	if (type(v) == FINAL)
    432 		return;
    433 	p = parent(v);
    434 	switch (type(p)) {
    435 	case STAR:
    436 	case PLUS:
    437 		first(v);
    438 		follow(p);
    439 		return;
    440 
    441 	case OR:
    442 	case QUEST:
    443 		follow(p);
    444 		return;
    445 
    446 	case CAT:
    447 		if (v == left(p)) {	/* v is left child of p */
    448 			if (first(right(p)) == 0) {
    449 				follow(p);
    450 				return;
    451 			}
    452 		} else		/* v is right child */
    453 			follow(p);
    454 		return;
    455 	}
    456 }
    457 
    458 int member(int c, const char *sarg)	/* is c in s? */
    459 {
    460 	uschar *s = (uschar *) sarg;
    461 
    462 	while (*s)
    463 		if (c == *s++)
    464 			return(1);
    465 	return(0);
    466 }
    467 
    468 int match(fa *f, const char *p0)	/* shortest match ? */
    469 {
    470 	int s, ns;
    471 	uschar *p = (uschar *) p0;
    472 
    473 	s = f->reset ? makeinit(f,0) : f->initstat;
    474 	if (f->out[s])
    475 		return(1);
    476 	do {
    477 		/* assert(*p < NCHARS); */
    478 		if ((ns = f->gototab[s][*p]) != 0)
    479 			s = ns;
    480 		else
    481 			s = cgoto(f, s, *p);
    482 		if (f->out[s])
    483 			return(1);
    484 	} while (*p++ != 0);
    485 	return(0);
    486 }
    487 
    488 int pmatch(fa *f, const char *p0)	/* longest match, for sub */
    489 {
    490 	int s, ns;
    491 	uschar *p = (uschar *) p0;
    492 	uschar *q;
    493 	int i, k;
    494 
    495 	/* s = f->reset ? makeinit(f,1) : f->initstat; */
    496 	if (f->reset) {
    497 		f->initstat = s = makeinit(f,1);
    498 	} else {
    499 		s = f->initstat;
    500 	}
    501 	patbeg = (char *) p;
    502 	patlen = -1;
    503 	do {
    504 		q = p;
    505 		do {
    506 			if (f->out[s])		/* final state */
    507 				patlen = q-p;
    508 			/* assert(*q < NCHARS); */
    509 			if ((ns = f->gototab[s][*q]) != 0)
    510 				s = ns;
    511 			else
    512 				s = cgoto(f, s, *q);
    513 			if (s == 1) {	/* no transition */
    514 				if (patlen >= 0) {
    515 					patbeg = (char *) p;
    516 					return(1);
    517 				}
    518 				else
    519 					goto nextin;	/* no match */
    520 			}
    521 		} while (*q++ != 0);
    522 		if (f->out[s])
    523 			patlen = q-p-1;	/* don't count $ */
    524 		if (patlen >= 0) {
    525 			patbeg = (char *) p;
    526 			return(1);
    527 		}
    528 	nextin:
    529 		s = 2;
    530 		if (f->reset) {
    531 			for (i = 2; i <= f->curstat; i++)
    532 				xfree(f->posns[i]);
    533 			k = *f->posns[0];
    534 			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
    535 				overflo("out of space in pmatch");
    536 			for (i = 0; i <= k; i++)
    537 				(f->posns[2])[i] = (f->posns[0])[i];
    538 			f->initstat = f->curstat = 2;
    539 			f->out[2] = f->out[0];
    540 			for (i = 0; i < NCHARS; i++)
    541 				f->gototab[2][i] = 0;
    542 		}
    543 	} while (*p++ != 0);
    544 	return (0);
    545 }
    546 
    547 int nematch(fa *f, const char *p0)	/* non-empty match, for sub */
    548 {
    549 	int s, ns;
    550 	uschar *p = (uschar *) p0;
    551 	uschar *q;
    552 	int i, k;
    553 
    554 	/* s = f->reset ? makeinit(f,1) : f->initstat; */
    555 	if (f->reset) {
    556 		f->initstat = s = makeinit(f,1);
    557 	} else {
    558 		s = f->initstat;
    559 	}
    560 	patlen = -1;
    561 	while (*p) {
    562 		q = p;
    563 		do {
    564 			if (f->out[s])		/* final state */
    565 				patlen = q-p;
    566 			/* assert(*q < NCHARS); */
    567 			if ((ns = f->gototab[s][*q]) != 0)
    568 				s = ns;
    569 			else
    570 				s = cgoto(f, s, *q);
    571 			if (s == 1) {	/* no transition */
    572 				if (patlen > 0) {
    573 					patbeg = (char *) p;
    574 					return(1);
    575 				} else
    576 					goto nnextin;	/* no nonempty match */
    577 			}
    578 		} while (*q++ != 0);
    579 		if (f->out[s])
    580 			patlen = q-p-1;	/* don't count $ */
    581 		if (patlen > 0 ) {
    582 			patbeg = (char *) p;
    583 			return(1);
    584 		}
    585 	nnextin:
    586 		s = 2;
    587 		if (f->reset) {
    588 			for (i = 2; i <= f->curstat; i++)
    589 				xfree(f->posns[i]);
    590 			k = *f->posns[0];
    591 			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
    592 				overflo("out of state space");
    593 			for (i = 0; i <= k; i++)
    594 				(f->posns[2])[i] = (f->posns[0])[i];
    595 			f->initstat = f->curstat = 2;
    596 			f->out[2] = f->out[0];
    597 			for (i = 0; i < NCHARS; i++)
    598 				f->gototab[2][i] = 0;
    599 		}
    600 		p++;
    601 	}
    602 	return (0);
    603 }
    604 
    605 Node *reparse(const char *p)	/* parses regular expression pointed to by p */
    606 {			/* uses relex() to scan regular expression */
    607 	Node *np;
    608 
    609 	dprintf( ("reparse <%s>\n", p) );
    610 	lastre = prestr = (uschar *) p;	/* prestr points to string to be parsed */
    611 	rtok = relex();
    612 	/* GNU compatibility: an empty regexp matches anything */
    613 	if (rtok == '\0') {
    614 		/* FATAL("empty regular expression"); previous */
    615 		return(op2(EMPTYRE, NIL, NIL));
    616 	}
    617 	np = regexp();
    618 	if (rtok != '\0')
    619 		FATAL("syntax error in regular expression %s at %s", lastre, prestr);
    620 	return(np);
    621 }
    622 
    623 Node *regexp(void)	/* top-level parse of reg expr */
    624 {
    625 	return (alt(concat(primary())));
    626 }
    627 
    628 Node *primary(void)
    629 {
    630 	Node *np;
    631 
    632 	switch (rtok) {
    633 	case CHAR:
    634 		np = op2(CHAR, NIL, itonp(rlxval));
    635 		rtok = relex();
    636 		return (unary(np));
    637 	case ALL:
    638 		rtok = relex();
    639 		return (unary(op2(ALL, NIL, NIL)));
    640 	case EMPTYRE:
    641 		rtok = relex();
    642 		return (unary(op2(ALL, NIL, NIL)));
    643 	case DOT:
    644 		rtok = relex();
    645 		return (unary(op2(DOT, NIL, NIL)));
    646 	case CCL:
    647 		np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
    648 		rtok = relex();
    649 		return (unary(np));
    650 	case NCCL:
    651 		np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
    652 		rtok = relex();
    653 		return (unary(np));
    654 	case '^':
    655 		rtok = relex();
    656 		return (unary(op2(CHAR, NIL, itonp(HAT))));
    657 	case '$':
    658 		rtok = relex();
    659 		return (unary(op2(CHAR, NIL, NIL)));
    660 	case '(':
    661 		rtok = relex();
    662 		if (rtok == ')') {	/* special pleading for () */
    663 			rtok = relex();
    664 			return unary(op2(CCL, NIL, (Node *) tostring("")));
    665 		}
    666 		np = regexp();
    667 		if (rtok == ')') {
    668 			rtok = relex();
    669 			return (unary(np));
    670 		}
    671 		else
    672 			FATAL("syntax error in regular expression %s at %s", lastre, prestr);
    673 	default:
    674 		FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
    675 	}
    676 	return 0;	/*NOTREACHED*/
    677 }
    678 
    679 Node *concat(Node *np)
    680 {
    681 	switch (rtok) {
    682 	case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(':
    683 		return (concat(op2(CAT, np, primary())));
    684 	}
    685 	return (np);
    686 }
    687 
    688 Node *alt(Node *np)
    689 {
    690 	if (rtok == OR) {
    691 		rtok = relex();
    692 		return (alt(op2(OR, np, concat(primary()))));
    693 	}
    694 	return (np);
    695 }
    696 
    697 Node *unary(Node *np)
    698 {
    699 	switch (rtok) {
    700 	case STAR:
    701 		rtok = relex();
    702 		return (unary(op2(STAR, np, NIL)));
    703 	case PLUS:
    704 		rtok = relex();
    705 		return (unary(op2(PLUS, np, NIL)));
    706 	case QUEST:
    707 		rtok = relex();
    708 		return (unary(op2(QUEST, np, NIL)));
    709 	default:
    710 		return (np);
    711 	}
    712 }
    713 
    714 /*
    715  * Character class definitions conformant to the POSIX locale as
    716  * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
    717  * and operating character sets are both ASCII (ISO646) or supersets
    718  * thereof.
    719  *
    720  * Note that to avoid overflowing the temporary buffer used in
    721  * relex(), the expanded character class (prior to range expansion)
    722  * must be less than twice the size of their full name.
    723  */
    724 
    725 /* Because isblank doesn't show up in any of the header files on any
    726  * system i use, it's defined here.  if some other locale has a richer
    727  * definition of "blank", define HAS_ISBLANK and provide your own
    728  * version.
    729  * the parentheses here are an attempt to find a path through the maze
    730  * of macro definition and/or function and/or version provided.  thanks
    731  * to nelson beebe for the suggestion; let's see if it works everywhere.
    732  */
    733 
    734 #ifndef HAS_ISBLANK
    735 
    736 int (my_isblank)(int c)
    737 {
    738 	return c==' ' || c=='\t';
    739 }
    740 #undef  my_isblank
    741 #define isblank my_isblank
    742 
    743 #endif
    744 
    745 struct charclass {
    746 	const char *cc_name;
    747 	int cc_namelen;
    748 	int (*cc_func)(int);
    749 } charclasses[] = {
    750 	{ "alnum",	5,	isalnum },
    751 	{ "alpha",	5,	isalpha },
    752 	{ "blank",	5,	isblank },
    753 	{ "cntrl",	5,	iscntrl },
    754 	{ "digit",	5,	isdigit },
    755 	{ "graph",	5,	isgraph },
    756 	{ "lower",	5,	islower },
    757 	{ "print",	5,	isprint },
    758 	{ "punct",	5,	ispunct },
    759 	{ "space",	5,	isspace },
    760 	{ "upper",	5,	isupper },
    761 	{ "xdigit",	6,	isxdigit },
    762 	{ NULL,		0,	NULL },
    763 };
    764 
    765 
    766 int relex(void)		/* lexical analyzer for reparse */
    767 {
    768 	int c, n;
    769 	int cflag;
    770 	static uschar *buf = 0;
    771 	static int bufsz = 100;
    772 	uschar *bp;
    773 	struct charclass *cc;
    774 	int i;
    775 
    776 	switch (c = *prestr++) {
    777 	case '|': return OR;
    778 	case '*': return STAR;
    779 	case '+': return PLUS;
    780 	case '?': return QUEST;
    781 	case '.': return DOT;
    782 	case '\0': prestr--; return '\0';
    783 	case '^':
    784 	case '$':
    785 	case '(':
    786 	case ')':
    787 		return c;
    788 	case '\\':
    789 		rlxval = quoted((char **) &prestr);
    790 		return CHAR;
    791 	default:
    792 		rlxval = c;
    793 		return CHAR;
    794 	case '[':
    795 		if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
    796 			FATAL("out of space in reg expr %.10s..", lastre);
    797 		bp = buf;
    798 		if (*prestr == '^') {
    799 			cflag = 1;
    800 			prestr++;
    801 		}
    802 		else
    803 			cflag = 0;
    804 		n = 2 * strlen((const char *) prestr)+1;
    805 		if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
    806 			FATAL("out of space for reg expr %.10s...", lastre);
    807 		for (; ; ) {
    808 			if ((c = *prestr++) == '\\') {
    809 				*bp++ = '\\';
    810 				if ((c = *prestr++) == '\0')
    811 					FATAL("nonterminated character class %.20s...", lastre);
    812 				*bp++ = c;
    813 			/* } else if (c == '\n') { */
    814 			/* 	FATAL("newline in character class %.20s...", lastre); */
    815 			} else if (c == '[' && *prestr == ':') {
    816 				/* POSIX char class names, Dag-Erling Smorgrav, des (at) ofug.org */
    817 				for (cc = charclasses; cc->cc_name; cc++)
    818 					if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
    819 						break;
    820 				if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
    821 				    prestr[2 + cc->cc_namelen] == ']') {
    822 					prestr += cc->cc_namelen + 3;
    823 					for (i = 0; i < NCHARS; i++) {
    824 						if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
    825 						    FATAL("out of space for reg expr %.10s...", lastre);
    826 						if (cc->cc_func(i)) {
    827 							*bp++ = i;
    828 							n++;
    829 						}
    830 					}
    831 				} else
    832 					*bp++ = c;
    833 			} else if (c == '\0') {
    834 				FATAL("nonterminated character class %.20s", lastre);
    835 			} else if (bp == buf) {	/* 1st char is special */
    836 				*bp++ = c;
    837 			} else if (c == ']') {
    838 				*bp++ = 0;
    839 				rlxstr = (uschar *) tostring((char *) buf);
    840 				if (cflag == 0)
    841 					return CCL;
    842 				else
    843 					return NCCL;
    844 			} else
    845 				*bp++ = c;
    846 		}
    847 	}
    848 }
    849 
    850 int cgoto(fa *f, int s, int c)
    851 {
    852 	int i, j, k;
    853 	int *p, *q;
    854 
    855 	assert(c == HAT || c < NCHARS);
    856 	while (f->accept >= maxsetvec) {	/* guessing here! */
    857 		maxsetvec *= 4;
    858 		setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
    859 		tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
    860 		if (setvec == 0 || tmpset == 0)
    861 			overflo("out of space in cgoto()");
    862 	}
    863 	for (i = 0; i <= f->accept; i++)
    864 		setvec[i] = 0;
    865 	setcnt = 0;
    866 	/* compute positions of gototab[s,c] into setvec */
    867 	p = f->posns[s];
    868 	for (i = 1; i <= *p; i++) {
    869 		if ((k = f->re[p[i]].ltype) != FINAL) {
    870 			if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
    871 			 || (k == DOT && c != 0 && c != HAT)
    872 			 || (k == ALL && c != 0)
    873 			 || (k == EMPTYRE && c != 0)
    874 			 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
    875 			 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
    876 				q = f->re[p[i]].lfollow;
    877 				for (j = 1; j <= *q; j++) {
    878 					if (q[j] >= maxsetvec) {
    879 						maxsetvec *= 4;
    880 						setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
    881 						tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int));
    882 						if (setvec == 0 || tmpset == 0)
    883 							overflo("cgoto overflow");
    884 					}
    885 					if (setvec[q[j]] == 0) {
    886 						setcnt++;
    887 						setvec[q[j]] = 1;
    888 					}
    889 				}
    890 			}
    891 		}
    892 	}
    893 	/* determine if setvec is a previous state */
    894 	tmpset[0] = setcnt;
    895 	j = 1;
    896 	for (i = f->accept; i >= 0; i--)
    897 		if (setvec[i]) {
    898 			tmpset[j++] = i;
    899 		}
    900 	/* tmpset == previous state? */
    901 	for (i = 1; i <= f->curstat; i++) {
    902 		p = f->posns[i];
    903 		if ((k = tmpset[0]) != p[0])
    904 			goto different;
    905 		for (j = 1; j <= k; j++)
    906 			if (tmpset[j] != p[j])
    907 				goto different;
    908 		/* setvec is state i */
    909 		f->gototab[s][c] = i;
    910 		return i;
    911 	  different:;
    912 	}
    913 
    914 	/* add tmpset to current set of states */
    915 	if (f->curstat >= NSTATES-1) {
    916 		f->curstat = 2;
    917 		f->reset = 1;
    918 		for (i = 2; i < NSTATES; i++)
    919 			xfree(f->posns[i]);
    920 	} else
    921 		++(f->curstat);
    922 	for (i = 0; i < NCHARS; i++)
    923 		f->gototab[f->curstat][i] = 0;
    924 	xfree(f->posns[f->curstat]);
    925 	if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
    926 		overflo("out of space in cgoto");
    927 
    928 	f->posns[f->curstat] = p;
    929 	f->gototab[s][c] = f->curstat;
    930 	for (i = 0; i <= setcnt; i++)
    931 		p[i] = tmpset[i];
    932 	if (setvec[f->accept])
    933 		f->out[f->curstat] = 1;
    934 	else
    935 		f->out[f->curstat] = 0;
    936 	return f->curstat;
    937 }
    938 
    939 
    940 void freefa(fa *f)	/* free a finite automaton */
    941 {
    942 	int i;
    943 
    944 	if (f == NULL)
    945 		return;
    946 	for (i = 0; i <= f->curstat; i++)
    947 		xfree(f->posns[i]);
    948 	for (i = 0; i <= f->accept; i++) {
    949 		xfree(f->re[i].lfollow);
    950 		if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
    951 			xfree((f->re[i].lval.np));
    952 	}
    953 	xfree(f->restr);
    954 	xfree(f);
    955 }
    956