Home | History | Annotate | Download | only in src
      1 /*	$OpenBSD: syn.c,v 1.30 2015/09/01 13:12:31 tedu Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009,
      5  *		 2011, 2012, 2013, 2014, 2015, 2016, 2017,
      6  *		 2018
      7  *	mirabilos <m (at) mirbsd.org>
      8  *
      9  * Provided that these terms and disclaimer and all copyright notices
     10  * are retained or reproduced in an accompanying document, permission
     11  * is granted to deal in this work without restriction, including un-
     12  * limited rights to use, publicly perform, distribute, sell, modify,
     13  * merge, give away, or sublicence.
     14  *
     15  * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
     16  * the utmost extent permitted by applicable law, neither express nor
     17  * implied; without malicious intent or gross negligence. In no event
     18  * may a licensor, author or contributor be held liable for indirect,
     19  * direct, other damage, loss, or other issues arising in any way out
     20  * of dealing in the work, even if advised of the possibility of such
     21  * damage or existence of a defect, except proven that it results out
     22  * of said person's immediate fault when using the work as intended.
     23  */
     24 
     25 #include "sh.h"
     26 
     27 __RCSID("$MirOS: src/bin/mksh/syn.c,v 1.127 2018/01/14 00:22:30 tg Exp $");
     28 
/*
 * Describes the construct that opened the current nesting level; filled
 * in by nesting_push() and consulted by syntaxerr() on unexpected EOF.
 */
struct nesting_state {
	int start_token;	/* token that began nesting (eg, FOR) */
	int start_line;		/* line nesting began on */
};
     33 
/*
 * Saved copy of parser globals (here-document list, current symbol,
 * reject flag, nesting type), chained through "next".
 * NOTE(review): the code using this structure is not in this part of
 * the file; presumably it backs a recursive parser entry -- confirm.
 */
struct yyrecursive_state {
	struct ioword *old_heres[HERES];	/* saved here-document slots */
	struct yyrecursive_state *next;		/* next saved state in the chain */
	struct ioword **old_herep;		/* saved here-document cursor */
	int old_symbol;				/* saved "symbol" */
	unsigned int old_nesting_type;		/* saved nesting type */
	bool old_reject;			/* saved "reject" */
};
     42 
/* recursive-descent parser routines */
static void yyparse(bool);
static struct op *pipeline(int, int);
static struct op *andor(int);
static struct op *c_list(int, bool);
static struct ioword *synio(int);
static struct op *nested(int, int, int, int);
static struct op *get_command(int, int);
static struct op *dogroup(int);
static struct op *thenpart(int);
static struct op *elsepart(int);
static struct op *caselist(int);
static struct op *casepart(int, int);
static struct op *function_body(char *, int, bool);
static char **wordlist(int);
/* tree construction, error reporting and nesting bookkeeping */
static struct op *block(int, struct op *, struct op *);
static struct op *newtp(int);
static void syntaxerr(const char *) MKSH_A_NORETURN;
static void nesting_push(struct nesting_state *, int);
static void nesting_pop(struct nesting_state *);
static int inalias(struct source *) MKSH_A_PURE;
/* Test_env callbacks used while parsing [[ ... ]] */
static Test_op dbtestp_isa(Test_env *, Test_meta);
static const char *dbtestp_getopnd(Test_env *, Test_op, bool);
static int dbtestp_eval(Test_env *, Test_op, const char *,
    const char *, bool);
static void dbtestp_error(Test_env *, int, const char *) MKSH_A_NORETURN;
     68 
static struct op *outtree;		/* yyparse output */
/*
 * innermost open construct: set by nesting_push(), restored by
 * nesting_pop(), reported by syntaxerr() as "unmatched" on EOF
 */
static struct nesting_state nesting;

static bool reject;			/* token(cf) gets symbol again */
static int symbol;			/* yylex value */
     74 
/* push the current token back: the next token()/tpeek() returns symbol */
#define REJECT		(reject = true)
/* consume the current token: the next token()/tpeek() calls yylex() */
#define ACCEPT		(reject = false)
/* fetch the next token, honouring (and clearing) a pending REJECT */
#define token(cf)	((reject) ? (ACCEPT, symbol) : (symbol = yylex(cf)))
/* look at the next token but leave it pending (reject stays/becomes set) */
#define tpeek(cf)	((reject) ? (symbol) : (REJECT, symbol = yylex(cf)))
/* require the next token to be c, otherwise raise a syntax error */
#define musthave(c,cf)	do { 					\
	if ((unsigned int)token(cf) != (unsigned int)(c))	\
		syntaxerr(NULL);				\
} while (/* CONSTCOND */ 0)
     83 
/* keyword spellings shared by tokentab[] and casepart() */
static const char Tcbrace[] = "}";
static const char Tesac[] = "esac";
     86 
     87 static void
     88 yyparse(bool doalias)
     89 {
     90 	int c;
     91 
     92 	ACCEPT;
     93 
     94 	outtree = c_list(doalias ? ALIAS : 0, source->type == SSTRING);
     95 	c = tpeek(0);
     96 	if (c == 0 && !outtree)
     97 		outtree = newtp(TEOF);
     98 	else if (!cinttype(c, C_LF | C_NUL))
     99 		syntaxerr(NULL);
    100 }
    101 
    102 static struct op *
    103 pipeline(int cf, int sALIAS)
    104 {
    105 	struct op *t, *p, *tl = NULL;
    106 
    107 	t = get_command(cf, sALIAS);
    108 	if (t != NULL) {
    109 		while (token(0) == '|') {
    110 			if ((p = get_command(CONTIN, sALIAS)) == NULL)
    111 				syntaxerr(NULL);
    112 			if (tl == NULL)
    113 				t = tl = block(TPIPE, t, p);
    114 			else
    115 				tl = tl->right = block(TPIPE, tl->right, p);
    116 		}
    117 		REJECT;
    118 	}
    119 	return (t);
    120 }
    121 
    122 static struct op *
    123 andor(int sALIAS)
    124 {
    125 	struct op *t, *p;
    126 	int c;
    127 
    128 	t = pipeline(0, sALIAS);
    129 	if (t != NULL) {
    130 		while ((c = token(0)) == LOGAND || c == LOGOR) {
    131 			if ((p = pipeline(CONTIN, sALIAS)) == NULL)
    132 				syntaxerr(NULL);
    133 			t = block(c == LOGAND? TAND: TOR, t, p);
    134 		}
    135 		REJECT;
    136 	}
    137 	return (t);
    138 }
    139 
    140 static struct op *
    141 c_list(int sALIAS, bool multi)
    142 {
    143 	struct op *t = NULL, *p, *tl = NULL;
    144 	int c;
    145 	bool have_sep;
    146 
    147 	while (/* CONSTCOND */ 1) {
    148 		p = andor(sALIAS);
    149 		/*
    150 		 * Token has always been read/rejected at this point, so
    151 		 * we don't worry about what flags to pass token()
    152 		 */
    153 		c = token(0);
    154 		have_sep = true;
    155 		if (c == '\n' && (multi || inalias(source))) {
    156 			if (!p)
    157 				/* ignore blank lines */
    158 				continue;
    159 		} else if (!p)
    160 			break;
    161 		else if (c == '&' || c == COPROC)
    162 			p = block(c == '&' ? TASYNC : TCOPROC, p, NULL);
    163 		else if (c != ';')
    164 			have_sep = false;
    165 		if (!t)
    166 			t = p;
    167 		else if (!tl)
    168 			t = tl = block(TLIST, t, p);
    169 		else
    170 			tl = tl->right = block(TLIST, tl->right, p);
    171 		if (!have_sep)
    172 			break;
    173 	}
    174 	REJECT;
    175 	return (t);
    176 }
    177 
/*
 * Encoded word "<<" that synio() substitutes as the delimiter when a
 * here-document operator is immediately followed by a newline.
 */
static const char IONDELIM_delim[] = { CHAR, '<', CHAR, '<', EOS };
    179 
    180 static struct ioword *
    181 synio(int cf)
    182 {
    183 	struct ioword *iop;
    184 	static struct ioword *nextiop;
    185 	bool ishere;
    186 
    187 	if (nextiop != NULL) {
    188 		iop = nextiop;
    189 		nextiop = NULL;
    190 		return (iop);
    191 	}
    192 
    193 	if (tpeek(cf) != REDIR)
    194 		return (NULL);
    195 	ACCEPT;
    196 	iop = yylval.iop;
    197 	ishere = (iop->ioflag & IOTYPE) == IOHERE;
    198 	if (iop->ioflag & IOHERESTR) {
    199 		musthave(LWORD, 0);
    200 	} else if (ishere && tpeek(HEREDELIM) == '\n') {
    201 		ACCEPT;
    202 		yylval.cp = wdcopy(IONDELIM_delim, ATEMP);
    203 		iop->ioflag |= IOEVAL | IONDELIM;
    204 	} else
    205 		musthave(LWORD, ishere ? HEREDELIM : 0);
    206 	if (ishere) {
    207 		iop->delim = yylval.cp;
    208 		if (*ident != 0 && !(iop->ioflag & IOHERESTR)) {
    209 			/* unquoted */
    210 			iop->ioflag |= IOEVAL;
    211 		}
    212 		if (herep > &heres[HERES - 1])
    213 			yyerror(Tf_toomany, "<<");
    214 		*herep++ = iop;
    215 	} else
    216 		iop->ioname = yylval.cp;
    217 
    218 	if (iop->ioflag & IOBASH) {
    219 		char *cp;
    220 
    221 		nextiop = alloc(sizeof(*iop), ATEMP);
    222 		nextiop->ioname = cp = alloc(3, ATEMP);
    223 		*cp++ = CHAR;
    224 		*cp++ = digits_lc[iop->unit % 10];
    225 		*cp = EOS;
    226 
    227 		iop->ioflag &= ~IOBASH;
    228 		nextiop->unit = 2;
    229 		nextiop->ioflag = IODUP;
    230 		nextiop->delim = NULL;
    231 		nextiop->heredoc = NULL;
    232 	}
    233 	return (iop);
    234 }
    235 
    236 static struct op *
    237 nested(int type, int smark, int emark, int sALIAS)
    238 {
    239 	struct op *t;
    240 	struct nesting_state old_nesting;
    241 
    242 	nesting_push(&old_nesting, smark);
    243 	t = c_list(sALIAS, true);
    244 	musthave(emark, KEYWORD|sALIAS);
    245 	nesting_pop(&old_nesting);
    246 	return (block(type, t, NULL));
    247 }
    248 
/*
 * Pre-encoded words (tagged characters terminated by EOS) which
 * get_command() copies with wdcopy() when it synthesises commands.
 */
/* "\builtin": leading backslash stored as QCHAR, then the name */
static const char builtin_cmd[] = {
	QCHAR, '\\', CHAR, 'b', CHAR, 'u', CHAR, 'i',
	CHAR, 'l', CHAR, 't', CHAR, 'i', CHAR, 'n', EOS
};
/* "let": used for (( ... )) */
static const char let_cmd[] = {
	CHAR, 'l', CHAR, 'e', CHAR, 't', EOS
};
/* "set", "-A" and "--": used for the foo=(bar) array assignment form */
static const char setA_cmd0[] = {
	CHAR, 's', CHAR, 'e', CHAR, 't', EOS
};
static const char setA_cmd1[] = {
	CHAR, '-', CHAR, 'A', EOS
};
static const char setA_cmd2[] = {
	CHAR, '-', CHAR, '-', EOS
};
    265 
    266 static struct op *
    267 get_command(int cf, int sALIAS)
    268 {
    269 	struct op *t;
    270 	int c, iopn = 0, syniocf, lno;
    271 	struct ioword *iop, **iops;
    272 	XPtrV args, vars;
    273 	struct nesting_state old_nesting;
    274 
    275 	/* NUFILE is small enough to leave this addition unchecked */
    276 	iops = alloc2((NUFILE + 1), sizeof(struct ioword *), ATEMP);
    277 	XPinit(args, 16);
    278 	XPinit(vars, 16);
    279 
    280 	syniocf = KEYWORD|sALIAS;
    281 	switch (c = token(cf|KEYWORD|sALIAS|CMDASN)) {
    282 	default:
    283 		REJECT;
    284 		afree(iops, ATEMP);
    285 		XPfree(args);
    286 		XPfree(vars);
    287 		/* empty line */
    288 		return (NULL);
    289 
    290 	case LWORD:
    291 	case REDIR:
    292 		REJECT;
    293 		syniocf &= ~(KEYWORD|sALIAS);
    294 		t = newtp(TCOM);
    295 		t->lineno = source->line;
    296 		goto get_command_start;
    297 		while (/* CONSTCOND */ 1) {
    298 			bool check_decl_utility;
    299 
    300 			if (XPsize(args) == 0) {
    301  get_command_start:
    302 				check_decl_utility = true;
    303 				cf = sALIAS | CMDASN;
    304 			} else if (t->u.evalflags)
    305 				cf = CMDWORD | CMDASN;
    306 			else
    307 				cf = CMDWORD;
    308 			switch (tpeek(cf)) {
    309 			case REDIR:
    310 				while ((iop = synio(cf)) != NULL) {
    311 					if (iopn >= NUFILE)
    312 						yyerror(Tf_toomany,
    313 						    Tredirection);
    314 					iops[iopn++] = iop;
    315 				}
    316 				break;
    317 
    318 			case LWORD:
    319 				ACCEPT;
    320 				if (check_decl_utility) {
    321 					struct tbl *tt = get_builtin(ident);
    322 					uint32_t flag;
    323 
    324 					flag = tt ? tt->flag : 0;
    325 					if (flag & DECL_UTIL)
    326 						t->u.evalflags = DOVACHECK;
    327 					if (!(flag & DECL_FWDR))
    328 						check_decl_utility = false;
    329 				}
    330 				if ((XPsize(args) == 0 || Flag(FKEYWORD)) &&
    331 				    is_wdvarassign(yylval.cp))
    332 					XPput(vars, yylval.cp);
    333 				else
    334 					XPput(args, yylval.cp);
    335 				break;
    336 
    337 			case ORD('(' /*)*/):
    338 				if (XPsize(args) == 0 && XPsize(vars) == 1 &&
    339 				    is_wdvarassign(yylval.cp)) {
    340 					char *tcp;
    341 
    342 					/* wdarrassign: foo=(bar) */
    343 					ACCEPT;
    344 
    345 					/* manipulate the vars string */
    346 					tcp = XPptrv(vars)[(vars.len = 0)];
    347 					/* 'varname=' -> 'varname' */
    348 					tcp[wdscan(tcp, EOS) - tcp - 3] = EOS;
    349 
    350 					/* construct new args strings */
    351 					XPput(args, wdcopy(builtin_cmd, ATEMP));
    352 					XPput(args, wdcopy(setA_cmd0, ATEMP));
    353 					XPput(args, wdcopy(setA_cmd1, ATEMP));
    354 					XPput(args, tcp);
    355 					XPput(args, wdcopy(setA_cmd2, ATEMP));
    356 
    357 					/* slurp in words till closing paren */
    358 					while (token(CONTIN) == LWORD)
    359 						XPput(args, yylval.cp);
    360 					if (symbol != /*(*/ ')')
    361 						syntaxerr(NULL);
    362 				} else {
    363 					/*
    364 					 * Check for "> foo (echo hi)"
    365 					 * which AT&T ksh allows (not
    366 					 * POSIX, but not disallowed)
    367 					 */
    368 					afree(t, ATEMP);
    369 					if (XPsize(args) == 0 &&
    370 					    XPsize(vars) == 0) {
    371 						ACCEPT;
    372 						goto Subshell;
    373 					}
    374 
    375 					/* must be a function */
    376 					if (iopn != 0 || XPsize(args) != 1 ||
    377 					    XPsize(vars) != 0)
    378 						syntaxerr(NULL);
    379 					ACCEPT;
    380 					musthave(/*(*/ ')', 0);
    381 					t = function_body(XPptrv(args)[0],
    382 					    sALIAS, false);
    383 				}
    384 				goto Leave;
    385 
    386 			default:
    387 				goto Leave;
    388 			}
    389 		}
    390  Leave:
    391 		break;
    392 
    393 	case ORD('(' /*)*/): {
    394 		unsigned int subshell_nesting_type_saved;
    395  Subshell:
    396 		subshell_nesting_type_saved = subshell_nesting_type;
    397 		subshell_nesting_type = ORD(')');
    398 		t = nested(TPAREN, ORD('('), ORD(')'), sALIAS);
    399 		subshell_nesting_type = subshell_nesting_type_saved;
    400 		break;
    401 	    }
    402 
    403 	case ORD('{' /*}*/):
    404 		t = nested(TBRACE, ORD('{'), ORD('}'), sALIAS);
    405 		break;
    406 
    407 	case MDPAREN:
    408 		/* leave KEYWORD in syniocf (allow if (( 1 )) then ...) */
    409 		lno = source->line;
    410 		ACCEPT;
    411 		switch (token(LETEXPR)) {
    412 		case LWORD:
    413 			break;
    414 		case ORD('(' /*)*/):
    415 			c = ORD('(');
    416 			goto Subshell;
    417 		default:
    418 			syntaxerr(NULL);
    419 		}
    420 		t = newtp(TCOM);
    421 		t->lineno = lno;
    422 		XPput(args, wdcopy(builtin_cmd, ATEMP));
    423 		XPput(args, wdcopy(let_cmd, ATEMP));
    424 		XPput(args, yylval.cp);
    425 		break;
    426 
    427 	case DBRACKET: /* [[ .. ]] */
    428 		/* leave KEYWORD in syniocf (allow if [[ -n 1 ]] then ...) */
    429 		t = newtp(TDBRACKET);
    430 		ACCEPT;
    431 		{
    432 			Test_env te;
    433 
    434 			te.flags = TEF_DBRACKET;
    435 			te.pos.av = &args;
    436 			te.isa = dbtestp_isa;
    437 			te.getopnd = dbtestp_getopnd;
    438 			te.eval = dbtestp_eval;
    439 			te.error = dbtestp_error;
    440 
    441 			test_parse(&te);
    442 		}
    443 		break;
    444 
    445 	case FOR:
    446 	case SELECT:
    447 		t = newtp((c == FOR) ? TFOR : TSELECT);
    448 		musthave(LWORD, CMDASN);
    449 		if (!is_wdvarname(yylval.cp, true))
    450 			yyerror("%s: bad identifier",
    451 			    c == FOR ? "for" : Tselect);
    452 		strdupx(t->str, ident, ATEMP);
    453 		nesting_push(&old_nesting, c);
    454 		t->vars = wordlist(sALIAS);
    455 		t->left = dogroup(sALIAS);
    456 		nesting_pop(&old_nesting);
    457 		break;
    458 
    459 	case WHILE:
    460 	case UNTIL:
    461 		nesting_push(&old_nesting, c);
    462 		t = newtp((c == WHILE) ? TWHILE : TUNTIL);
    463 		t->left = c_list(sALIAS, true);
    464 		t->right = dogroup(sALIAS);
    465 		nesting_pop(&old_nesting);
    466 		break;
    467 
    468 	case CASE:
    469 		t = newtp(TCASE);
    470 		musthave(LWORD, 0);
    471 		t->str = yylval.cp;
    472 		nesting_push(&old_nesting, c);
    473 		t->left = caselist(sALIAS);
    474 		nesting_pop(&old_nesting);
    475 		break;
    476 
    477 	case IF:
    478 		nesting_push(&old_nesting, c);
    479 		t = newtp(TIF);
    480 		t->left = c_list(sALIAS, true);
    481 		t->right = thenpart(sALIAS);
    482 		musthave(FI, KEYWORD|sALIAS);
    483 		nesting_pop(&old_nesting);
    484 		break;
    485 
    486 	case BANG:
    487 		syniocf &= ~(KEYWORD|sALIAS);
    488 		t = pipeline(0, sALIAS);
    489 		if (t == NULL)
    490 			syntaxerr(NULL);
    491 		t = block(TBANG, NULL, t);
    492 		break;
    493 
    494 	case TIME:
    495 		syniocf &= ~(KEYWORD|sALIAS);
    496 		t = pipeline(0, sALIAS);
    497 		if (t && t->type == TCOM) {
    498 			t->str = alloc(2, ATEMP);
    499 			/* TF_* flags */
    500 			t->str[0] = '\0';
    501 			t->str[1] = '\0';
    502 		}
    503 		t = block(TTIME, t, NULL);
    504 		break;
    505 
    506 	case FUNCTION:
    507 		musthave(LWORD, 0);
    508 		t = function_body(yylval.cp, sALIAS, true);
    509 		break;
    510 	}
    511 
    512 	while ((iop = synio(syniocf)) != NULL) {
    513 		if (iopn >= NUFILE)
    514 			yyerror(Tf_toomany, Tredirection);
    515 		iops[iopn++] = iop;
    516 	}
    517 
    518 	if (iopn == 0) {
    519 		afree(iops, ATEMP);
    520 		t->ioact = NULL;
    521 	} else {
    522 		iops[iopn++] = NULL;
    523 		iops = aresize2(iops, iopn, sizeof(struct ioword *), ATEMP);
    524 		t->ioact = iops;
    525 	}
    526 
    527 	if (t->type == TCOM || t->type == TDBRACKET) {
    528 		XPput(args, NULL);
    529 		t->args = (const char **)XPclose(args);
    530 		XPput(vars, NULL);
    531 		t->vars = (char **)XPclose(vars);
    532 	} else {
    533 		XPfree(args);
    534 		XPfree(vars);
    535 	}
    536 
    537 	if (c == MDPAREN) {
    538 		t = block(TBRACE, t, NULL);
    539 		t->ioact = t->left->ioact;
    540 		t->left->ioact = NULL;
    541 	}
    542 
    543 	return (t);
    544 }
    545 
    546 static struct op *
    547 dogroup(int sALIAS)
    548 {
    549 	int c;
    550 	struct op *list;
    551 
    552 	c = token(CONTIN|KEYWORD|sALIAS);
    553 	/*
    554 	 * A {...} can be used instead of do...done for for/select loops
    555 	 * but not for while/until loops - we don't need to check if it
    556 	 * is a while loop because it would have been parsed as part of
    557 	 * the conditional command list...
    558 	 */
    559 	if (c == DO)
    560 		c = DONE;
    561 	else if ((unsigned int)c == ORD('{'))
    562 		c = ORD('}');
    563 	else
    564 		syntaxerr(NULL);
    565 	list = c_list(sALIAS, true);
    566 	musthave(c, KEYWORD|sALIAS);
    567 	return (list);
    568 }
    569 
    570 static struct op *
    571 thenpart(int sALIAS)
    572 {
    573 	struct op *t;
    574 
    575 	musthave(THEN, KEYWORD|sALIAS);
    576 	t = newtp(0);
    577 	t->left = c_list(sALIAS, true);
    578 	if (t->left == NULL)
    579 		syntaxerr(NULL);
    580 	t->right = elsepart(sALIAS);
    581 	return (t);
    582 }
    583 
    584 static struct op *
    585 elsepart(int sALIAS)
    586 {
    587 	struct op *t;
    588 
    589 	switch (token(KEYWORD|sALIAS|CMDASN)) {
    590 	case ELSE:
    591 		if ((t = c_list(sALIAS, true)) == NULL)
    592 			syntaxerr(NULL);
    593 		return (t);
    594 
    595 	case ELIF:
    596 		t = newtp(TELIF);
    597 		t->left = c_list(sALIAS, true);
    598 		t->right = thenpart(sALIAS);
    599 		return (t);
    600 
    601 	default:
    602 		REJECT;
    603 	}
    604 	return (NULL);
    605 }
    606 
    607 static struct op *
    608 caselist(int sALIAS)
    609 {
    610 	struct op *t, *tl;
    611 	int c;
    612 
    613 	c = token(CONTIN|KEYWORD|sALIAS);
    614 	/* A {...} can be used instead of in...esac for case statements */
    615 	if (c == IN)
    616 		c = ESAC;
    617 	else if ((unsigned int)c == ORD('{'))
    618 		c = ORD('}');
    619 	else
    620 		syntaxerr(NULL);
    621 	t = tl = NULL;
    622 	/* no ALIAS here */
    623 	while ((tpeek(CONTIN|KEYWORD|ESACONLY)) != c) {
    624 		struct op *tc = casepart(c, sALIAS);
    625 		if (tl == NULL)
    626 			t = tl = tc, tl->right = NULL;
    627 		else
    628 			tl->right = tc, tl = tc;
    629 	}
    630 	musthave(c, KEYWORD|sALIAS);
    631 	return (t);
    632 }
    633 
    634 static struct op *
    635 casepart(int endtok, int sALIAS)
    636 {
    637 	struct op *t;
    638 	XPtrV ptns;
    639 
    640 	XPinit(ptns, 16);
    641 	t = newtp(TPAT);
    642 	/* no ALIAS here */
    643 	if ((unsigned int)token(CONTIN | KEYWORD) != ORD('('))
    644 		REJECT;
    645 	do {
    646 		switch (token(0)) {
    647 		case LWORD:
    648 			break;
    649 		case ORD('}'):
    650 		case ESAC:
    651 			if (symbol != endtok) {
    652 				strdupx(yylval.cp, (unsigned int)symbol ==
    653 				    ORD('}') ? Tcbrace : Tesac, ATEMP);
    654 				break;
    655 			}
    656 			/* FALLTHROUGH */
    657 		default:
    658 			syntaxerr(NULL);
    659 		}
    660 		XPput(ptns, yylval.cp);
    661 	} while (token(0) == '|');
    662 	REJECT;
    663 	XPput(ptns, NULL);
    664 	t->vars = (char **)XPclose(ptns);
    665 	musthave(ORD(')'), 0);
    666 
    667 	t->left = c_list(sALIAS, true);
    668 
    669 	/* initialise to default for ;; or omitted */
    670 	t->u.charflag = ORD(';');
    671 	/* SUSv4 requires the ;; except in the last casepart */
    672 	if ((tpeek(CONTIN|KEYWORD|sALIAS)) != endtok)
    673 		switch (symbol) {
    674 		default:
    675 			syntaxerr(NULL);
    676 		case BRKEV:
    677 			t->u.charflag = ORD('|');
    678 			if (0)
    679 				/* FALLTHROUGH */
    680 		case BRKFT:
    681 			  t->u.charflag = ORD('&');
    682 			/* FALLTHROUGH */
    683 		case BREAK:
    684 			/* initialised above, but we need to eat the token */
    685 			ACCEPT;
    686 		}
    687 	return (t);
    688 }
    689 
    690 static struct op *
    691 function_body(char *name, int sALIAS,
    692     /* function foo { ... } vs foo() { .. } */
    693     bool ksh_func)
    694 {
    695 	char *sname, *p;
    696 	struct op *t;
    697 
    698 	sname = wdstrip(name, 0);
    699 	/*-
    700 	 * Check for valid characters in name. POSIX and AT&T ksh93 say
    701 	 * only allow [a-zA-Z_0-9] but this allows more as old pdkshs
    702 	 * have allowed more; the following were never allowed:
    703 	 *	NUL TAB NL SP " $ & ' ( ) ; < = > \ ` |
    704 	 * C_QUOTE|C_SPC covers all but adds # * ? [ ]
    705 	 */
    706 	for (p = sname; *p; p++)
    707 		if (ctype(*p, C_QUOTE | C_SPC))
    708 			yyerror(Tinvname, sname, Tfunction);
    709 
    710 	/*
    711 	 * Note that POSIX allows only compound statements after foo(),
    712 	 * sh and AT&T ksh allow any command, go with the later since it
    713 	 * shouldn't break anything. However, for function foo, AT&T ksh
    714 	 * only accepts an open-brace.
    715 	 */
    716 	if (ksh_func) {
    717 		if ((unsigned int)tpeek(CONTIN|KEYWORD|sALIAS) == ORD('(' /*)*/)) {
    718 			/* function foo () { //}*/
    719 			ACCEPT;
    720 			musthave(ORD(/*(*/ ')'), 0);
    721 			/* degrade to POSIX function */
    722 			ksh_func = false;
    723 		}
    724 		musthave(ORD('{' /*}*/), CONTIN|KEYWORD|sALIAS);
    725 		REJECT;
    726 	}
    727 
    728 	t = newtp(TFUNCT);
    729 	t->str = sname;
    730 	t->u.ksh_func = tobool(ksh_func);
    731 	t->lineno = source->line;
    732 
    733 	if ((t->left = get_command(CONTIN, sALIAS)) == NULL) {
    734 		char *tv;
    735 		/*
    736 		 * Probably something like foo() followed by EOF or ';'.
    737 		 * This is accepted by sh and ksh88.
    738 		 * To make "typeset -f foo" work reliably (so its output can
    739 		 * be used as input), we pretend there is a colon here.
    740 		 */
    741 		t->left = newtp(TCOM);
    742 		/* (2 * sizeof(char *)) is small enough */
    743 		t->left->args = alloc(2 * sizeof(char *), ATEMP);
    744 		t->left->args[0] = tv = alloc(3, ATEMP);
    745 		tv[0] = QCHAR;
    746 		tv[1] = ':';
    747 		tv[2] = EOS;
    748 		t->left->args[1] = NULL;
    749 		t->left->vars = alloc(sizeof(char *), ATEMP);
    750 		t->left->vars[0] = NULL;
    751 		t->left->lineno = 1;
    752 	}
    753 
    754 	return (t);
    755 }
    756 
    757 static char **
    758 wordlist(int sALIAS)
    759 {
    760 	int c;
    761 	XPtrV args;
    762 
    763 	XPinit(args, 16);
    764 	/* POSIX does not do alias expansion here... */
    765 	if ((c = token(CONTIN|KEYWORD|sALIAS)) != IN) {
    766 		if (c != ';')
    767 			/* non-POSIX, but AT&T ksh accepts a ; here */
    768 			REJECT;
    769 		return (NULL);
    770 	}
    771 	while ((c = token(0)) == LWORD)
    772 		XPput(args, yylval.cp);
    773 	if (c != '\n' && c != ';')
    774 		syntaxerr(NULL);
    775 	XPput(args, NULL);
    776 	return ((char **)XPclose(args));
    777 }
    778 
    779 /*
    780  * supporting functions
    781  */
    782 
    783 static struct op *
    784 block(int type, struct op *t1, struct op *t2)
    785 {
    786 	struct op *t;
    787 
    788 	t = newtp(type);
    789 	t->left = t1;
    790 	t->right = t2;
    791 	return (t);
    792 }
    793 
/*
 * Token table: maps token values to their spelling.  Entries marked
 * reserved are entered into the keyword table by initkeywords(); all
 * entries are used by syntaxerr() to name an unexpected token.
 * The list ends with an entry whose name is NULL.
 */
static const struct tokeninfo {
	const char *name;	/* spelling shown to the user */
	short val;		/* token value */
	short reserved;		/* true if this is a reserved word */
} tokentab[] = {
	/* Reserved words */
	{ "if",		IF,	true },
	{ "then",	THEN,	true },
	{ "else",	ELSE,	true },
	{ "elif",	ELIF,	true },
	{ "fi",		FI,	true },
	{ "case",	CASE,	true },
	{ Tesac,	ESAC,	true },
	{ "for",	FOR,	true },
	{ Tselect,	SELECT,	true },
	{ "while",	WHILE,	true },
	{ "until",	UNTIL,	true },
	{ "do",		DO,	true },
	{ "done",	DONE,	true },
	{ "in",		IN,	true },
	{ Tfunction,	FUNCTION, true },
	{ Ttime,	TIME,	true },
	{ "{",		ORD('{'), true },
	{ Tcbrace,	ORD('}'), true },
	{ "!",		BANG,	true },
	{ "[[",		DBRACKET, true },
	/* Lexical tokens (0[EOF], LWORD and REDIR handled specially) */
	{ "&&",		LOGAND,	false },
	{ "||",		LOGOR,	false },
	{ ";;",		BREAK,	false },
	{ ";|",		BRKEV,	false },
	{ ";&",		BRKFT,	false },
	{ "((",		MDPAREN, false },
	{ "|&",		COPROC,	false },
	/* and some special cases... */
	{ "newline",	ORD('\n'), false },
	{ NULL,		0,	false }
};
    832 
    833 void
    834 initkeywords(void)
    835 {
    836 	struct tokeninfo const *tt;
    837 	struct tbl *p;
    838 
    839 	ktinit(APERM, &keywords,
    840 	    /* currently 28 keywords: 75% of 64 = 2^6 */
    841 	    6);
    842 	for (tt = tokentab; tt->name; tt++) {
    843 		if (tt->reserved) {
    844 			p = ktenter(&keywords, tt->name, hash(tt->name));
    845 			p->flag |= DEFINED|ISSET;
    846 			p->type = CKEYWD;
    847 			p->val.i = tt->val;
    848 		}
    849 	}
    850 }
    851 
    852 static void
    853 syntaxerr(const char *what)
    854 {
    855 	/* 23<<- is the longest redirection, I think */
    856 	char redir[8];
    857 	const char *s;
    858 	struct tokeninfo const *tt;
    859 	int c;
    860 
    861 	if (!what)
    862 		what = Tunexpected;
    863 	REJECT;
    864 	c = token(0);
    865  Again:
    866 	switch (c) {
    867 	case 0:
    868 		if (nesting.start_token) {
    869 			c = nesting.start_token;
    870 			source->errline = nesting.start_line;
    871 			what = "unmatched";
    872 			goto Again;
    873 		}
    874 		/* don't quote the EOF */
    875 		yyerror("%s: unexpected EOF", Tsynerr);
    876 		/* NOTREACHED */
    877 
    878 	case LWORD:
    879 		s = snptreef(NULL, 32, Tf_S, yylval.cp);
    880 		break;
    881 
    882 	case REDIR:
    883 		s = snptreef(redir, sizeof(redir), Tft_R, yylval.iop);
    884 		break;
    885 
    886 	default:
    887 		for (tt = tokentab; tt->name; tt++)
    888 			if (tt->val == c)
    889 			    break;
    890 		if (tt->name)
    891 			s = tt->name;
    892 		else {
    893 			if (c > 0 && c < 256) {
    894 				redir[0] = c;
    895 				redir[1] = '\0';
    896 			} else
    897 				shf_snprintf(redir, sizeof(redir),
    898 					"?%d", c);
    899 			s = redir;
    900 		}
    901 	}
    902 	yyerror(Tf_sD_s_qs, Tsynerr, what, s);
    903 }
    904 
    905 static void
    906 nesting_push(struct nesting_state *save, int tok)
    907 {
    908 	*save = nesting;
    909 	nesting.start_token = tok;
    910 	nesting.start_line = source->line;
    911 }
    912 
    913 static void
    914 nesting_pop(struct nesting_state *saved)
    915 {
    916 	nesting = *saved;
    917 }
    918 
    919 static struct op *
    920 newtp(int type)
    921 {
    922 	struct op *t;
    923 
    924 	t = alloc(sizeof(struct op), ATEMP);
    925 	t->type = type;
    926 	t->u.evalflags = 0;
    927 	t->args = NULL;
    928 	t->vars = NULL;
    929 	t->ioact = NULL;
    930 	t->left = t->right = NULL;
    931 	t->str = NULL;
    932 	return (t);
    933 }
    934 
    935 struct op *
    936 compile(Source *s, bool skiputf8bom, bool doalias)
    937 {
    938 	nesting.start_token = 0;
    939 	nesting.start_line = 0;
    940 	herep = heres;
    941 	source = s;
    942 	if (skiputf8bom)
    943 		yyskiputf8bom();
    944 	yyparse(doalias);
    945 	return (outtree);
    946 }
    947 
    948 /* Check if we are in the middle of reading an alias */
    949 static int
    950 inalias(struct source *s)
    951 {
    952 	while (s && s->type == SALIAS) {
    953 		if (!(s->flags & SF_ALIASEND))
    954 			return (1);
    955 		s = s->next;
    956 	}
    957 	return (0);
    958 }
    959 
    960 
/*
 * Order important - indexed by Test_meta values
 * Note that ||, &&, ( and ) can't appear as unquoted strings
 * in normal shell input, so these can be interpreted unambiguously
 * in the evaluation pass.
 */
static const char dbtest_or[] = { CHAR, '|', CHAR, '|', EOS };
static const char dbtest_and[] = { CHAR, '&', CHAR, '&', EOS };
static const char dbtest_not[] = { CHAR, '!', EOS };
static const char dbtest_oparen[] = { CHAR, '(', EOS };
static const char dbtest_cparen[] = { CHAR, ')', EOS };
const char * const dbtest_tokens[] = {
	dbtest_or, dbtest_and, dbtest_not,
	dbtest_oparen, dbtest_cparen
};
/* encoded words compared against or substituted by dbtestp_isa() */
static const char db_close[] = { CHAR, ']', CHAR, ']', EOS };
static const char db_lthan[] = { CHAR, '<', EOS };
static const char db_gthan[] = { CHAR, '>', EOS };
    979 
    980 /*
    981  * Test if the current token is a whatever. Accepts the current token if
    982  * it is. Returns 0 if it is not, non-zero if it is (in the case of
    983  * TM_UNOP and TM_BINOP, the returned value is a Test_op).
    984  */
    985 static Test_op
    986 dbtestp_isa(Test_env *te, Test_meta meta)
    987 {
    988 	int c = tpeek(CMDASN | (meta == TM_BINOP ? 0 : CONTIN));
    989 	bool uqword;
    990 	char *save = NULL;
    991 	Test_op ret = TO_NONOP;
    992 
    993 	/* unquoted word? */
    994 	uqword = c == LWORD && *ident;
    995 
    996 	if (meta == TM_OR)
    997 		ret = c == LOGOR ? TO_NONNULL : TO_NONOP;
    998 	else if (meta == TM_AND)
    999 		ret = c == LOGAND ? TO_NONNULL : TO_NONOP;
   1000 	else if (meta == TM_NOT)
   1001 		ret = (uqword && !strcmp(yylval.cp,
   1002 		    dbtest_tokens[(int)TM_NOT])) ? TO_NONNULL : TO_NONOP;
   1003 	else if (meta == TM_OPAREN)
   1004 		ret = (unsigned int)c == ORD('(') /*)*/ ? TO_NONNULL : TO_NONOP;
   1005 	else if (meta == TM_CPAREN)
   1006 		ret = (unsigned int)c == /*(*/ ORD(')') ? TO_NONNULL : TO_NONOP;
   1007 	else if (meta == TM_UNOP || meta == TM_BINOP) {
   1008 		if (meta == TM_BINOP && c == REDIR &&
   1009 		    (yylval.iop->ioflag == IOREAD ||
   1010 		    yylval.iop->ioflag == IOWRITE)) {
   1011 			ret = TO_NONNULL;
   1012 			save = wdcopy(yylval.iop->ioflag == IOREAD ?
   1013 			    db_lthan : db_gthan, ATEMP);
   1014 		} else if (uqword && (ret = test_isop(meta, ident)))
   1015 			save = yylval.cp;
   1016 	} else
   1017 		/* meta == TM_END */
   1018 		ret = (uqword && !strcmp(yylval.cp,
   1019 		    db_close)) ? TO_NONNULL : TO_NONOP;
   1020 	if (ret != TO_NONOP) {
   1021 		ACCEPT;
   1022 		if ((unsigned int)meta < NELEM(dbtest_tokens))
   1023 			save = wdcopy(dbtest_tokens[(int)meta], ATEMP);
   1024 		if (save)
   1025 			XPput(*te->pos.av, save);
   1026 	}
   1027 	return (ret);
   1028 }
   1029 
   1030 static const char *
   1031 dbtestp_getopnd(Test_env *te, Test_op op MKSH_A_UNUSED,
   1032     bool do_eval MKSH_A_UNUSED)
   1033 {
   1034 	int c = tpeek(CMDASN);
   1035 
   1036 	if (c != LWORD)
   1037 		return (NULL);
   1038 
   1039 	ACCEPT;
   1040 	XPput(*te->pos.av, yylval.cp);
   1041 
   1042 	return (null);
   1043 }
   1044 
   1045 static int
   1046 dbtestp_eval(Test_env *te MKSH_A_UNUSED, Test_op op MKSH_A_UNUSED,
   1047     const char *opnd1 MKSH_A_UNUSED, const char *opnd2 MKSH_A_UNUSED,
   1048     bool do_eval MKSH_A_UNUSED)
   1049 {
   1050 	return (1);
   1051 }
   1052 
   1053 static void
   1054 dbtestp_error(Test_env *te, int offset, const char *msg)
   1055 {
   1056 	te->flags |= TEF_ERROR;
   1057 
   1058 	if (offset < 0) {
   1059 		REJECT;
   1060 		/* Kludgy to say the least... */
   1061 		symbol = LWORD;
   1062 		yylval.cp = *(XPptrv(*te->pos.av) + XPsize(*te->pos.av) +
   1063 		    offset);
   1064 	}
   1065 	syntaxerr(msg);
   1066 }
   1067 
   1068 #if HAVE_SELECT
   1069 
   1070 #ifndef EOVERFLOW
   1071 #ifdef ERANGE
   1072 #define EOVERFLOW	ERANGE
   1073 #else
   1074 #define EOVERFLOW	EINVAL
   1075 #endif
   1076 #endif
   1077 
   1078 bool
   1079 parse_usec(const char *s, struct timeval *tv)
   1080 {
   1081 	struct timeval tt;
   1082 	int i;
   1083 
   1084 	tv->tv_sec = 0;
   1085 	/* parse integral part */
   1086 	while (ctype(*s, C_DIGIT)) {
   1087 		tt.tv_sec = tv->tv_sec * 10 + ksh_numdig(*s++);
   1088 		/*XXX this overflow check maybe UB */
   1089 		if (tt.tv_sec / 10 != tv->tv_sec) {
   1090 			errno = EOVERFLOW;
   1091 			return (true);
   1092 		}
   1093 		tv->tv_sec = tt.tv_sec;
   1094 	}
   1095 
   1096 	tv->tv_usec = 0;
   1097 	if (!*s)
   1098 		/* no decimal fraction */
   1099 		return (false);
   1100 	else if (*s++ != '.') {
   1101 		/* junk after integral part */
   1102 		errno = EINVAL;
   1103 		return (true);
   1104 	}
   1105 
   1106 	/* parse decimal fraction */
   1107 	i = 100000;
   1108 	while (ctype(*s, C_DIGIT)) {
   1109 		tv->tv_usec += i * ksh_numdig(*s++);
   1110 		if (i == 1)
   1111 			break;
   1112 		i /= 10;
   1113 	}
   1114 	/* check for junk after fractional part */
   1115 	while (ctype(*s, C_DIGIT))
   1116 		++s;
   1117 	if (*s) {
   1118 		errno = EINVAL;
   1119 		return (true);
   1120 	}
   1121 
   1122 	/* end of input string reached, no errors */
   1123 	return (false);
   1124 }
   1125 #endif
   1126 
   1127 /*
   1128  * Helper function called from within lex.c:yylex() to parse
   1129  * a COMSUB recursively using the main shell parser and lexer
   1130  */
   1131 char *
   1132 yyrecursive(int subtype)
   1133 {
   1134 	struct op *t;
   1135 	char *cp;
   1136 	struct yyrecursive_state *ys;
   1137 	unsigned int stok, etok;
   1138 
   1139 	if (subtype != COMSUB) {
   1140 		stok = ORD('{');
   1141 		etok = ORD('}');
   1142 	} else {
   1143 		stok = ORD('(');
   1144 		etok = ORD(')');
   1145 	}
   1146 
   1147 	ys = alloc(sizeof(struct yyrecursive_state), ATEMP);
   1148 
   1149 	/* tell the lexer to accept a closing parenthesis as EOD */
   1150 	ys->old_nesting_type = subshell_nesting_type;
   1151 	subshell_nesting_type = etok;
   1152 
   1153 	/* push reject state, parse recursively, pop reject state */
   1154 	ys->old_reject = reject;
   1155 	ys->old_symbol = symbol;
   1156 	ACCEPT;
   1157 	memcpy(ys->old_heres, heres, sizeof(heres));
   1158 	ys->old_herep = herep;
   1159 	herep = heres;
   1160 	ys->next = e->yyrecursive_statep;
   1161 	e->yyrecursive_statep = ys;
   1162 	/* we use TPAREN as a helper container here */
   1163 	t = nested(TPAREN, stok, etok, ALIAS);
   1164 	yyrecursive_pop(false);
   1165 
   1166 	/* t->left because nested(TPAREN, ...) hides our goodies there */
   1167 	cp = snptreef(NULL, 0, Tf_T, t->left);
   1168 	tfree(t, ATEMP);
   1169 
   1170 	return (cp);
   1171 }
   1172 
   1173 void
   1174 yyrecursive_pop(bool popall)
   1175 {
   1176 	struct yyrecursive_state *ys;
   1177 
   1178  popnext:
   1179 	if (!(ys = e->yyrecursive_statep))
   1180 		return;
   1181 	e->yyrecursive_statep = ys->next;
   1182 
   1183 	memcpy(heres, ys->old_heres, sizeof(heres));
   1184 	herep = ys->old_herep;
   1185 	reject = ys->old_reject;
   1186 	symbol = ys->old_symbol;
   1187 
   1188 	subshell_nesting_type = ys->old_nesting_type;
   1189 
   1190 	afree(ys, ATEMP);
   1191 	if (popall)
   1192 		goto popnext;
   1193 }
   1194