Home | History | Annotate | Download | only in src
      1 /*	$OpenBSD: lex.c,v 1.49 2013/12/17 16:37:06 deraadt Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
      5  *		 2011, 2012, 2013, 2014, 2015
      6  *	Thorsten Glaser <tg (at) mirbsd.org>
      7  *
      8  * Provided that these terms and disclaimer and all copyright notices
      9  * are retained or reproduced in an accompanying document, permission
     10  * is granted to deal in this work without restriction, including un-
     11  * limited rights to use, publicly perform, distribute, sell, modify,
     12  * merge, give away, or sublicence.
     13  *
     14  * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
     15  * the utmost extent permitted by applicable law, neither express nor
     16  * implied; without malicious intent or gross negligence. In no event
     17  * may a licensor, author or contributor be held liable for indirect,
     18  * direct, other damage, loss, or other issues arising in any way out
     19  * of dealing in the work, even if advised of the possibility of such
     20  * damage or existence of a defect, except proven that it results out
     21  * of said person's immediate fault when using the work as intended.
     22  */
     23 
     24 #include "sh.h"
     25 
     26 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.193.2.5 2015/04/19 19:18:19 tg Exp $");
     27 
     28 /*
     29  * states while lexing a word: the values of Lex_state.type and of yylex()'s state
     30  */
     31 #define SBASE		0	/* outside any lexical constructs */
     32 #define SWORD		1	/* implicit quoting for substitute() */
     33 #define SLETPAREN	2	/* inside (( )), implicit quoting */
     34 #define SSQUOTE		3	/* inside '' */
     35 #define SDQUOTE		4	/* inside "" */
     36 #define SEQUOTE		5	/* inside $'' */
     37 #define SBRACE		6	/* inside ${} */
     38 #define SQBRACE		7	/* inside "${}" */
     39 #define SBQUOTE		8	/* inside `` */
     40 #define SASPAREN	9	/* inside $(( )) */
     41 #define SHEREDELIM	10	/* parsing <<,<<-,<<< delimiter */
     42 #define SHEREDQUOTE	11	/* parsing " in <<,<<-,<<< delimiter */
     43 #define SPATTERN	12	/* parsing *(...|...) pattern (*+?@!) */
     44 #define SADELIM		13	/* like SBASE, looking for delimiter */
     45 #define STBRACEKORN	14	/* parsing ${...[#%]...} !FSH */
     46 #define STBRACEBOURNE	15	/* parsing ${...[#%]...} FSH */
     47 #define SINVALID	255	/* invalid state; yylex() marks the bottom slot states[0] with it */
     48 
     49 struct sretrace_info {	/* one recording buffer on the retrace_info list */
     50 	struct sretrace_info *next;	/* entry that was pushed before this one */
     51 	XString xs;	/* recorded input characters (appended by o_getsc_r) */
     52 	char *xp;	/* current write position inside xs */
     53 };
     54 
     55 /*
     56  * One entry of the lexer state stack used by yylex(): the state type plus the
     57  * pieces of info needed for that particular state (the union member in use depends on it).
     58  */
     59 typedef struct lex_state {
     60 	union {
     61 		/* point to the next state block (yylex() sets it to NULL in states[0]) */
     62 		struct lex_state *base;
     63 		/* marks start of state output in output string (Xsavepos() value, see PUSH_SRETRACE) */
     64 		int start;
     65 		/* SBQUOTE: true if in double quotes: "`...`" */
     66 		/* SEQUOTE: got NUL, ignore rest of string */
     67 		bool abool;
     68 		/* SADELIM information */
     69 		struct {
     70 			/* character to search for */
     71 			unsigned char delimiter;
     72 			/* max. number of delimiters; counted down, the state is popped when it reaches 0 */
     73 			unsigned char num;
     74 		} adelim;
     75 	} u;
     76 	/* count open parentheses (used by SADELIM, SASPAREN and SLETPAREN) */
     77 	short nparen;
     78 	/* type of this state: one of the S* state constants */
     79 	uint8_t type;
     80 } Lex_state;
     81 #define ls_base		u.base	/* ls_*: shorthands for the union members */
     82 #define ls_start	u.start
     83 #define ls_bool		u.abool
     84 #define ls_adelim	u.adelim
     85 
     86 typedef struct {	/* bounds of a state block; yylex() initialises it to cover states[] */
     87 	Lex_state *base;	/* first slot of the block (compared against by POP_STATE) */
     88 	Lex_state *end;	/* one past the last slot (compared against by PUSH_STATE) */
     89 } State_info;
     90 
     91 static void readhere(struct ioword *);	/* forward declarations of file-local functions */
     92 static void ungetsc(int);
     93 static void ungetsc_i(int);
     94 static int getsc_uu(void);
     95 static void getsc_line(Source *);
     96 static int getsc_bn(void);
     97 static int s_get(void);
     98 static void s_put(int);
     99 static char *get_brace_var(XString *, char *);
    100 static bool arraysub(char **);
    101 static void gethere(bool);
    102 static Lex_state *push_state_i(State_info *, Lex_state *);
    103 static Lex_state *pop_state_i(State_info *, Lex_state *);
    104 
    105 static int backslash_skip;	/* when nonzero, o_getsc() always goes through getsc_bn(); reset by yylex() */
    106 static int ignore_backslash_newline;	/* counter raised by yylex() while in a # comment, '' or $''; reset by yylex() */
    107 
    108 /* optimised getsc_bn(): take *source->str directly unless it is NUL or a backslash, or backslash_skip is set */
    109 #define o_getsc()	(*source->str != '\0' && *source->str != '\\' && \
    110 			    !backslash_skip ? *source->str++ : getsc_bn())
    111 /* optimised getsc_uu(): take *source->str directly unless it is NUL */
    112 #define	o_getsc_u()	((*source->str != '\0') ? *source->str++ : getsc_uu())
    113 
    114 /* retrace helper: complete function body; evaluates carg once, appends it to every buffer on the retrace_info list, then returns it */
    115 #define o_getsc_r(carg)	{				\
    116 	int cev = (carg);				\
    117 	struct sretrace_info *rp = retrace_info;	\
    118 							\
    119 	while (rp) {					\
    120 		Xcheck(rp->xs, rp->xp);			\
    121 		*rp->xp++ = cev;			\
    122 		rp = rp->next;				\
    123 	}						\
    124 							\
    125 	return (cev);					\
    126 }
    127 
    128 #if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST)
    129 static int getsc(void);
    130 /* in this configuration getsc() is a function: read one character via o_getsc() and record it for retracing */
    131 static int
    132 getsc(void)
    133 {
    134 	o_getsc_r(o_getsc());
    135 }
    136 #else
    137 static int getsc_r(int);
    138 /* otherwise getsc() is a macro: o_getsc() expands at the call site and getsc_r() only records c and returns it */
    139 static int
    140 getsc_r(int c)
    141 {
    142 	o_getsc_r(c);
    143 }
    144 
    145 #define getsc()		getsc_r(o_getsc())
    146 #endif
    147 
    148 #define STATE_BSIZE	8	/* number of Lex_state slots in yylex()'s states[] array */
    149 /* enter state s: advance statep (through push_state_i() when the block end is reached), store s in statep->type and state */
    150 #define PUSH_STATE(s)	do {					\
    151 	if (++statep == state_info.end)				\
    152 		statep = push_state_i(&state_info, statep);	\
    153 	state = statep->type = (s);				\
    154 } while (/* CONSTCOND */ 0)
    155 /* leave the current state: step statep back (through pop_state_i() when the block base is reached), reload state from it */
    156 #define POP_STATE()	do {					\
    157 	if (--statep == state_info.base)			\
    158 		statep = pop_state_i(&state_info, statep);	\
    159 	state = statep->type;					\
    160 } while (/* CONSTCOND */ 0)
    161 /* enter state s and start recording input: save the output position in ls_start, push a new sretrace_info onto retrace_info */
    162 #define PUSH_SRETRACE(s) do {					\
    163 	struct sretrace_info *ri;				\
    164 								\
    165 	PUSH_STATE(s);						\
    166 	statep->ls_start = Xsavepos(ws, wp);			\
    167 	ri = alloc(sizeof(struct sretrace_info), ATEMP);	\
    168 	Xinit(ri->xs, ri->xp, 64, ATEMP);			\
    169 	ri->next = retrace_info;				\
    170 	retrace_info = ri;					\
    171 } while (/* CONSTCOND */ 0)
    172 /* stop recording: rewind wp to ls_start, leave the NUL-terminated recording in sp, unlink and free the list node (clobbers dp), POP_STATE() */
    173 #define POP_SRETRACE()	do {					\
    174 	wp = Xrestpos(ws, wp, statep->ls_start);		\
    175 	*retrace_info->xp = '\0';				\
    176 	sp = Xstring(retrace_info->xs, retrace_info->xp);	\
    177 	dp = (void *)retrace_info;				\
    178 	retrace_info = retrace_info->next;			\
    179 	afree(dp, ATEMP);					\
    180 	POP_STATE();						\
    181 } while (/* CONSTCOND */ 0)
    182 
    183 /**
    184  * Lexical analyser
    185  *
    186  * Tokens are not regular expressions, they are LL(1): constructs nest,
    187  * for example "${var:-${PWD}}" and "$(size $(whence ksh))",
    188  * hence the state stack. Note that "$(...)" is now parsed recursively.
    189  */
    190 
    191 int
    192 yylex(int cf)
    193 {
    194 	Lex_state states[STATE_BSIZE], *statep, *s2, *base;
    195 	State_info state_info;
    196 	int c, c2, state;
    197 	size_t cz;
    198 	XString ws;		/* expandable output word */
    199 	char *wp;		/* output word pointer */
    200 	char *sp, *dp;
    201 
    202  Again:
    203 	states[0].type = SINVALID;
    204 	states[0].ls_base = NULL;
    205 	statep = &states[1];
    206 	state_info.base = states;
    207 	state_info.end = &state_info.base[STATE_BSIZE];
    208 
    209 	Xinit(ws, wp, 64, ATEMP);
    210 
    211 	backslash_skip = 0;
    212 	ignore_backslash_newline = 0;
    213 
    214 	if (cf & ONEWORD)
    215 		state = SWORD;
    216 	else if (cf & LETEXPR) {
    217 		/* enclose arguments in (double) quotes */
    218 		*wp++ = OQUOTE;
    219 		state = SLETPAREN;
    220 		statep->nparen = 0;
    221 	} else {
    222 		/* normal lexing */
    223 		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
    224 		while ((c = getsc()) == ' ' || c == '\t')
    225 			;
    226 		if (c == '#') {
    227 			ignore_backslash_newline++;
    228 			while ((c = getsc()) != '\0' && c != '\n')
    229 				;
    230 			ignore_backslash_newline--;
    231 		}
    232 		ungetsc(c);
    233 	}
    234 	if (source->flags & SF_ALIAS) {
    235 		/* trailing ' ' in alias definition */
    236 		source->flags &= ~SF_ALIAS;
    237 		cf |= ALIAS;
    238 	}
    239 
    240 	/* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
    241 	statep->type = state;
    242 
    243 	/* check for here string */
    244 	if (state == SHEREDELIM) {
    245 		c = getsc();
    246 		if (c == '<') {
    247 			state = SHEREDELIM;
    248 			while ((c = getsc()) == ' ' || c == '\t')
    249 				;
    250 			ungetsc(c);
    251 			c = '<';
    252 			goto accept_nonword;
    253 		}
    254 		ungetsc(c);
    255 	}
    256 
    257 	/* collect non-special or quoted characters to form word */
    258 	while (!((c = getsc()) == 0 ||
    259 	    ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
    260 		if (state == SBASE &&
    261 		    subshell_nesting_type == /*{*/ '}' &&
    262 		    c == /*{*/ '}')
    263 			/* possibly end ${ :;} */
    264 			break;
    265  accept_nonword:
    266 		Xcheck(ws, wp);
    267 		switch (state) {
    268 		case SADELIM:
    269 			if (c == '(')
    270 				statep->nparen++;
    271 			else if (c == ')')
    272 				statep->nparen--;
    273 			else if (statep->nparen == 0 && (c == /*{*/ '}' ||
    274 			    c == (int)statep->ls_adelim.delimiter)) {
    275 				*wp++ = ADELIM;
    276 				*wp++ = c;
    277 				if (c == /*{*/ '}' || --statep->ls_adelim.num == 0)
    278 					POP_STATE();
    279 				if (c == /*{*/ '}')
    280 					POP_STATE();
    281 				break;
    282 			}
    283 			/* FALLTHROUGH */
    284 		case SBASE:
    285 			if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
    286 				/* temporary */
    287 				*wp = EOS;
    288 				if (is_wdvarname(Xstring(ws, wp), false)) {
    289 					char *p, *tmp;
    290 
    291 					if (arraysub(&tmp)) {
    292 						*wp++ = CHAR;
    293 						*wp++ = c;
    294 						for (p = tmp; *p; ) {
    295 							Xcheck(ws, wp);
    296 							*wp++ = CHAR;
    297 							*wp++ = *p++;
    298 						}
    299 						afree(tmp, ATEMP);
    300 						break;
    301 					} else {
    302 						Source *s;
    303 
    304 						s = pushs(SREREAD,
    305 						    source->areap);
    306 						s->start = s->str =
    307 						    s->u.freeme = tmp;
    308 						s->next = source;
    309 						source = s;
    310 					}
    311 				}
    312 				*wp++ = CHAR;
    313 				*wp++ = c;
    314 				break;
    315 			}
    316 			/* FALLTHROUGH */
    317  Sbase1:		/* includes *(...|...) pattern (*+?@!) */
    318 			if (c == '*' || c == '@' || c == '+' || c == '?' ||
    319 			    c == '!') {
    320 				c2 = getsc();
    321 				if (c2 == '(' /*)*/ ) {
    322 					*wp++ = OPAT;
    323 					*wp++ = c;
    324 					PUSH_STATE(SPATTERN);
    325 					break;
    326 				}
    327 				ungetsc(c2);
    328 			}
    329 			/* FALLTHROUGH */
    330  Sbase2:		/* doesn't include *(...|...) pattern (*+?@!) */
    331 			switch (c) {
    332 			case '\\':
    333  getsc_qchar:
    334 				if ((c = getsc())) {
    335 					/* trailing \ is lost */
    336 					*wp++ = QCHAR;
    337 					*wp++ = c;
    338 				}
    339 				break;
    340 			case '\'':
    341  open_ssquote_unless_heredoc:
    342 				if ((cf & HEREDOC))
    343 					goto store_char;
    344 				*wp++ = OQUOTE;
    345 				ignore_backslash_newline++;
    346 				PUSH_STATE(SSQUOTE);
    347 				break;
    348 			case '"':
    349  open_sdquote:
    350 				*wp++ = OQUOTE;
    351 				PUSH_STATE(SDQUOTE);
    352 				break;
    353 			case '$':
    354 				/*
    355 				 * processing of dollar sign belongs into
    356 				 * Subst, except for those which can open
    357 				 * a string: $'' and $""
    358 				 */
    359  subst_dollar_ex:
    360 				c = getsc();
    361 				switch (c) {
    362 				case '"':
    363 					goto open_sdquote;
    364 				case '\'':
    365 					goto open_sequote;
    366 				default:
    367 					goto SubstS;
    368 				}
    369 			default:
    370 				goto Subst;
    371 			}
    372 			break;
    373 
    374  Subst:
    375 			switch (c) {
    376 			case '\\':
    377 				c = getsc();
    378 				switch (c) {
    379 				case '"':
    380 					if ((cf & HEREDOC))
    381 						goto heredocquote;
    382 					/* FALLTHROUGH */
    383 				case '\\':
    384 				case '$': case '`':
    385  store_qchar:
    386 					*wp++ = QCHAR;
    387 					*wp++ = c;
    388 					break;
    389 				default:
    390  heredocquote:
    391 					Xcheck(ws, wp);
    392 					if (c) {
    393 						/* trailing \ is lost */
    394 						*wp++ = CHAR;
    395 						*wp++ = '\\';
    396 						*wp++ = CHAR;
    397 						*wp++ = c;
    398 					}
    399 					break;
    400 				}
    401 				break;
    402 			case '$':
    403 				c = getsc();
    404  SubstS:
    405 				if (c == '(') /*)*/ {
    406 					c = getsc();
    407 					if (c == '(') /*)*/ {
    408 						*wp++ = EXPRSUB;
    409 						PUSH_SRETRACE(SASPAREN);
    410 						statep->nparen = 2;
    411 						*retrace_info->xp++ = '(';
    412 					} else {
    413 						ungetsc(c);
    414  subst_command:
    415 						c = COMSUB;
    416  subst_command2:
    417 						sp = yyrecursive(c);
    418 						cz = strlen(sp) + 1;
    419 						XcheckN(ws, wp, cz);
    420 						*wp++ = c;
    421 						memcpy(wp, sp, cz);
    422 						wp += cz;
    423 					}
    424 				} else if (c == '{') /*}*/ {
    425 					if ((c = getsc()) == '|') {
    426 						/*
    427 						 * non-subenvironment
    428 						 * value substitution
    429 						 */
    430 						c = VALSUB;
    431 						goto subst_command2;
    432 					} else if (ctype(c, C_IFSWS)) {
    433 						/*
    434 						 * non-subenvironment
    435 						 * "command" substitution
    436 						 */
    437 						c = FUNSUB;
    438 						goto subst_command2;
    439 					}
    440 					ungetsc(c);
    441 					*wp++ = OSUBST;
    442 					*wp++ = '{'; /*}*/
    443 					wp = get_brace_var(&ws, wp);
    444 					c = getsc();
    445 					/* allow :# and :% (ksh88 compat) */
    446 					if (c == ':') {
    447 						*wp++ = CHAR;
    448 						*wp++ = c;
    449 						c = getsc();
    450 						if (c == ':') {
    451 							*wp++ = CHAR;
    452 							*wp++ = '0';
    453 							*wp++ = ADELIM;
    454 							*wp++ = ':';
    455 							PUSH_STATE(SBRACE);
    456 							PUSH_STATE(SADELIM);
    457 							statep->ls_adelim.delimiter = ':';
    458 							statep->ls_adelim.num = 1;
    459 							statep->nparen = 0;
    460 							break;
    461 						} else if (ksh_isdigit(c) ||
    462 						    c == '('/*)*/ || c == ' ' ||
    463 						    /*XXX what else? */
    464 						    c == '$') {
    465 							/* substring subst. */
    466 							if (c != ' ') {
    467 								*wp++ = CHAR;
    468 								*wp++ = ' ';
    469 							}
    470 							ungetsc(c);
    471 							PUSH_STATE(SBRACE);
    472 							PUSH_STATE(SADELIM);
    473 							statep->ls_adelim.delimiter = ':';
    474 							statep->ls_adelim.num = 2;
    475 							statep->nparen = 0;
    476 							break;
    477 						}
    478 					} else if (c == '/') {
    479 						*wp++ = CHAR;
    480 						*wp++ = c;
    481 						if ((c = getsc()) == '/') {
    482 							*wp++ = ADELIM;
    483 							*wp++ = c;
    484 						} else
    485 							ungetsc(c);
    486 						PUSH_STATE(SBRACE);
    487 						PUSH_STATE(SADELIM);
    488 						statep->ls_adelim.delimiter = '/';
    489 						statep->ls_adelim.num = 1;
    490 						statep->nparen = 0;
    491 						break;
    492 					}
    493 					/*
    494 					 * If this is a trim operation,
    495 					 * treat (,|,) specially in STBRACE.
    496 					 */
    497 					if (ctype(c, C_SUBOP2)) {
    498 						ungetsc(c);
    499 						if (Flag(FSH))
    500 							PUSH_STATE(STBRACEBOURNE);
    501 						else
    502 							PUSH_STATE(STBRACEKORN);
    503 					} else {
    504 						ungetsc(c);
    505 						if (state == SDQUOTE ||
    506 						    state == SQBRACE)
    507 							PUSH_STATE(SQBRACE);
    508 						else
    509 							PUSH_STATE(SBRACE);
    510 					}
    511 				} else if (ksh_isalphx(c)) {
    512 					*wp++ = OSUBST;
    513 					*wp++ = 'X';
    514 					do {
    515 						Xcheck(ws, wp);
    516 						*wp++ = c;
    517 						c = getsc();
    518 					} while (ksh_isalnux(c));
    519 					*wp++ = '\0';
    520 					*wp++ = CSUBST;
    521 					*wp++ = 'X';
    522 					ungetsc(c);
    523 				} else if (ctype(c, C_VAR1 | C_DIGIT)) {
    524 					Xcheck(ws, wp);
    525 					*wp++ = OSUBST;
    526 					*wp++ = 'X';
    527 					*wp++ = c;
    528 					*wp++ = '\0';
    529 					*wp++ = CSUBST;
    530 					*wp++ = 'X';
    531 				} else {
    532 					*wp++ = CHAR;
    533 					*wp++ = '$';
    534 					ungetsc(c);
    535 				}
    536 				break;
    537 			case '`':
    538  subst_gravis:
    539 				PUSH_STATE(SBQUOTE);
    540 				*wp++ = COMSUB;
    541 				/*
    542 				 * Need to know if we are inside double quotes
    543 				 * since sh/AT&T-ksh translate the \" to " in
    544 				 * "`...\"...`".
    545 				 * This is not done in POSIX mode (section
    546 				 * 3.2.3, Double Quotes: "The backquote shall
    547 				 * retain its special meaning introducing the
    548 				 * other form of command substitution (see
    549 				 * 3.6.3). The portion of the quoted string
    550 				 * from the initial backquote and the
    551 				 * characters up to the next backquote that
    552 				 * is not preceded by a backslash (having
    553 				 * escape characters removed) defines that
    554 				 * command whose output replaces `...` when
    555 				 * the word is expanded."
    556 				 * Section 3.6.3, Command Substitution:
    557 				 * "Within the backquoted style of command
    558 				 * substitution, backslash shall retain its
    559 				 * literal meaning, except when followed by
    560 				 * $ ` \.").
    561 				 */
    562 				statep->ls_bool = false;
    563 				s2 = statep;
    564 				base = state_info.base;
    565 				while (/* CONSTCOND */ 1) {
    566 					for (; s2 != base; s2--) {
    567 						if (s2->type == SDQUOTE) {
    568 							statep->ls_bool = true;
    569 							break;
    570 						}
    571 					}
    572 					if (s2 != base)
    573 						break;
    574 					if (!(s2 = s2->ls_base))
    575 						break;
    576 					base = s2-- - STATE_BSIZE;
    577 				}
    578 				break;
    579 			case QCHAR:
    580 				if (cf & LQCHAR) {
    581 					*wp++ = QCHAR;
    582 					*wp++ = getsc();
    583 					break;
    584 				}
    585 				/* FALLTHROUGH */
    586 			default:
    587  store_char:
    588 				*wp++ = CHAR;
    589 				*wp++ = c;
    590 			}
    591 			break;
    592 
    593 		case SEQUOTE:
    594 			if (c == '\'') {
    595 				POP_STATE();
    596 				*wp++ = CQUOTE;
    597 				ignore_backslash_newline--;
    598 			} else if (c == '\\') {
    599 				if ((c2 = unbksl(true, s_get, s_put)) == -1)
    600 					c2 = s_get();
    601 				if (c2 == 0)
    602 					statep->ls_bool = true;
    603 				if (!statep->ls_bool) {
    604 					char ts[4];
    605 
    606 					if ((unsigned int)c2 < 0x100) {
    607 						*wp++ = QCHAR;
    608 						*wp++ = c2;
    609 					} else {
    610 						cz = utf_wctomb(ts, c2 - 0x100);
    611 						ts[cz] = 0;
    612 						for (cz = 0; ts[cz]; ++cz) {
    613 							*wp++ = QCHAR;
    614 							*wp++ = ts[cz];
    615 						}
    616 					}
    617 				}
    618 			} else if (!statep->ls_bool) {
    619 				*wp++ = QCHAR;
    620 				*wp++ = c;
    621 			}
    622 			break;
    623 
    624 		case SSQUOTE:
    625 			if (c == '\'') {
    626 				POP_STATE();
    627 				if ((cf & HEREDOC) || state == SQBRACE)
    628 					goto store_char;
    629 				*wp++ = CQUOTE;
    630 				ignore_backslash_newline--;
    631 			} else {
    632 				*wp++ = QCHAR;
    633 				*wp++ = c;
    634 			}
    635 			break;
    636 
    637 		case SDQUOTE:
    638 			if (c == '"') {
    639 				POP_STATE();
    640 				*wp++ = CQUOTE;
    641 			} else
    642 				goto Subst;
    643 			break;
    644 
    645 		/* $(( ... )) */
    646 		case SASPAREN:
    647 			if (c == '(')
    648 				statep->nparen++;
    649 			else if (c == ')') {
    650 				statep->nparen--;
    651 				if (statep->nparen == 1) {
    652 					/* end of EXPRSUB */
    653 					POP_SRETRACE();
    654 
    655 					if ((c2 = getsc()) == /*(*/ ')') {
    656 						cz = strlen(sp) - 2;
    657 						XcheckN(ws, wp, cz);
    658 						memcpy(wp, sp + 1, cz);
    659 						wp += cz;
    660 						afree(sp, ATEMP);
    661 						*wp++ = '\0';
    662 						break;
    663 					} else {
    664 						Source *s;
    665 
    666 						ungetsc(c2);
    667 						/*
    668 						 * mismatched parenthesis -
    669 						 * assume we were really
    670 						 * parsing a $(...) expression
    671 						 */
    672 						--wp;
    673 						s = pushs(SREREAD,
    674 						    source->areap);
    675 						s->start = s->str =
    676 						    s->u.freeme = sp;
    677 						s->next = source;
    678 						source = s;
    679 						goto subst_command;
    680 					}
    681 				}
    682 			}
    683 			/* reuse existing state machine */
    684 			goto Sbase2;
    685 
    686 		case SQBRACE:
    687 			if (c == '\\') {
    688 				/*
    689 				 * perform POSIX "quote removal" if the back-
    690 				 * slash is "special", i.e. same cases as the
    691 				 * {case '\\':} in Subst: plus closing brace;
    692 				 * in mksh code "quote removal" on '\c' means
    693 				 * write QCHAR+c, otherwise CHAR+\+CHAR+c are
    694 				 * emitted (in heredocquote:)
    695 				 */
    696 				if ((c = getsc()) == '"' || c == '\\' ||
    697 				    c == '$' || c == '`' || c == /*{*/'}')
    698 					goto store_qchar;
    699 				goto heredocquote;
    700 			}
    701 			goto common_SQBRACE;
    702 
    703 		case SBRACE:
    704 			if (c == '\'')
    705 				goto open_ssquote_unless_heredoc;
    706 			else if (c == '\\')
    707 				goto getsc_qchar;
    708  common_SQBRACE:
    709 			if (c == '"')
    710 				goto open_sdquote;
    711 			else if (c == '$')
    712 				goto subst_dollar_ex;
    713 			else if (c == '`')
    714 				goto subst_gravis;
    715 			else if (c != /*{*/ '}')
    716 				goto store_char;
    717 			POP_STATE();
    718 			*wp++ = CSUBST;
    719 			*wp++ = /*{*/ '}';
    720 			break;
    721 
    722 		/* Same as SBASE, except (,|,) treated specially */
    723 		case STBRACEKORN:
    724 			if (c == '|')
    725 				*wp++ = SPAT;
    726 			else if (c == '(') {
    727 				*wp++ = OPAT;
    728 				/* simile for @ */
    729 				*wp++ = ' ';
    730 				PUSH_STATE(SPATTERN);
    731 			} else /* FALLTHROUGH */
    732 		case STBRACEBOURNE:
    733 			  if (c == /*{*/ '}') {
    734 				POP_STATE();
    735 				*wp++ = CSUBST;
    736 				*wp++ = /*{*/ '}';
    737 			} else
    738 				goto Sbase1;
    739 			break;
    740 
    741 		case SBQUOTE:
    742 			if (c == '`') {
    743 				*wp++ = 0;
    744 				POP_STATE();
    745 			} else if (c == '\\') {
    746 				switch (c = getsc()) {
    747 				case 0:
    748 					/* trailing \ is lost */
    749 					break;
    750 				case '\\':
    751 				case '$': case '`':
    752 					*wp++ = c;
    753 					break;
    754 				case '"':
    755 					if (statep->ls_bool) {
    756 						*wp++ = c;
    757 						break;
    758 					}
    759 					/* FALLTHROUGH */
    760 				default:
    761 					*wp++ = '\\';
    762 					*wp++ = c;
    763 					break;
    764 				}
    765 			} else
    766 				*wp++ = c;
    767 			break;
    768 
    769 		/* ONEWORD */
    770 		case SWORD:
    771 			goto Subst;
    772 
    773 		/* LETEXPR: (( ... )) */
    774 		case SLETPAREN:
    775 			if (c == /*(*/ ')') {
    776 				if (statep->nparen > 0)
    777 					--statep->nparen;
    778 				else if ((c2 = getsc()) == /*(*/ ')') {
    779 					c = 0;
    780 					*wp++ = CQUOTE;
    781 					goto Done;
    782 				} else {
    783 					Source *s;
    784 
    785 					ungetsc(c2);
    786 					/*
    787 					 * mismatched parenthesis -
    788 					 * assume we were really
    789 					 * parsing a (...) expression
    790 					 */
    791 					*wp = EOS;
    792 					sp = Xstring(ws, wp);
    793 					dp = wdstrip(sp, WDS_KEEPQ);
    794 					s = pushs(SREREAD, source->areap);
    795 					s->start = s->str = s->u.freeme = dp;
    796 					s->next = source;
    797 					source = s;
    798 					return ('('/*)*/);
    799 				}
    800 			} else if (c == '(')
    801 				/*
    802 				 * parentheses inside quotes and
    803 				 * backslashes are lost, but AT&T ksh
    804 				 * doesn't count them either
    805 				 */
    806 				++statep->nparen;
    807 			goto Sbase2;
    808 
    809 		/* <<, <<-, <<< delimiter */
    810 		case SHEREDELIM:
    811 			/*
    812 			 * here delimiters need a special case since
    813 			 * $ and `...` are not to be treated specially
    814 			 */
    815 			switch (c) {
    816 			case '\\':
    817 				if ((c = getsc())) {
    818 					/* trailing \ is lost */
    819 					*wp++ = QCHAR;
    820 					*wp++ = c;
    821 				}
    822 				break;
    823 			case '\'':
    824 				goto open_ssquote_unless_heredoc;
    825 			case '$':
    826 				if ((c2 = getsc()) == '\'') {
    827  open_sequote:
    828 					*wp++ = OQUOTE;
    829 					ignore_backslash_newline++;
    830 					PUSH_STATE(SEQUOTE);
    831 					statep->ls_bool = false;
    832 					break;
    833 				} else if (c2 == '"') {
    834 					/* FALLTHROUGH */
    835 			case '"':
    836 					PUSH_SRETRACE(SHEREDQUOTE);
    837 					break;
    838 				}
    839 				ungetsc(c2);
    840 				/* FALLTHROUGH */
    841 			default:
    842 				*wp++ = CHAR;
    843 				*wp++ = c;
    844 			}
    845 			break;
    846 
    847 		/* " in <<, <<-, <<< delimiter */
    848 		case SHEREDQUOTE:
    849 			if (c != '"')
    850 				goto Subst;
    851 			POP_SRETRACE();
    852 			dp = strnul(sp) - 1;
    853 			/* remove the trailing double quote */
    854 			*dp = '\0';
    855 			/* store the quoted string */
    856 			*wp++ = OQUOTE;
    857 			XcheckN(ws, wp, (dp - sp) * 2);
    858 			dp = sp;
    859 			while ((c = *dp++)) {
    860 				if (c == '\\') {
    861 					switch ((c = *dp++)) {
    862 					case '\\':
    863 					case '"':
    864 					case '$':
    865 					case '`':
    866 						break;
    867 					default:
    868 						*wp++ = CHAR;
    869 						*wp++ = '\\';
    870 						break;
    871 					}
    872 				}
    873 				*wp++ = CHAR;
    874 				*wp++ = c;
    875 			}
    876 			afree(sp, ATEMP);
    877 			*wp++ = CQUOTE;
    878 			state = statep->type = SHEREDELIM;
    879 			break;
    880 
    881 		/* in *(...|...) pattern (*+?@!) */
    882 		case SPATTERN:
    883 			if (c == /*(*/ ')') {
    884 				*wp++ = CPAT;
    885 				POP_STATE();
    886 			} else if (c == '|') {
    887 				*wp++ = SPAT;
    888 			} else if (c == '(') {
    889 				*wp++ = OPAT;
    890 				/* simile for @ */
    891 				*wp++ = ' ';
    892 				PUSH_STATE(SPATTERN);
    893 			} else
    894 				goto Sbase1;
    895 			break;
    896 		}
    897 	}
    898  Done:
    899 	Xcheck(ws, wp);
    900 	if (statep != &states[1])
    901 		/* XXX figure out what is missing */
    902 		yyerror("no closing quote\n");
    903 
    904 	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
    905 	if (state == SHEREDELIM)
    906 		state = SBASE;
    907 
    908 	dp = Xstring(ws, wp);
    909 	if (state == SBASE && (
    910 #ifndef MKSH_LEGACY_MODE
    911 	    (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) ||
    912 #endif
    913 	    c == '<' || c == '>')) {
    914 		struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
    915 
    916 		if (Xlength(ws, wp) == 0)
    917 			iop->unit = c == '<' ? 0 : 1;
    918 		else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) {
    919 			if (dp[c2] != CHAR)
    920 				goto no_iop;
    921 			if (!ksh_isdigit(dp[c2 + 1]))
    922 				goto no_iop;
    923 			iop->unit = (iop->unit * 10) + dp[c2 + 1] - '0';
    924 			if (iop->unit >= FDBASE)
    925 				goto no_iop;
    926 		}
    927 
    928 		if (c == '&') {
    929 			if ((c2 = getsc()) != '>') {
    930 				ungetsc(c2);
    931 				goto no_iop;
    932 			}
    933 			c = c2;
    934 			iop->ioflag = IOBASH;
    935 		} else
    936 			iop->ioflag = 0;
    937 
    938 		c2 = getsc();
    939 		/* <<, >>, <> are ok, >< is not */
    940 		if (c == c2 || (c == '<' && c2 == '>')) {
    941 			iop->ioflag |= c == c2 ?
    942 			    (c == '>' ? IOCAT : IOHERE) : IORDWR;
    943 			if (iop->ioflag == IOHERE) {
    944 				if ((c2 = getsc()) == '-') {
    945 					iop->ioflag |= IOSKIP;
    946 					c2 = getsc();
    947 				} else if (c2 == '<')
    948 					iop->ioflag |= IOHERESTR;
    949 				ungetsc(c2);
    950 				if (c2 == '\n')
    951 					iop->ioflag |= IONDELIM;
    952 			}
    953 		} else if (c2 == '&')
    954 			iop->ioflag |= IODUP | (c == '<' ? IORDUP : 0);
    955 		else {
    956 			iop->ioflag |= c == '>' ? IOWRITE : IOREAD;
    957 			if (c == '>' && c2 == '|')
    958 				iop->ioflag |= IOCLOB;
    959 			else
    960 				ungetsc(c2);
    961 		}
    962 
    963 		iop->name = NULL;
    964 		iop->delim = NULL;
    965 		iop->heredoc = NULL;
    966 		/* free word */
    967 		Xfree(ws, wp);
    968 		yylval.iop = iop;
    969 		return (REDIR);
    970  no_iop:
    971 		afree(iop, ATEMP);
    972 	}
    973 
    974 	if (wp == dp && state == SBASE) {
    975 		/* free word */
    976 		Xfree(ws, wp);
    977 		/* no word, process LEX1 character */
    978 		if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) {
    979 			if ((c2 = getsc()) == c)
    980 				c = (c == ';') ? BREAK :
    981 				    (c == '|') ? LOGOR :
    982 				    (c == '&') ? LOGAND :
    983 				    /* c == '(' ) */ MDPAREN;
    984 			else if (c == '|' && c2 == '&')
    985 				c = COPROC;
    986 			else if (c == ';' && c2 == '|')
    987 				c = BRKEV;
    988 			else if (c == ';' && c2 == '&')
    989 				c = BRKFT;
    990 			else
    991 				ungetsc(c2);
    992 #ifndef MKSH_SMALL
    993 			if (c == BREAK) {
    994 				if ((c2 = getsc()) == '&')
    995 					c = BRKEV;
    996 				else
    997 					ungetsc(c2);
    998 			}
    999 #endif
   1000 		} else if (c == '\n') {
   1001 			gethere(false);
   1002 			if (cf & CONTIN)
   1003 				goto Again;
   1004 		} else if (c == '\0')
   1005 			/* need here strings at EOF */
   1006 			gethere(true);
   1007 		return (c);
   1008 	}
   1009 
   1010 	/* terminate word */
   1011 	*wp++ = EOS;
   1012 	yylval.cp = Xclose(ws, wp);
   1013 	if (state == SWORD || state == SLETPAREN
   1014 	    /* XXX ONEWORD? */)
   1015 		return (LWORD);
   1016 
   1017 	/* unget terminator */
   1018 	ungetsc(c);
   1019 
   1020 	/*
   1021 	 * note: the alias-vs-function code below depends on several
   1022 	 * interna: starting from here, source->str is not modified;
   1023 	 * the way getsc() and ungetsc() operate; etc.
   1024 	 */
   1025 
   1026 	/* copy word to unprefixed string ident */
   1027 	sp = yylval.cp;
   1028 	dp = ident;
   1029 	if ((cf & HEREDELIM) && (sp[1] == '<')) {
   1030  herestringloop:
   1031 		switch ((c = *sp++)) {
   1032 		case CHAR:
   1033 			++sp;
   1034 			/* FALLTHROUGH */
   1035 		case OQUOTE:
   1036 		case CQUOTE:
   1037 			goto herestringloop;
   1038 		default:
   1039 			break;
   1040 		}
   1041 		/* dummy value */
   1042 		*dp++ = 'x';
   1043 	} else
   1044 		while ((dp - ident) < IDENT && (c = *sp++) == CHAR)
   1045 			*dp++ = *sp++;
   1046 	if (c != EOS)
   1047 		/* word is not unquoted */
   1048 		dp = ident;
   1049 	/* make sure the ident array stays NUL padded */
   1050 	memset(dp, 0, (ident + IDENT) - dp + 1);
   1051 
   1052 	if (!(cf & (KEYWORD | ALIAS)))
   1053 		return (LWORD);
   1054 
   1055 	if (*ident != '\0') {
   1056 		struct tbl *p;
   1057 		uint32_t h = hash(ident);
   1058 
   1059 		if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
   1060 		    (!(cf & ESACONLY) || p->val.i == ESAC ||
   1061 		    p->val.i == /*{*/ '}')) {
   1062 			afree(yylval.cp, ATEMP);
   1063 			return (p->val.i);
   1064 		}
   1065 		if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
   1066 		    (p->flag & ISSET)) {
   1067 			/*
   1068 			 * this still points to the same character as the
   1069 			 * ungetsc'd terminator from above
   1070 			 */
   1071 			const char *cp = source->str;
   1072 
   1073 			/* prefer POSIX but not Korn functions over aliases */
   1074 			while (*cp == ' ' || *cp == '\t')
   1075 				/*
   1076 				 * this is like getsc() without skipping
   1077 				 * over Source boundaries (including not
   1078 				 * parsing ungetsc'd characters that got
   1079 				 * pushed into an SREREAD) which is what
   1080 				 * we want here anyway: find out whether
   1081 				 * the alias name is followed by a POSIX
   1082 				 * function definition
   1083 				 */
   1084 				++cp;
   1085 			/* prefer functions over aliases */
   1086 			if (cp[0] != '(' || cp[1] != ')') {
   1087 				Source *s = source;
   1088 
   1089 				while (s && (s->flags & SF_HASALIAS))
   1090 					if (s->u.tblp == p)
   1091 						return (LWORD);
   1092 					else
   1093 						s = s->next;
   1094 				/* push alias expansion */
   1095 				s = pushs(SALIAS, source->areap);
   1096 				s->start = s->str = p->val.s;
   1097 				s->u.tblp = p;
   1098 				s->flags |= SF_HASALIAS;
   1099 				s->next = source;
   1100 				if (source->type == SEOF) {
   1101 					/* prevent infinite recursion at EOS */
   1102 					source->u.tblp = p;
   1103 					source->flags |= SF_HASALIAS;
   1104 				}
   1105 				source = s;
   1106 				afree(yylval.cp, ATEMP);
   1107 				goto Again;
   1108 			}
   1109 		}
   1110 	} else if (cf & ALIAS) {
   1111 		/* retain typeset et al. even when quoted */
   1112 		if (assign_command((dp = wdstrip(yylval.cp, 0))))
   1113 			strlcpy(ident, dp, sizeof(ident));
   1114 		afree(dp, ATEMP);
   1115 	}
   1116 
   1117 	return (LWORD);
   1118 }
   1119 
   1120 static void
   1121 gethere(bool iseof)
   1122 {
   1123 	struct ioword **p;
   1124 
   1125 	for (p = heres; p < herep; p++)
   1126 		if (iseof && !((*p)->ioflag & IOHERESTR))
   1127 			/* only here strings at EOF */
   1128 			return;
   1129 		else
   1130 			readhere(*p);
   1131 	herep = heres;
   1132 }
   1133 
   1134 /*
   1135  * read "<<word" text into temp file
   1136  */
   1137 
   1138 static void
   1139 readhere(struct ioword *iop)
   1140 {
   1141 	int c;
   1142 	const char *eof, *eofp;
   1143 	XString xs;
   1144 	char *xp;
   1145 	int xpos;
   1146 
   1147 	if (iop->ioflag & IOHERESTR) {
   1148 		/* process the here string */
   1149 		iop->heredoc = xp = evalstr(iop->delim, DOBLANK);
   1150 		xpos = strlen(xp) - 1;
   1151 		memmove(xp, xp + 1, xpos);
   1152 		xp[xpos] = '\n';
   1153 		return;
   1154 	}
   1155 
   1156 	eof = iop->ioflag & IONDELIM ? "<<" : evalstr(iop->delim, 0);
   1157 
   1158 	if (!(iop->ioflag & IOEVAL))
   1159 		ignore_backslash_newline++;
   1160 
   1161 	Xinit(xs, xp, 256, ATEMP);
   1162 
   1163  heredoc_read_line:
   1164 	/* beginning of line */
   1165 	eofp = eof;
   1166 	xpos = Xsavepos(xs, xp);
   1167 	if (iop->ioflag & IOSKIP) {
   1168 		/* skip over leading tabs */
   1169 		while ((c = getsc()) == '\t')
   1170 			;	/* nothing */
   1171 		goto heredoc_parse_char;
   1172 	}
   1173  heredoc_read_char:
   1174 	c = getsc();
   1175  heredoc_parse_char:
   1176 	/* compare with here document marker */
   1177 	if (!*eofp) {
   1178 		/* end of here document marker, what to do? */
   1179 		switch (c) {
   1180 		case /*(*/ ')':
   1181 			if (!subshell_nesting_type)
   1182 				/*-
   1183 				 * not allowed outside $(...) or (...)
   1184 				 * => mismatch
   1185 				 */
   1186 				break;
   1187 			/* allow $(...) or (...) to close here */
   1188 			ungetsc(/*(*/ ')');
   1189 			/* FALLTHROUGH */
   1190 		case 0:
   1191 			/*
   1192 			 * Allow EOF here to commands without trailing
   1193 			 * newlines (mksh -c '...') will work as well.
   1194 			 */
   1195 		case '\n':
   1196 			/* Newline terminates here document marker */
   1197 			goto heredoc_found_terminator;
   1198 		}
   1199 	} else if (c == *eofp++)
   1200 		/* store; then read and compare next character */
   1201 		goto heredoc_store_and_loop;
   1202 	/* nope, mismatch; read until end of line */
   1203 	while (c != '\n') {
   1204 		if (!c)
   1205 			/* oops, reached EOF */
   1206 			yyerror("%s '%s' unclosed\n", "here document", eof);
   1207 		/* store character */
   1208 		Xcheck(xs, xp);
   1209 		Xput(xs, xp, c);
   1210 		/* read next character */
   1211 		c = getsc();
   1212 	}
   1213 	/* we read a newline as last character */
   1214  heredoc_store_and_loop:
   1215 	/* store character */
   1216 	Xcheck(xs, xp);
   1217 	Xput(xs, xp, c);
   1218 	if (c == '\n')
   1219 		goto heredoc_read_line;
   1220 	goto heredoc_read_char;
   1221 
   1222  heredoc_found_terminator:
   1223 	/* jump back to saved beginning of line */
   1224 	xp = Xrestpos(xs, xp, xpos);
   1225 	/* terminate, close and store */
   1226 	Xput(xs, xp, '\0');
   1227 	iop->heredoc = Xclose(xs, xp);
   1228 
   1229 	if (!(iop->ioflag & IOEVAL))
   1230 		ignore_backslash_newline--;
   1231 }
   1232 
   1233 void
   1234 yyerror(const char *fmt, ...)
   1235 {
   1236 	va_list va;
   1237 
   1238 	/* pop aliases and re-reads */
   1239 	while (source->type == SALIAS || source->type == SREREAD)
   1240 		source = source->next;
   1241 	/* zap pending input */
   1242 	source->str = null;
   1243 
   1244 	error_prefix(true);
   1245 	va_start(va, fmt);
   1246 	shf_vfprintf(shl_out, fmt, va);
   1247 	va_end(va);
   1248 	errorfz();
   1249 }
   1250 
   1251 /*
   1252  * input for yylex with alias expansion
   1253  */
   1254 
   1255 Source *
   1256 pushs(int type, Area *areap)
   1257 {
   1258 	Source *s;
   1259 
   1260 	s = alloc(sizeof(Source), areap);
   1261 	memset(s, 0, sizeof(Source));
   1262 	s->type = type;
   1263 	s->str = null;
   1264 	s->areap = areap;
   1265 	if (type == SFILE || type == SSTDIN)
   1266 		XinitN(s->xs, 256, s->areap);
   1267 	return (s);
   1268 }
   1269 
   1270 static int
   1271 getsc_uu(void)
   1272 {
   1273 	Source *s = source;
   1274 	int c;
   1275 
   1276 	while ((c = *s->str++) == 0) {
   1277 		/* return 0 for EOF by default */
   1278 		s->str = NULL;
   1279 		switch (s->type) {
   1280 		case SEOF:
   1281 			s->str = null;
   1282 			return (0);
   1283 
   1284 		case SSTDIN:
   1285 		case SFILE:
   1286 			getsc_line(s);
   1287 			break;
   1288 
   1289 		case SWSTR:
   1290 			break;
   1291 
   1292 		case SSTRING:
   1293 		case SSTRINGCMDLINE:
   1294 			break;
   1295 
   1296 		case SWORDS:
   1297 			s->start = s->str = *s->u.strv++;
   1298 			s->type = SWORDSEP;
   1299 			break;
   1300 
   1301 		case SWORDSEP:
   1302 			if (*s->u.strv == NULL) {
   1303 				s->start = s->str = "\n";
   1304 				s->type = SEOF;
   1305 			} else {
   1306 				s->start = s->str = " ";
   1307 				s->type = SWORDS;
   1308 			}
   1309 			break;
   1310 
   1311 		case SALIAS:
   1312 			if (s->flags & SF_ALIASEND) {
   1313 				/* pass on an unused SF_ALIAS flag */
   1314 				source = s->next;
   1315 				source->flags |= s->flags & SF_ALIAS;
   1316 				s = source;
   1317 			} else if (*s->u.tblp->val.s &&
   1318 			    (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) {
   1319 				/* pop source stack */
   1320 				source = s = s->next;
   1321 				/*
   1322 				 * Note that this alias ended with a
   1323 				 * space, enabling alias expansion on
   1324 				 * the following word.
   1325 				 */
   1326 				s->flags |= SF_ALIAS;
   1327 			} else {
   1328 				/*
   1329 				 * At this point, we need to keep the current
   1330 				 * alias in the source list so recursive
   1331 				 * aliases can be detected and we also need to
   1332 				 * return the next character. Do this by
   1333 				 * temporarily popping the alias to get the
   1334 				 * next character and then put it back in the
   1335 				 * source list with the SF_ALIASEND flag set.
   1336 				 */
   1337 				/* pop source stack */
   1338 				source = s->next;
   1339 				source->flags |= s->flags & SF_ALIAS;
   1340 				c = getsc_uu();
   1341 				if (c) {
   1342 					s->flags |= SF_ALIASEND;
   1343 					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
   1344 					s->start = s->str = s->ugbuf;
   1345 					s->next = source;
   1346 					source = s;
   1347 				} else {
   1348 					s = source;
   1349 					/* avoid reading EOF twice */
   1350 					s->str = NULL;
   1351 					break;
   1352 				}
   1353 			}
   1354 			continue;
   1355 
   1356 		case SREREAD:
   1357 			if (s->start != s->ugbuf)
   1358 				/* yuck */
   1359 				afree(s->u.freeme, ATEMP);
   1360 			source = s = s->next;
   1361 			continue;
   1362 		}
   1363 		if (s->str == NULL) {
   1364 			s->type = SEOF;
   1365 			s->start = s->str = null;
   1366 			return ('\0');
   1367 		}
   1368 		if (s->flags & SF_ECHO) {
   1369 			shf_puts(s->str, shl_out);
   1370 			shf_flush(shl_out);
   1371 		}
   1372 	}
   1373 	return (c);
   1374 }
   1375 
   1376 static void
   1377 getsc_line(Source *s)
   1378 {
   1379 	char *xp = Xstring(s->xs, xp), *cp;
   1380 	bool interactive = Flag(FTALKING) && s->type == SSTDIN;
   1381 	bool have_tty = tobool(interactive && (s->flags & SF_TTY));
   1382 
   1383 	/* Done here to ensure nothing odd happens when a timeout occurs */
   1384 	XcheckN(s->xs, xp, LINE);
   1385 	*xp = '\0';
   1386 	s->start = s->str = xp;
   1387 
   1388 	if (have_tty && ksh_tmout) {
   1389 		ksh_tmout_state = TMOUT_READING;
   1390 		alarm(ksh_tmout);
   1391 	}
   1392 	if (interactive)
   1393 		change_winsz();
   1394 #ifndef MKSH_NO_CMDLINE_EDITING
   1395 	if (have_tty && (
   1396 #if !MKSH_S_NOVI
   1397 	    Flag(FVI) ||
   1398 #endif
   1399 	    Flag(FEMACS) || Flag(FGMACS))) {
   1400 		int nread;
   1401 
   1402 		nread = x_read(xp);
   1403 		if (nread < 0)
   1404 			/* read error */
   1405 			nread = 0;
   1406 		xp[nread] = '\0';
   1407 		xp += nread;
   1408 	} else
   1409 #endif
   1410 	  {
   1411 		if (interactive)
   1412 			pprompt(prompt, 0);
   1413 		else
   1414 			s->line++;
   1415 
   1416 		while (/* CONSTCOND */ 1) {
   1417 			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
   1418 
   1419 			if (!p && shf_error(s->u.shf) &&
   1420 			    shf_errno(s->u.shf) == EINTR) {
   1421 				shf_clearerr(s->u.shf);
   1422 				if (trap)
   1423 					runtraps(0);
   1424 				continue;
   1425 			}
   1426 			if (!p || (xp = p, xp[-1] == '\n'))
   1427 				break;
   1428 			/* double buffer size */
   1429 			/* move past NUL so doubling works... */
   1430 			xp++;
   1431 			XcheckN(s->xs, xp, Xlength(s->xs, xp));
   1432 			/* ...and move back again */
   1433 			xp--;
   1434 		}
   1435 		/*
   1436 		 * flush any unwanted input so other programs/builtins
   1437 		 * can read it. Not very optimal, but less error prone
   1438 		 * than flushing else where, dealing with redirections,
   1439 		 * etc.
   1440 		 * TODO: reduce size of shf buffer (~128?) if SSTDIN
   1441 		 */
   1442 		if (s->type == SSTDIN)
   1443 			shf_flush(s->u.shf);
   1444 	}
   1445 	/*
   1446 	 * XXX: temporary kludge to restore source after a
   1447 	 * trap may have been executed.
   1448 	 */
   1449 	source = s;
   1450 	if (have_tty && ksh_tmout) {
   1451 		ksh_tmout_state = TMOUT_EXECUTING;
   1452 		alarm(0);
   1453 	}
   1454 	cp = Xstring(s->xs, xp);
   1455 	rndpush(cp);
   1456 	s->start = s->str = cp;
   1457 	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
   1458 	/* Note: if input is all nulls, this is not eof */
   1459 	if (Xlength(s->xs, xp) == 0) {
   1460 		/* EOF */
   1461 		if (s->type == SFILE)
   1462 			shf_fdclose(s->u.shf);
   1463 		s->str = NULL;
   1464 	} else if (interactive && *s->str &&
   1465 	    (cur_prompt != PS1 || !ctype(*s->str, C_IFS | C_IFSWS))) {
   1466 		histsave(&s->line, s->str, true, true);
   1467 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
   1468 	} else if (interactive && cur_prompt == PS1) {
   1469 		cp = Xstring(s->xs, xp);
   1470 		while (*cp && ctype(*cp, C_IFSWS))
   1471 			++cp;
   1472 		if (!*cp)
   1473 			histsync();
   1474 #endif
   1475 	}
   1476 	if (interactive)
   1477 		set_prompt(PS2, NULL);
   1478 }
   1479 
   1480 void
   1481 set_prompt(int to, Source *s)
   1482 {
   1483 	cur_prompt = (uint8_t)to;
   1484 
   1485 	switch (to) {
   1486 	/* command */
   1487 	case PS1:
   1488 		/*
   1489 		 * Substitute ! and !! here, before substitutions are done
   1490 		 * so ! in expanded variables are not expanded.
   1491 		 * NOTE: this is not what AT&T ksh does (it does it after
   1492 		 * substitutions, POSIX doesn't say which is to be done.
   1493 		 */
   1494 		{
   1495 			struct shf *shf;
   1496 			char * volatile ps1;
   1497 			Area *saved_atemp;
   1498 
   1499 			ps1 = str_val(global("PS1"));
   1500 			shf = shf_sopen(NULL, strlen(ps1) * 2,
   1501 			    SHF_WR | SHF_DYNAMIC, NULL);
   1502 			while (*ps1)
   1503 				if (*ps1 != '!' || *++ps1 == '!')
   1504 					shf_putchar(*ps1++, shf);
   1505 				else
   1506 					shf_fprintf(shf, "%lu", s ?
   1507 					    (unsigned long)s->line + 1 : 0UL);
   1508 			ps1 = shf_sclose(shf);
   1509 			saved_atemp = ATEMP;
   1510 			newenv(E_ERRH);
   1511 			if (kshsetjmp(e->jbuf)) {
   1512 				prompt = safe_prompt;
   1513 				/*
   1514 				 * Don't print an error - assume it has already
   1515 				 * been printed. Reason is we may have forked
   1516 				 * to run a command and the child may be
   1517 				 * unwinding its stack through this code as it
   1518 				 * exits.
   1519 				 */
   1520 			} else {
   1521 				char *cp = substitute(ps1, 0);
   1522 				strdupx(prompt, cp, saved_atemp);
   1523 			}
   1524 			quitenv(NULL);
   1525 		}
   1526 		break;
   1527 	/* command continuation */
   1528 	case PS2:
   1529 		prompt = str_val(global("PS2"));
   1530 		break;
   1531 	}
   1532 }
   1533 
   1534 int
   1535 pprompt(const char *cp, int ntruncate)
   1536 {
   1537 	char delimiter = 0;
   1538 	bool doprint = (ntruncate != -1);
   1539 	bool indelimit = false;
   1540 	int columns = 0, lines = 0;
   1541 
   1542 	/*
   1543 	 * Undocumented AT&T ksh feature:
   1544 	 * If the second char in the prompt string is \r then the first
   1545 	 * char is taken to be a non-printing delimiter and any chars
   1546 	 * between two instances of the delimiter are not considered to
   1547 	 * be part of the prompt length
   1548 	 */
   1549 	if (*cp && cp[1] == '\r') {
   1550 		delimiter = *cp;
   1551 		cp += 2;
   1552 	}
   1553 	for (; *cp; cp++) {
   1554 		if (indelimit && *cp != delimiter)
   1555 			;
   1556 		else if (*cp == '\n' || *cp == '\r') {
   1557 			lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
   1558 			columns = 0;
   1559 		} else if (*cp == '\t') {
   1560 			columns = (columns | 7) + 1;
   1561 		} else if (*cp == '\b') {
   1562 			if (columns > 0)
   1563 				columns--;
   1564 		} else if (*cp == delimiter)
   1565 			indelimit = !indelimit;
   1566 		else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
   1567 			const char *cp2;
   1568 			columns += utf_widthadj(cp, &cp2);
   1569 			if (doprint && (indelimit ||
   1570 			    (ntruncate < (x_cols * lines + columns))))
   1571 				shf_write(cp, cp2 - cp, shl_out);
   1572 			cp = cp2 - /* loop increment */ 1;
   1573 			continue;
   1574 		} else
   1575 			columns++;
   1576 		if (doprint && (*cp != delimiter) &&
   1577 		    (indelimit || (ntruncate < (x_cols * lines + columns))))
   1578 			shf_putc(*cp, shl_out);
   1579 	}
   1580 	if (doprint)
   1581 		shf_flush(shl_out);
   1582 	return (x_cols * lines + columns);
   1583 }
   1584 
   1585 /*
   1586  * Read the variable part of a ${...} expression (i.e. up to but not
   1587  * including the :[-+?=#%] or close-brace).
   1588  */
   1589 static char *
   1590 get_brace_var(XString *wsp, char *wp)
   1591 {
   1592 	char c;
   1593 	enum parse_state {
   1594 		PS_INITIAL, PS_SAW_HASH, PS_IDENT,
   1595 		PS_NUMBER, PS_VAR1
   1596 	} state = PS_INITIAL;
   1597 
   1598 	while (/* CONSTCOND */ 1) {
   1599 		c = getsc();
   1600 		/* State machine to figure out where the variable part ends. */
   1601 		switch (state) {
   1602 		case PS_INITIAL:
   1603 			if (c == '#' || c == '!' || c == '%') {
   1604 				state = PS_SAW_HASH;
   1605 				break;
   1606 			}
   1607 			/* FALLTHROUGH */
   1608 		case PS_SAW_HASH:
   1609 			if (ksh_isalphx(c))
   1610 				state = PS_IDENT;
   1611 			else if (ksh_isdigit(c))
   1612 				state = PS_NUMBER;
   1613 			else if (c == '#') {
   1614 				if (state == PS_SAW_HASH) {
   1615 					char c2;
   1616 
   1617 					c2 = getsc();
   1618 					ungetsc(c2);
   1619 					if (c2 != /*{*/ '}') {
   1620 						ungetsc(c);
   1621 						goto out;
   1622 					}
   1623 				}
   1624 				state = PS_VAR1;
   1625 			} else if (ctype(c, C_VAR1))
   1626 				state = PS_VAR1;
   1627 			else
   1628 				goto out;
   1629 			break;
   1630 		case PS_IDENT:
   1631 			if (!ksh_isalnux(c)) {
   1632 				if (c == '[') {
   1633 					char *tmp, *p;
   1634 
   1635 					if (!arraysub(&tmp))
   1636 						yyerror("missing ]\n");
   1637 					*wp++ = c;
   1638 					for (p = tmp; *p; ) {
   1639 						Xcheck(*wsp, wp);
   1640 						*wp++ = *p++;
   1641 					}
   1642 					afree(tmp, ATEMP);
   1643 					/* the ] */
   1644 					c = getsc();
   1645 				}
   1646 				goto out;
   1647 			}
   1648 			break;
   1649 		case PS_NUMBER:
   1650 			if (!ksh_isdigit(c))
   1651 				goto out;
   1652 			break;
   1653 		case PS_VAR1:
   1654 			goto out;
   1655 		}
   1656 		Xcheck(*wsp, wp);
   1657 		*wp++ = c;
   1658 	}
   1659  out:
   1660 	/* end of variable part */
   1661 	*wp++ = '\0';
   1662 	ungetsc(c);
   1663 	return (wp);
   1664 }
   1665 
   1666 /*
   1667  * Save an array subscript - returns true if matching bracket found, false
   1668  * if eof or newline was found.
   1669  * (Returned string double null terminated)
   1670  */
   1671 static bool
   1672 arraysub(char **strp)
   1673 {
   1674 	XString ws;
   1675 	char *wp, c;
   1676 	/* we are just past the initial [ */
   1677 	unsigned int depth = 1;
   1678 
   1679 	Xinit(ws, wp, 32, ATEMP);
   1680 
   1681 	do {
   1682 		c = getsc();
   1683 		Xcheck(ws, wp);
   1684 		*wp++ = c;
   1685 		if (c == '[')
   1686 			depth++;
   1687 		else if (c == ']')
   1688 			depth--;
   1689 	} while (depth > 0 && c && c != '\n');
   1690 
   1691 	*wp++ = '\0';
   1692 	*strp = Xclose(ws, wp);
   1693 
   1694 	return (tobool(depth == 0));
   1695 }
   1696 
   1697 /* Unget a char: handles case when we are already at the start of the buffer */
   1698 static void
   1699 ungetsc(int c)
   1700 {
   1701 	struct sretrace_info *rp = retrace_info;
   1702 
   1703 	if (backslash_skip)
   1704 		backslash_skip--;
   1705 	/* Don't unget EOF... */
   1706 	if (source->str == null && c == '\0')
   1707 		return;
   1708 	while (rp) {
   1709 		if (Xlength(rp->xs, rp->xp))
   1710 			rp->xp--;
   1711 		rp = rp->next;
   1712 	}
   1713 	ungetsc_i(c);
   1714 }
   1715 static void
   1716 ungetsc_i(int c)
   1717 {
   1718 	if (source->str > source->start)
   1719 		source->str--;
   1720 	else {
   1721 		Source *s;
   1722 
   1723 		s = pushs(SREREAD, source->areap);
   1724 		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
   1725 		s->start = s->str = s->ugbuf;
   1726 		s->next = source;
   1727 		source = s;
   1728 	}
   1729 }
   1730 
   1731 
   1732 /* Called to get a char that isn't a \newline sequence. */
   1733 static int
   1734 getsc_bn(void)
   1735 {
   1736 	int c, c2;
   1737 
   1738 	if (ignore_backslash_newline)
   1739 		return (o_getsc_u());
   1740 
   1741 	if (backslash_skip == 1) {
   1742 		backslash_skip = 2;
   1743 		return (o_getsc_u());
   1744 	}
   1745 
   1746 	backslash_skip = 0;
   1747 
   1748 	while (/* CONSTCOND */ 1) {
   1749 		c = o_getsc_u();
   1750 		if (c == '\\') {
   1751 			if ((c2 = o_getsc_u()) == '\n')
   1752 				/* ignore the \newline; get the next char... */
   1753 				continue;
   1754 			ungetsc_i(c2);
   1755 			backslash_skip = 1;
   1756 		}
   1757 		return (c);
   1758 	}
   1759 }
   1760 
   1761 void
   1762 yyskiputf8bom(void)
   1763 {
   1764 	int c;
   1765 
   1766 	if ((unsigned char)(c = o_getsc_u()) != 0xEF) {
   1767 		ungetsc_i(c);
   1768 		return;
   1769 	}
   1770 	if ((unsigned char)(c = o_getsc_u()) != 0xBB) {
   1771 		ungetsc_i(c);
   1772 		ungetsc_i(0xEF);
   1773 		return;
   1774 	}
   1775 	if ((unsigned char)(c = o_getsc_u()) != 0xBF) {
   1776 		ungetsc_i(c);
   1777 		ungetsc_i(0xBB);
   1778 		ungetsc_i(0xEF);
   1779 		return;
   1780 	}
   1781 	UTFMODE |= 8;
   1782 }
   1783 
   1784 static Lex_state *
   1785 push_state_i(State_info *si, Lex_state *old_end)
   1786 {
   1787 	Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);
   1788 
   1789 	news[0].ls_base = old_end;
   1790 	si->base = &news[0];
   1791 	si->end = &news[STATE_BSIZE];
   1792 	return (&news[1]);
   1793 }
   1794 
   1795 static Lex_state *
   1796 pop_state_i(State_info *si, Lex_state *old_end)
   1797 {
   1798 	Lex_state *old_base = si->base;
   1799 
   1800 	si->base = old_end->ls_base - STATE_BSIZE;
   1801 	si->end = old_end->ls_base;
   1802 
   1803 	afree(old_base, ATEMP);
   1804 
   1805 	return (si->base + STATE_BSIZE - 1);
   1806 }
   1807 
   1808 static int
   1809 s_get(void)
   1810 {
   1811 	return (getsc());
   1812 }
   1813 
   1814 static void
   1815 s_put(int c)
   1816 {
   1817 	ungetsc(c);
   1818 }
   1819