Home | History | Annotate | Download | only in src
      1 /*	$OpenBSD: lex.c,v 1.51 2015/09/10 22:48:58 nicm Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
      5  *		 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018
      6  *	mirabilos <m (at) mirbsd.org>
      7  *
      8  * Provided that these terms and disclaimer and all copyright notices
      9  * are retained or reproduced in an accompanying document, permission
     10  * is granted to deal in this work without restriction, including un-
     11  * limited rights to use, publicly perform, distribute, sell, modify,
     12  * merge, give away, or sublicence.
     13  *
     14  * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
     15  * the utmost extent permitted by applicable law, neither express nor
     16  * implied; without malicious intent or gross negligence. In no event
     17  * may a licensor, author or contributor be held liable for indirect,
     18  * direct, other damage, loss, or other issues arising in any way out
     19  * of dealing in the work, even if advised of the possibility of such
     20  * damage or existence of a defect, except proven that it results out
     21  * of said person's immediate fault when using the work as intended.
     22  */
     23 
     24 #include "sh.h"
     25 
     26 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.250 2018/10/20 18:34:14 tg Exp $");
     27 
     28 /*
     29  * states while lexing word (kept in Lex_state.type; yylex() switches on the current one)
     30  */
     31 #define SBASE		0	/* outside any lexical constructs */
     32 #define SWORD		1	/* implicit quoting for substitute(); initial state for ONEWORD */
     33 #define SLETPAREN	2	/* inside (( )), implicit quoting; initial state for LETEXPR */
     34 #define SSQUOTE		3	/* inside '' */
     35 #define SDQUOTE		4	/* inside "" */
     36 #define SEQUOTE		5	/* inside $'' */
     37 #define SBRACE		6	/* inside ${} */
     38 #define SQBRACE		7	/* inside "${}" */
     39 #define SBQUOTE		8	/* inside `` */
     40 #define SASPAREN	9	/* inside $(( )) */
     41 #define SHEREDELIM	10	/* parsing << or <<- delimiter; initial state for HEREDELIM */
     42 #define SHEREDQUOTE	11	/* parsing " in << or <<- delimiter */
     43 #define SPATTERN	12	/* parsing *(...|...) pattern (*+?@!) */
     44 #define SADELIM		13	/* like SBASE, looking for delimiter */
     45 #define STBRACEKORN	14	/* parsing ${...[#%]...} !FSH */
     46 #define STBRACEBOURNE	15	/* parsing ${...[#%]...} FSH */
     47 #define SINVALID	255	/* invalid state; type given to the slot states[0] in yylex() */
     48 
     49 struct sretrace_info {	/* one entry of the retrace_info list */
     50 	struct sretrace_info *next;	/* entry that headed the list before PUSH_SRETRACE added this one */
     51 	XString xs;	/* buffer receiving a copy of every character returned by getsc()/getsc_i() */
     52 	char *xp;	/* current write position inside xs */
     53 };
     54 
     55 /*
     56  * Structure to keep track of the lexing state and the various pieces of info
     57  * needed for each particular state. yylex() keeps a stack of these (statep).
     58  */
     59 typedef struct lex_state {
     60 	union {	/* one member in use at a time, depending on the state */
     61 		/* point to the next state block */
     62 		struct lex_state *base;	/* NULL in states[0] of yylex() */
     63 		/* marks start of state output in output string */
     64 		size_t start;	/* written by PUSH_SRETRACE, read by POP_SRETRACE */
     65 		/* SBQUOTE: true if in double quotes: "`...`" */
     66 		/* SEQUOTE: got NUL, ignore rest of string */
     67 		bool abool;
     68 		/* SADELIM information */
     69 		struct {
     70 			/* character to search for */
     71 			unsigned char delimiter;
     72 			/* max. number of delimiters */
     73 			unsigned char num;
     74 		} adelim;
     75 	} u;
     76 	/* count open parentheses */
     77 	short nparen;	/* used by SADELIM, SASPAREN and SLETPAREN */
     78 	/* type of this state */
     79 	uint8_t type;	/* one of the S* state constants */
     80 	/* extra flags */
     81 	uint8_t ls_flags;	/* LS_* bits; PUSH_STATE copies them to the newly pushed state */
     82 } Lex_state;
     83 #define ls_base		u.base	/* shorthands for the union members */
     84 #define ls_start	u.start
     85 #define ls_bool		u.abool
     86 #define ls_adelim	u.adelim
     87 
     88 /* ls_flags bits */
     89 #define LS_HEREDOC	BIT(0)	/* set on the initial state when yylex() is called with HEREDOC; makes ' an ordinary character */
     90 
     91 typedef struct {	/* bounds of the state block currently in use */
     92 	Lex_state *base;	/* first slot of the block; POP_STATE calls pop_state_i() on reaching it */
     93 	Lex_state *end;	/* one past the last slot; PUSH_STATE calls push_state_i() on reaching it */
     94 } State_info;
     95 
     96 static void readhere(struct ioword *);
     97 static void ungetsc(int);
     98 static void ungetsc_i(int);
     99 static int getsc_uu(void);	/* slow path of o_getsc_u() */
    100 static void getsc_line(Source *);
    101 static int getsc_bn(void);	/* slow path of o_getsc() */
    102 static int getsc_i(void);	/* getsc() as a real function, usable as a callback */
    103 static char *get_brace_var(XString *, char *);
    104 static bool arraysub(char **);
    105 static void gethere(void);
    106 static Lex_state *push_state_i(State_info *, Lex_state *);	/* called by PUSH_STATE when statep reaches state_info.end */
    107 static Lex_state *pop_state_i(State_info *, Lex_state *);	/* called by POP_STATE when statep reaches state_info.base */
    108 
    109 static int backslash_skip;	/* non-zero forces o_getsc() onto its getsc_bn() slow path; zeroed at the start of yylex() */
    110 static int ignore_backslash_newline;	/* counter raised by yylex() while skipping a comment or inside '' and $''; presumably consulted by getsc_bn() - confirm there */
    111 
    112 /* optimised getsc_bn(): consume the next byte of source->str inline unless it is NUL or a backslash or backslash_skip is set; otherwise call getsc_bn() */
    113 #define o_getsc()	(*source->str != '\0' && *source->str != '\\' && \
    114 			    !backslash_skip ? *source->str++ : getsc_bn())
    115 /* optimised getsc_uu(): consume the next byte of source->str inline unless it is NUL; otherwise call getsc_uu() */
    116 #define	o_getsc_u()	((*source->str != '\0') ? *source->str++ : getsc_uu())
    117 
    118 /* retrace helper: complete body of an int function; evaluates carg once, appends it to every buffer on the retrace_info list, then returns it */
    119 #define o_getsc_r(carg)					\
    120 	int cev = (carg);				\
    121 	struct sretrace_info *rp = retrace_info;	\
    122 							\
    123 	while (rp) {					\
    124 		Xcheck(rp->xs, rp->xp);			\
    125 		*rp->xp++ = cev;			\
    126 		rp = rp->next;				\
    127 	}						\
    128 							\
    129 	return (cev);
    130 
    131 /* callback (yylex() passes it to unbksl()): read one character via o_getsc(), as unsigned char, and record it in every retrace buffer */
    132 static int
    133 getsc_i(void)
    134 {
    135 	o_getsc_r((unsigned int)(unsigned char)o_getsc());	/* the macro supplies the return statement */
    136 }
    137 
    138 #if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST)	/* small build: getsc() is a plain call to getsc_i() */
    139 #define getsc()		getsc_i()
    140 #else	/* otherwise o_getsc() is expanded at each getsc() call site and only the retrace step is a function call */
    141 static int getsc_r(int);
    142 /* record the already-read character c in every retrace buffer and return it */
    143 static int
    144 getsc_r(int c)
    145 {
    146 	o_getsc_r(c);	/* the macro supplies the return statement */
    147 }
    148 
    149 #define getsc()		getsc_r((unsigned int)(unsigned char)o_getsc())
    150 #endif
    151 
    152 #define STATE_BSIZE	8	/* number of Lex_state slots in one state block (size of states[] in yylex()) */
    153 /* PUSH_STATE(s): advance statep (via push_state_i() when the block is exhausted), make s the current state and inherit ls_flags from the previous one; relies on yylex()'s locals statep, state and state_info */
    154 #define PUSH_STATE(s)	do {					\
    155 	uint8_t state_flags = statep->ls_flags;			\
    156 	if (++statep == state_info.end)				\
    157 		statep = push_state_i(&state_info, statep);	\
    158 	state = statep->type = (s);				\
    159 	statep->ls_flags = state_flags;				\
    160 } while (/* CONSTCOND */ 0)
    161 /* POP_STATE(): step statep back (via pop_state_i() when the block's first slot is reached) and reload state from the slot now on top; relies on yylex()'s locals statep, state and state_info */
    162 #define POP_STATE()	do {					\
    163 	if (--statep == state_info.base)			\
    164 		statep = pop_state_i(&state_info, statep);	\
    165 	state = statep->type;					\
    166 } while (/* CONSTCOND */ 0)
    167 /* PUSH_SRETRACE(s): push state s, save the current output position (ws, wp) in ls_start and put a fresh ATEMP-allocated retrace buffer at the head of retrace_info */
    168 #define PUSH_SRETRACE(s) do {					\
    169 	struct sretrace_info *ri;				\
    170 								\
    171 	PUSH_STATE(s);						\
    172 	statep->ls_start = Xsavepos(ws, wp);			\
    173 	ri = alloc(sizeof(struct sretrace_info), ATEMP);	\
    174 	Xinit(ri->xs, ri->xp, 64, ATEMP);			\
    175 	ri->next = retrace_info;				\
    176 	retrace_info = ri;					\
    177 } while (/* CONSTCOND */ 0)
    178 /* POP_SRETRACE(): restore wp from ls_start, NUL-terminate the head retrace buffer and leave its string in sp, unlink and free the list entry (using dp as scratch), then pop the state; sp is not freed here (yylex() frees it or hands it to a Source) */
    179 #define POP_SRETRACE()	do {					\
    180 	wp = Xrestpos(ws, wp, statep->ls_start);		\
    181 	*retrace_info->xp = '\0';				\
    182 	sp = Xstring(retrace_info->xs, retrace_info->xp);	\
    183 	dp = (void *)retrace_info;				\
    184 	retrace_info = retrace_info->next;			\
    185 	afree(dp, ATEMP);					\
    186 	POP_STATE();						\
    187 } while (/* CONSTCOND */ 0)
    188 
    189 /**
    190  * Lexical analyser
    191  *
    192  * tokens are not regular expressions, they are LL(1).
    193  * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
    194  * hence the state stack. Note "$(...)" are now parsed recursively.
    195  */
    196 
    197 int
    198 yylex(int cf)
    199 {
    200 	Lex_state states[STATE_BSIZE], *statep, *s2, *base;
    201 	State_info state_info;
    202 	int c, c2, state;
    203 	size_t cz;
    204 	XString ws;		/* expandable output word */
    205 	char *wp;		/* output word pointer */
    206 	char *sp, *dp;
    207 
    208  Again:
    209 	states[0].type = SINVALID;
    210 	states[0].ls_base = NULL;
    211 	statep = &states[1];
    212 	state_info.base = states;
    213 	state_info.end = &state_info.base[STATE_BSIZE];
    214 
    215 	Xinit(ws, wp, 64, ATEMP);
    216 
    217 	backslash_skip = 0;
    218 	ignore_backslash_newline = 0;
    219 
    220 	if (cf & ONEWORD)
    221 		state = SWORD;
    222 	else if (cf & LETEXPR) {
    223 		/* enclose arguments in (double) quotes */
    224 		*wp++ = OQUOTE;
    225 		state = SLETPAREN;
    226 		statep->nparen = 0;
    227 	} else {
    228 		/* normal lexing */
    229 		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
    230 		do {
    231 			c = getsc();
    232 		} while (ctype(c, C_BLANK));
    233 		if (c == '#') {
    234 			ignore_backslash_newline++;
    235 			do {
    236 				c = getsc();
    237 			} while (!ctype(c, C_NUL | C_LF));
    238 			ignore_backslash_newline--;
    239 		}
    240 		ungetsc(c);
    241 	}
    242 	if (source->flags & SF_ALIAS) {
    243 		/* trailing ' ' in alias definition */
    244 		source->flags &= ~SF_ALIAS;
    245 		/* POSIX: trailing space only counts if parsing simple cmd */
    246 		if (!Flag(FPOSIX) || (cf & CMDWORD))
    247 			cf |= ALIAS;
    248 	}
    249 
    250 	/* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
    251 	statep->type = state;
    252 	statep->ls_flags = (cf & HEREDOC) ? LS_HEREDOC : 0;
    253 
    254 	/* collect non-special or quoted characters to form word */
    255 	while (!((c = getsc()) == 0 ||
    256 	    ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
    257 		if (state == SBASE &&
    258 		    subshell_nesting_type == ORD(/*{*/ '}') &&
    259 		    (unsigned int)c == ORD(/*{*/ '}'))
    260 			/* possibly end ${ :;} */
    261 			break;
    262 		Xcheck(ws, wp);
    263 		switch (state) {
    264 		case SADELIM:
    265 			if ((unsigned int)c == ORD('('))
    266 				statep->nparen++;
    267 			else if ((unsigned int)c == ORD(')'))
    268 				statep->nparen--;
    269 			else if (statep->nparen == 0 &&
    270 			    ((unsigned int)c == ORD(/*{*/ '}') ||
    271 			    c == (int)statep->ls_adelim.delimiter)) {
    272 				*wp++ = ADELIM;
    273 				*wp++ = c;
    274 				if ((unsigned int)c == ORD(/*{*/ '}') ||
    275 				    --statep->ls_adelim.num == 0)
    276 					POP_STATE();
    277 				if ((unsigned int)c == ORD(/*{*/ '}'))
    278 					POP_STATE();
    279 				break;
    280 			}
    281 			/* FALLTHROUGH */
    282 		case SBASE:
    283 			if ((unsigned int)c == ORD('[') && (cf & CMDASN)) {
    284 				/* temporary */
    285 				*wp = EOS;
    286 				if (is_wdvarname(Xstring(ws, wp), false)) {
    287 					char *p, *tmp;
    288 
    289 					if (arraysub(&tmp)) {
    290 						*wp++ = CHAR;
    291 						*wp++ = c;
    292 						for (p = tmp; *p; ) {
    293 							Xcheck(ws, wp);
    294 							*wp++ = CHAR;
    295 							*wp++ = *p++;
    296 						}
    297 						afree(tmp, ATEMP);
    298 						break;
    299 					}
    300 				}
    301 				*wp++ = CHAR;
    302 				*wp++ = c;
    303 				break;
    304 			}
    305 			/* FALLTHROUGH */
    306  Sbase1:		/* includes *(...|...) pattern (*+?@!) */
    307 			if (ctype(c, C_PATMO)) {
    308 				c2 = getsc();
    309 				if ((unsigned int)c2 == ORD('(' /*)*/)) {
    310 					*wp++ = OPAT;
    311 					*wp++ = c;
    312 					PUSH_STATE(SPATTERN);
    313 					break;
    314 				}
    315 				ungetsc(c2);
    316 			}
    317 			/* FALLTHROUGH */
    318  Sbase2:		/* doesn't include *(...|...) pattern (*+?@!) */
    319 			switch (c) {
    320 			case ORD('\\'):
    321  getsc_qchar:
    322 				if ((c = getsc())) {
    323 					/* trailing \ is lost */
    324 					*wp++ = QCHAR;
    325 					*wp++ = c;
    326 				}
    327 				break;
    328 			case ORD('\''):
    329  open_ssquote_unless_heredoc:
    330 				if ((statep->ls_flags & LS_HEREDOC))
    331 					goto store_char;
    332 				*wp++ = OQUOTE;
    333 				ignore_backslash_newline++;
    334 				PUSH_STATE(SSQUOTE);
    335 				break;
    336 			case ORD('"'):
    337  open_sdquote:
    338 				*wp++ = OQUOTE;
    339 				PUSH_STATE(SDQUOTE);
    340 				break;
    341 			case ORD('$'):
    342 				/*
    343 				 * processing of dollar sign belongs into
    344 				 * Subst, except for those which can open
    345 				 * a string: $'' and $""
    346 				 */
    347  subst_dollar_ex:
    348 				c = getsc();
    349 				switch (c) {
    350 				case ORD('"'):
    351 					goto open_sdquote;
    352 				case ORD('\''):
    353 					goto open_sequote;
    354 				default:
    355 					goto SubstS;
    356 				}
    357 			default:
    358 				goto Subst;
    359 			}
    360 			break;
    361 
    362  Subst:
    363 			switch (c) {
    364 			case ORD('\\'):
    365 				c = getsc();
    366 				switch (c) {
    367 				case ORD('"'):
    368 					if ((statep->ls_flags & LS_HEREDOC))
    369 						goto heredocquote;
    370 					/* FALLTHROUGH */
    371 				case ORD('\\'):
    372 				case ORD('$'):
    373 				case ORD('`'):
    374  store_qchar:
    375 					*wp++ = QCHAR;
    376 					*wp++ = c;
    377 					break;
    378 				default:
    379  heredocquote:
    380 					Xcheck(ws, wp);
    381 					if (c) {
    382 						/* trailing \ is lost */
    383 						*wp++ = CHAR;
    384 						*wp++ = '\\';
    385 						*wp++ = CHAR;
    386 						*wp++ = c;
    387 					}
    388 					break;
    389 				}
    390 				break;
    391 			case ORD('$'):
    392 				c = getsc();
    393  SubstS:
    394 				if ((unsigned int)c == ORD('(' /*)*/)) {
    395 					c = getsc();
    396 					if ((unsigned int)c == ORD('(' /*)*/)) {
    397 						*wp++ = EXPRSUB;
    398 						PUSH_SRETRACE(SASPAREN);
    399 						/* unneeded? */
    400 						/*statep->ls_flags &= ~LS_HEREDOC;*/
    401 						statep->nparen = 2;
    402 						*retrace_info->xp++ = '(';
    403 					} else {
    404 						ungetsc(c);
    405  subst_command:
    406 						c = COMSUB;
    407  subst_command2:
    408 						sp = yyrecursive(c);
    409 						cz = strlen(sp) + 1;
    410 						XcheckN(ws, wp, cz);
    411 						*wp++ = c;
    412 						memcpy(wp, sp, cz);
    413 						wp += cz;
    414 					}
    415 				} else if ((unsigned int)c == ORD('{' /*}*/)) {
    416 					if ((unsigned int)(c = getsc()) == ORD('|')) {
    417 						/*
    418 						 * non-subenvironment
    419 						 * value substitution
    420 						 */
    421 						c = VALSUB;
    422 						goto subst_command2;
    423 					} else if (ctype(c, C_IFSWS)) {
    424 						/*
    425 						 * non-subenvironment
    426 						 * "command" substitution
    427 						 */
    428 						c = FUNSUB;
    429 						goto subst_command2;
    430 					}
    431 					ungetsc(c);
    432 					*wp++ = OSUBST;
    433 					*wp++ = '{' /*}*/;
    434 					wp = get_brace_var(&ws, wp);
    435 					c = getsc();
    436 					/* allow :# and :% (ksh88 compat) */
    437 					if ((unsigned int)c == ORD(':')) {
    438 						*wp++ = CHAR;
    439 						*wp++ = c;
    440 						c = getsc();
    441 						if ((unsigned int)c == ORD(':')) {
    442 							*wp++ = CHAR;
    443 							*wp++ = '0';
    444 							*wp++ = ADELIM;
    445 							*wp++ = ':';
    446 							PUSH_STATE(SBRACE);
    447 							/* perhaps unneeded? */
    448 							statep->ls_flags &= ~LS_HEREDOC;
    449 							PUSH_STATE(SADELIM);
    450 							statep->ls_adelim.delimiter = ':';
    451 							statep->ls_adelim.num = 1;
    452 							statep->nparen = 0;
    453 							break;
    454 						} else if (ctype(c, C_DIGIT | C_DOLAR | C_SPC) ||
    455 						    /*XXX what else? */
    456 						    c == '(' /*)*/) {
    457 							/* substring subst. */
    458 							if (c != ' ') {
    459 								*wp++ = CHAR;
    460 								*wp++ = ' ';
    461 							}
    462 							ungetsc(c);
    463 							PUSH_STATE(SBRACE);
    464 							/* perhaps unneeded? */
    465 							statep->ls_flags &= ~LS_HEREDOC;
    466 							PUSH_STATE(SADELIM);
    467 							statep->ls_adelim.delimiter = ':';
    468 							statep->ls_adelim.num = 2;
    469 							statep->nparen = 0;
    470 							break;
    471 						}
    472 					} else if (c == '/') {
    473 						c2 = ADELIM;
    474  parse_adelim_slash:
    475 						*wp++ = CHAR;
    476 						*wp++ = c;
    477 						if ((unsigned int)(c = getsc()) == ORD('/')) {
    478 							*wp++ = c2;
    479 							*wp++ = c;
    480 						} else
    481 							ungetsc(c);
    482 						PUSH_STATE(SBRACE);
    483 						/* perhaps unneeded? */
    484 						statep->ls_flags &= ~LS_HEREDOC;
    485 						PUSH_STATE(SADELIM);
    486 						statep->ls_adelim.delimiter = '/';
    487 						statep->ls_adelim.num = 1;
    488 						statep->nparen = 0;
    489 						break;
    490 					} else if (c == '@') {
    491 						c2 = getsc();
    492 						ungetsc(c2);
    493 						if ((unsigned int)c2 == ORD('/')) {
    494 							c2 = CHAR;
    495 							goto parse_adelim_slash;
    496 						}
    497 					}
    498 					/*
    499 					 * If this is a trim operation,
    500 					 * treat (,|,) specially in STBRACE.
    501 					 */
    502 					if (ctype(c, C_SUB2)) {
    503 						ungetsc(c);
    504 						if (Flag(FSH))
    505 							PUSH_STATE(STBRACEBOURNE);
    506 						else
    507 							PUSH_STATE(STBRACEKORN);
    508 						/* single-quotes-in-heredoc-trim */
    509 						statep->ls_flags &= ~LS_HEREDOC;
    510 					} else {
    511 						ungetsc(c);
    512 						if (state == SDQUOTE ||
    513 						    state == SQBRACE)
    514 							PUSH_STATE(SQBRACE);
    515 						else
    516 							PUSH_STATE(SBRACE);
    517 						/* here no LS_HEREDOC removal */
    518 						/* single-quotes-in-heredoc-braces */
    519 					}
    520 				} else if (ctype(c, C_ALPHX)) {
    521 					*wp++ = OSUBST;
    522 					*wp++ = 'X';
    523 					do {
    524 						Xcheck(ws, wp);
    525 						*wp++ = c;
    526 						c = getsc();
    527 					} while (ctype(c, C_ALNUX));
    528 					*wp++ = '\0';
    529 					*wp++ = CSUBST;
    530 					*wp++ = 'X';
    531 					ungetsc(c);
    532 				} else if (ctype(c, C_VAR1 | C_DIGIT)) {
    533 					Xcheck(ws, wp);
    534 					*wp++ = OSUBST;
    535 					*wp++ = 'X';
    536 					*wp++ = c;
    537 					*wp++ = '\0';
    538 					*wp++ = CSUBST;
    539 					*wp++ = 'X';
    540 				} else {
    541 					*wp++ = CHAR;
    542 					*wp++ = '$';
    543 					ungetsc(c);
    544 				}
    545 				break;
    546 			case ORD('`'):
    547  subst_gravis:
    548 				PUSH_STATE(SBQUOTE);
    549 				*wp++ = COMASUB;
    550 				/*
    551 				 * We need to know whether we are within double
    552 				 * quotes in order to translate \" to " within
    553 				 * "`\"`" because, unlike for COMSUBs, the
    554 				 * outer double quoteing changes the backslash
    555 				 * meaning for the inside. For more details:
    556 				 * http://austingroupbugs.net/view.php?id=1015
    557 				 */
    558 				statep->ls_bool = false;
    559 				s2 = statep;
    560 				base = state_info.base;
    561 				while (/* CONSTCOND */ 1) {
    562 					for (; s2 != base; s2--) {
    563 						if (s2->type == SDQUOTE) {
    564 							statep->ls_bool = true;
    565 							break;
    566 						}
    567 					}
    568 					if (s2 != base)
    569 						break;
    570 					if (!(s2 = s2->ls_base))
    571 						break;
    572 					base = s2-- - STATE_BSIZE;
    573 				}
    574 				break;
    575 			case QCHAR:
    576 				if (cf & LQCHAR) {
    577 					*wp++ = QCHAR;
    578 					*wp++ = getsc();
    579 					break;
    580 				}
    581 				/* FALLTHROUGH */
    582 			default:
    583  store_char:
    584 				*wp++ = CHAR;
    585 				*wp++ = c;
    586 			}
    587 			break;
    588 
    589 		case SEQUOTE:
    590 			if ((unsigned int)c == ORD('\'')) {
    591 				POP_STATE();
    592 				*wp++ = CQUOTE;
    593 				ignore_backslash_newline--;
    594 			} else if ((unsigned int)c == ORD('\\')) {
    595 				if ((c2 = unbksl(true, getsc_i, ungetsc)) == -1)
    596 					c2 = getsc();
    597 				if (c2 == 0)
    598 					statep->ls_bool = true;
    599 				if (!statep->ls_bool) {
    600 					char ts[4];
    601 
    602 					if ((unsigned int)c2 < 0x100) {
    603 						*wp++ = QCHAR;
    604 						*wp++ = c2;
    605 					} else {
    606 						cz = utf_wctomb(ts, c2 - 0x100);
    607 						ts[cz] = 0;
    608 						cz = 0;
    609 						do {
    610 							*wp++ = QCHAR;
    611 							*wp++ = ts[cz];
    612 						} while (ts[++cz]);
    613 					}
    614 				}
    615 			} else if (!statep->ls_bool) {
    616 				*wp++ = QCHAR;
    617 				*wp++ = c;
    618 			}
    619 			break;
    620 
    621 		case SSQUOTE:
    622 			if ((unsigned int)c == ORD('\'')) {
    623 				POP_STATE();
    624 				if ((statep->ls_flags & LS_HEREDOC) ||
    625 				    state == SQBRACE)
    626 					goto store_char;
    627 				*wp++ = CQUOTE;
    628 				ignore_backslash_newline--;
    629 			} else {
    630 				*wp++ = QCHAR;
    631 				*wp++ = c;
    632 			}
    633 			break;
    634 
    635 		case SDQUOTE:
    636 			if ((unsigned int)c == ORD('"')) {
    637 				POP_STATE();
    638 				*wp++ = CQUOTE;
    639 			} else
    640 				goto Subst;
    641 			break;
    642 
    643 		/* $(( ... )) */
    644 		case SASPAREN:
    645 			if ((unsigned int)c == ORD('('))
    646 				statep->nparen++;
    647 			else if ((unsigned int)c == ORD(')')) {
    648 				statep->nparen--;
    649 				if (statep->nparen == 1) {
    650 					/* end of EXPRSUB */
    651 					POP_SRETRACE();
    652 
    653 					if ((unsigned int)(c2 = getsc()) == ORD(/*(*/ ')')) {
    654 						cz = strlen(sp) - 2;
    655 						XcheckN(ws, wp, cz);
    656 						memcpy(wp, sp + 1, cz);
    657 						wp += cz;
    658 						afree(sp, ATEMP);
    659 						*wp++ = '\0';
    660 						break;
    661 					} else {
    662 						Source *s;
    663 
    664 						ungetsc(c2);
    665 						/*
    666 						 * mismatched parenthesis -
    667 						 * assume we were really
    668 						 * parsing a $(...) expression
    669 						 */
    670 						--wp;
    671 						s = pushs(SREREAD,
    672 						    source->areap);
    673 						s->start = s->str =
    674 						    s->u.freeme = sp;
    675 						s->next = source;
    676 						source = s;
    677 						goto subst_command;
    678 					}
    679 				}
    680 			}
    681 			/* reuse existing state machine */
    682 			goto Sbase2;
    683 
    684 		case SQBRACE:
    685 			if ((unsigned int)c == ORD('\\')) {
    686 				/*
    687 				 * perform POSIX "quote removal" if the back-
    688 				 * slash is "special", i.e. same cases as the
    689 				 * {case '\\':} in Subst: plus closing brace;
    690 				 * in mksh code "quote removal" on '\c' means
    691 				 * write QCHAR+c, otherwise CHAR+\+CHAR+c are
    692 				 * emitted (in heredocquote:)
    693 				 */
    694 				if ((unsigned int)(c = getsc()) == ORD('"') ||
    695 				    (unsigned int)c == ORD('\\') ||
    696 				    ctype(c, C_DOLAR | C_GRAVE) ||
    697 				    (unsigned int)c == ORD(/*{*/ '}'))
    698 					goto store_qchar;
    699 				goto heredocquote;
    700 			}
    701 			goto common_SQBRACE;
    702 
    703 		case SBRACE:
    704 			if ((unsigned int)c == ORD('\''))
    705 				goto open_ssquote_unless_heredoc;
    706 			else if ((unsigned int)c == ORD('\\'))
    707 				goto getsc_qchar;
    708  common_SQBRACE:
    709 			if ((unsigned int)c == ORD('"'))
    710 				goto open_sdquote;
    711 			else if ((unsigned int)c == ORD('$'))
    712 				goto subst_dollar_ex;
    713 			else if ((unsigned int)c == ORD('`'))
    714 				goto subst_gravis;
    715 			else if ((unsigned int)c != ORD(/*{*/ '}'))
    716 				goto store_char;
    717 			POP_STATE();
    718 			*wp++ = CSUBST;
    719 			*wp++ = /*{*/ '}';
    720 			break;
    721 
    722 		/* Same as SBASE, except (,|,) treated specially */
    723 		case STBRACEKORN:
    724 			if ((unsigned int)c == ORD('|'))
    725 				*wp++ = SPAT;
    726 			else if ((unsigned int)c == ORD('(')) {
    727 				*wp++ = OPAT;
    728 				/* simile for @ */
    729 				*wp++ = ' ';
    730 				PUSH_STATE(SPATTERN);
    731 			} else /* FALLTHROUGH */
    732 		case STBRACEBOURNE:
    733 			  if ((unsigned int)c == ORD(/*{*/ '}')) {
    734 				POP_STATE();
    735 				*wp++ = CSUBST;
    736 				*wp++ = /*{*/ '}';
    737 			} else
    738 				goto Sbase1;
    739 			break;
    740 
    741 		case SBQUOTE:
    742 			if ((unsigned int)c == ORD('`')) {
    743 				*wp++ = 0;
    744 				POP_STATE();
    745 			} else if ((unsigned int)c == ORD('\\')) {
    746 				switch (c = getsc()) {
    747 				case 0:
    748 					/* trailing \ is lost */
    749 					break;
    750 				case ORD('$'):
    751 				case ORD('`'):
    752 				case ORD('\\'):
    753 					*wp++ = c;
    754 					break;
    755 				case ORD('"'):
    756 					if (statep->ls_bool) {
    757 						*wp++ = c;
    758 						break;
    759 					}
    760 					/* FALLTHROUGH */
    761 				default:
    762 					*wp++ = '\\';
    763 					*wp++ = c;
    764 					break;
    765 				}
    766 			} else
    767 				*wp++ = c;
    768 			break;
    769 
    770 		/* ONEWORD */
    771 		case SWORD:
    772 			goto Subst;
    773 
    774 		/* LETEXPR: (( ... )) */
    775 		case SLETPAREN:
    776 			if ((unsigned int)c == ORD(/*(*/ ')')) {
    777 				if (statep->nparen > 0)
    778 					--statep->nparen;
    779 				else if ((unsigned int)(c2 = getsc()) == ORD(/*(*/ ')')) {
    780 					c = 0;
    781 					*wp++ = CQUOTE;
    782 					goto Done;
    783 				} else {
    784 					Source *s;
    785 
    786 					ungetsc(c2);
    787 					ungetsc(c);
    788 					/*
    789 					 * mismatched parenthesis -
    790 					 * assume we were really
    791 					 * parsing a (...) expression
    792 					 */
    793 					*wp = EOS;
    794 					sp = Xstring(ws, wp);
    795 					dp = wdstrip(sp + 1, WDS_TPUTS);
    796 					s = pushs(SREREAD, source->areap);
    797 					s->start = s->str = s->u.freeme = dp;
    798 					s->next = source;
    799 					source = s;
    800 					ungetsc('(' /*)*/);
    801 					return (ORD('(' /*)*/));
    802 				}
    803 			} else if ((unsigned int)c == ORD('('))
    804 				/*
    805 				 * parentheses inside quotes and
    806 				 * backslashes are lost, but AT&T ksh
    807 				 * doesn't count them either
    808 				 */
    809 				++statep->nparen;
    810 			goto Sbase2;
    811 
    812 		/* << or <<- delimiter */
    813 		case SHEREDELIM:
    814 			/*
    815 			 * here delimiters need a special case since
    816 			 * $ and `...` are not to be treated specially
    817 			 */
    818 			switch (c) {
    819 			case ORD('\\'):
    820 				if ((c = getsc())) {
    821 					/* trailing \ is lost */
    822 					*wp++ = QCHAR;
    823 					*wp++ = c;
    824 				}
    825 				break;
    826 			case ORD('\''):
    827 				goto open_ssquote_unless_heredoc;
    828 			case ORD('$'):
    829 				if ((unsigned int)(c2 = getsc()) == ORD('\'')) {
    830  open_sequote:
    831 					*wp++ = OQUOTE;
    832 					ignore_backslash_newline++;
    833 					PUSH_STATE(SEQUOTE);
    834 					statep->ls_bool = false;
    835 					break;
    836 				} else if ((unsigned int)c2 == ORD('"')) {
    837 					/* FALLTHROUGH */
    838 			case ORD('"'):
    839 					PUSH_SRETRACE(SHEREDQUOTE);
    840 					break;
    841 				}
    842 				ungetsc(c2);
    843 				/* FALLTHROUGH */
    844 			default:
    845 				*wp++ = CHAR;
    846 				*wp++ = c;
    847 			}
    848 			break;
    849 
    850 		/* " in << or <<- delimiter */
    851 		case SHEREDQUOTE:
    852 			if ((unsigned int)c != ORD('"'))
    853 				goto Subst;
    854 			POP_SRETRACE();
    855 			dp = strnul(sp) - 1;
    856 			/* remove the trailing double quote */
    857 			*dp = '\0';
    858 			/* store the quoted string */
    859 			*wp++ = OQUOTE;
    860 			XcheckN(ws, wp, (dp - sp) * 2);
    861 			dp = sp;
    862 			while ((c = *dp++)) {
    863 				if (c == '\\') {
    864 					switch ((c = *dp++)) {
    865 					case ORD('\\'):
    866 					case ORD('"'):
    867 					case ORD('$'):
    868 					case ORD('`'):
    869 						break;
    870 					default:
    871 						*wp++ = CHAR;
    872 						*wp++ = '\\';
    873 						break;
    874 					}
    875 				}
    876 				*wp++ = CHAR;
    877 				*wp++ = c;
    878 			}
    879 			afree(sp, ATEMP);
    880 			*wp++ = CQUOTE;
    881 			state = statep->type = SHEREDELIM;
    882 			break;
    883 
    884 		/* in *(...|...) pattern (*+?@!) */
    885 		case SPATTERN:
    886 			if ((unsigned int)c == ORD(/*(*/ ')')) {
    887 				*wp++ = CPAT;
    888 				POP_STATE();
    889 			} else if ((unsigned int)c == ORD('|')) {
    890 				*wp++ = SPAT;
    891 			} else if ((unsigned int)c == ORD('(')) {
    892 				*wp++ = OPAT;
    893 				/* simile for @ */
    894 				*wp++ = ' ';
    895 				PUSH_STATE(SPATTERN);
    896 			} else
    897 				goto Sbase1;
    898 			break;
    899 		}
    900 	}
    901  Done:
    902 	Xcheck(ws, wp);
    903 	if (statep != &states[1])
    904 		/* XXX figure out what is missing */
    905 		yyerror("no closing quote");
    906 
    907 	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
    908 	if (state == SHEREDELIM)
    909 		state = SBASE;
    910 
    911 	dp = Xstring(ws, wp);
    912 	if (state == SBASE && (
    913 	    (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) ||
    914 	    ctype(c, C_ANGLE)) && ((c2 = Xlength(ws, wp)) == 0 ||
    915 	    (c2 == 2 && dp[0] == CHAR && ctype(dp[1], C_DIGIT)))) {
    916 		struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
    917 
    918 		iop->unit = c2 == 2 ? ksh_numdig(dp[1]) : c == '<' ? 0 : 1;
    919 
    920 		if (c == '&') {
    921 			if ((unsigned int)(c2 = getsc()) != ORD('>')) {
    922 				ungetsc(c2);
    923 				goto no_iop;
    924 			}
    925 			c = c2;
    926 			iop->ioflag = IOBASH;
    927 		} else
    928 			iop->ioflag = 0;
    929 
    930 		c2 = getsc();
    931 		/* <<, >>, <> are ok, >< is not */
    932 		if (c == c2 || ((unsigned int)c == ORD('<') &&
    933 		    (unsigned int)c2 == ORD('>'))) {
    934 			iop->ioflag |= c == c2 ?
    935 			    ((unsigned int)c == ORD('>') ? IOCAT : IOHERE) : IORDWR;
    936 			if (iop->ioflag == IOHERE) {
    937 				if ((unsigned int)(c2 = getsc()) == ORD('-'))
    938 					iop->ioflag |= IOSKIP;
    939 				else if ((unsigned int)c2 == ORD('<'))
    940 					iop->ioflag |= IOHERESTR;
    941 				else
    942 					ungetsc(c2);
    943 			}
    944 		} else if ((unsigned int)c2 == ORD('&'))
    945 			iop->ioflag |= IODUP | ((unsigned int)c == ORD('<') ? IORDUP : 0);
    946 		else {
    947 			iop->ioflag |= (unsigned int)c == ORD('>') ? IOWRITE : IOREAD;
    948 			if ((unsigned int)c == ORD('>') && (unsigned int)c2 == ORD('|'))
    949 				iop->ioflag |= IOCLOB;
    950 			else
    951 				ungetsc(c2);
    952 		}
    953 
    954 		iop->ioname = NULL;
    955 		iop->delim = NULL;
    956 		iop->heredoc = NULL;
    957 		/* free word */
    958 		Xfree(ws, wp);
    959 		yylval.iop = iop;
    960 		return (REDIR);
    961  no_iop:
    962 		afree(iop, ATEMP);
    963 	}
    964 
    965 	if (wp == dp && state == SBASE) {
    966 		/* free word */
    967 		Xfree(ws, wp);
    968 		/* no word, process LEX1 character */
    969 		if (((unsigned int)c == ORD('|')) ||
    970 		    ((unsigned int)c == ORD('&')) ||
    971 		    ((unsigned int)c == ORD(';')) ||
    972 		    ((unsigned int)c == ORD('(' /*)*/))) {
    973 			if ((c2 = getsc()) == c)
    974 				c = ((unsigned int)c == ORD(';')) ? BREAK :
    975 				    ((unsigned int)c == ORD('|')) ? LOGOR :
    976 				    ((unsigned int)c == ORD('&')) ? LOGAND :
    977 				    /* (unsigned int)c == ORD('(' )) */ MDPAREN;
    978 			else if ((unsigned int)c == ORD('|') && (unsigned int)c2 == ORD('&'))
    979 				c = COPROC;
    980 			else if ((unsigned int)c == ORD(';') && (unsigned int)c2 == ORD('|'))
    981 				c = BRKEV;
    982 			else if ((unsigned int)c == ORD(';') && (unsigned int)c2 == ORD('&'))
    983 				c = BRKFT;
    984 			else
    985 				ungetsc(c2);
    986 #ifndef MKSH_SMALL
    987 			if (c == BREAK) {
    988 				if ((unsigned int)(c2 = getsc()) == ORD('&'))
    989 					c = BRKEV;
    990 				else
    991 					ungetsc(c2);
    992 			}
    993 #endif
    994 		} else if ((unsigned int)c == ORD('\n')) {
    995 			if (cf & HEREDELIM)
    996 				ungetsc(c);
    997 			else {
    998 				gethere();
    999 				if (cf & CONTIN)
   1000 					goto Again;
   1001 			}
   1002 		} else if (c == '\0' && !(cf & HEREDELIM)) {
   1003 			struct ioword **p = heres;
   1004 
   1005 			while (p < herep)
   1006 				if ((*p)->ioflag & IOHERESTR)
   1007 					++p;
   1008 				else
   1009 					/* ksh -c 'cat <<EOF' can cause this */
   1010 					yyerror(Tf_heredoc,
   1011 					    evalstr((*p)->delim, 0));
   1012 		}
   1013 		return (c);
   1014 	}
   1015 
   1016 	/* terminate word */
   1017 	*wp++ = EOS;
   1018 	yylval.cp = Xclose(ws, wp);
   1019 	if (state == SWORD || state == SLETPAREN
   1020 	    /* XXX ONEWORD? */)
   1021 		return (LWORD);
   1022 
   1023 	/* unget terminator */
   1024 	ungetsc(c);
   1025 
   1026 	/*
   1027 	 * note: the alias-vs-function code below depends on several
   1028 	 * interna: starting from here, source->str is not modified;
   1029 	 * the way getsc() and ungetsc() operate; etc.
   1030 	 */
   1031 
   1032 	/* copy word to unprefixed string ident */
   1033 	sp = yylval.cp;
   1034 	dp = ident;
   1035 	while ((dp - ident) < IDENT && (c = *sp++) == CHAR)
   1036 		*dp++ = *sp++;
   1037 	if (c != EOS)
   1038 		/* word is not unquoted, or space ran out */
   1039 		dp = ident;
   1040 	/* make sure the ident array stays NUL padded */
   1041 	memset(dp, 0, (ident + IDENT) - dp + 1);
   1042 
   1043 	if (*ident != '\0' && (cf & (KEYWORD | ALIAS))) {
   1044 		struct tbl *p;
   1045 		uint32_t h = hash(ident);
   1046 
   1047 		if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
   1048 		    (!(cf & ESACONLY) || p->val.i == ESAC ||
   1049 		    (unsigned int)p->val.i == ORD(/*{*/ '}'))) {
   1050 			afree(yylval.cp, ATEMP);
   1051 			return (p->val.i);
   1052 		}
   1053 		if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
   1054 		    (p->flag & ISSET)) {
   1055 			/*
   1056 			 * this still points to the same character as the
   1057 			 * ungetsc'd terminator from above
   1058 			 */
   1059 			const char *cp = source->str;
   1060 
   1061 			/* prefer POSIX but not Korn functions over aliases */
   1062 			while (ctype(*cp, C_BLANK))
   1063 				/*
   1064 				 * this is like getsc() without skipping
   1065 				 * over Source boundaries (including not
   1066 				 * parsing ungetsc'd characters that got
   1067 				 * pushed into an SREREAD) which is what
   1068 				 * we want here anyway: find out whether
   1069 				 * the alias name is followed by a POSIX
   1070 				 * function definition
   1071 				 */
   1072 				++cp;
   1073 			/* prefer functions over aliases */
   1074 			if (cp[0] != '(' || cp[1] != ')') {
   1075 				Source *s = source;
   1076 
   1077 				while (s && (s->flags & SF_HASALIAS))
   1078 					if (s->u.tblp == p)
   1079 						return (LWORD);
   1080 					else
   1081 						s = s->next;
   1082 				/* push alias expansion */
   1083 				s = pushs(SALIAS, source->areap);
   1084 				s->start = s->str = p->val.s;
   1085 				s->u.tblp = p;
   1086 				s->flags |= SF_HASALIAS;
   1087 				s->line = source->line;
   1088 				s->next = source;
   1089 				if (source->type == SEOF) {
   1090 					/* prevent infinite recursion at EOS */
   1091 					source->u.tblp = p;
   1092 					source->flags |= SF_HASALIAS;
   1093 				}
   1094 				source = s;
   1095 				afree(yylval.cp, ATEMP);
   1096 				goto Again;
   1097 			}
   1098 		}
   1099 	} else if (*ident == '\0') {
   1100 		/* retain typeset et al. even when quoted */
   1101 		struct tbl *tt = get_builtin((dp = wdstrip(yylval.cp, 0)));
   1102 		uint32_t flag = tt ? tt->flag : 0;
   1103 
   1104 		if (flag & (DECL_UTIL | DECL_FWDR))
   1105 			strlcpy(ident, dp, sizeof(ident));
   1106 		afree(dp, ATEMP);
   1107 	}
   1108 
   1109 	return (LWORD);
   1110 }
   1111 
   1112 static void
   1113 gethere(void)
   1114 {
   1115 	struct ioword **p;
   1116 
   1117 	for (p = heres; p < herep; p++)
   1118 		if (!((*p)->ioflag & IOHERESTR))
   1119 			readhere(*p);
   1120 	herep = heres;
   1121 }
   1122 
   1123 /*
   1124  * read "<<word" text into temp file
   1125  */
   1126 
   1127 static void
   1128 readhere(struct ioword *iop)
   1129 {
   1130 	int c;
   1131 	const char *eof, *eofp;
   1132 	XString xs;
   1133 	char *xp;
   1134 	size_t xpos;
   1135 
   1136 	eof = evalstr(iop->delim, 0);
   1137 
   1138 	if (!(iop->ioflag & IOEVAL))
   1139 		ignore_backslash_newline++;
   1140 
   1141 	Xinit(xs, xp, 256, ATEMP);
   1142 
   1143  heredoc_read_line:
   1144 	/* beginning of line */
   1145 	eofp = eof;
   1146 	xpos = Xsavepos(xs, xp);
   1147 	if (iop->ioflag & IOSKIP) {
   1148 		/* skip over leading tabs */
   1149 		while ((c = getsc()) == '\t')
   1150 			;	/* nothing */
   1151 		goto heredoc_parse_char;
   1152 	}
   1153  heredoc_read_char:
   1154 	c = getsc();
   1155  heredoc_parse_char:
   1156 	/* compare with here document marker */
   1157 	if (!*eofp) {
   1158 		/* end of here document marker, what to do? */
   1159 		switch (c) {
   1160 		case ORD(/*(*/ ')'):
   1161 			if (!subshell_nesting_type)
   1162 				/*-
   1163 				 * not allowed outside $(...) or (...)
   1164 				 * => mismatch
   1165 				 */
   1166 				break;
   1167 			/* allow $(...) or (...) to close here */
   1168 			ungetsc(/*(*/ ')');
   1169 			/* FALLTHROUGH */
   1170 		case 0:
   1171 			/*
   1172 			 * Allow EOF here to commands without trailing
   1173 			 * newlines (mksh -c '...') will work as well.
   1174 			 */
   1175 		case ORD('\n'):
   1176 			/* Newline terminates here document marker */
   1177 			goto heredoc_found_terminator;
   1178 		}
   1179 	} else if ((unsigned int)c == ord(*eofp++))
   1180 		/* store; then read and compare next character */
   1181 		goto heredoc_store_and_loop;
   1182 	/* nope, mismatch; read until end of line */
   1183 	while (c != '\n') {
   1184 		if (!c)
   1185 			/* oops, reached EOF */
   1186 			yyerror(Tf_heredoc, eof);
   1187 		/* store character */
   1188 		Xcheck(xs, xp);
   1189 		Xput(xs, xp, c);
   1190 		/* read next character */
   1191 		c = getsc();
   1192 	}
   1193 	/* we read a newline as last character */
   1194  heredoc_store_and_loop:
   1195 	/* store character */
   1196 	Xcheck(xs, xp);
   1197 	Xput(xs, xp, c);
   1198 	if (c == '\n')
   1199 		goto heredoc_read_line;
   1200 	goto heredoc_read_char;
   1201 
   1202  heredoc_found_terminator:
   1203 	/* jump back to saved beginning of line */
   1204 	xp = Xrestpos(xs, xp, xpos);
   1205 	/* terminate, close and store */
   1206 	Xput(xs, xp, '\0');
   1207 	iop->heredoc = Xclose(xs, xp);
   1208 
   1209 	if (!(iop->ioflag & IOEVAL))
   1210 		ignore_backslash_newline--;
   1211 }
   1212 
   1213 void
   1214 yyerror(const char *fmt, ...)
   1215 {
   1216 	va_list va;
   1217 
   1218 	/* pop aliases and re-reads */
   1219 	while (source->type == SALIAS || source->type == SREREAD)
   1220 		source = source->next;
   1221 	/* zap pending input */
   1222 	source->str = null;
   1223 
   1224 	error_prefix(true);
   1225 	va_start(va, fmt);
   1226 	shf_vfprintf(shl_out, fmt, va);
   1227 	shf_putc('\n', shl_out);
   1228 	va_end(va);
   1229 	errorfz();
   1230 }
   1231 
   1232 /*
   1233  * input for yylex with alias expansion
   1234  */
   1235 
   1236 Source *
   1237 pushs(int type, Area *areap)
   1238 {
   1239 	Source *s;
   1240 
   1241 	s = alloc(sizeof(Source), areap);
   1242 	memset(s, 0, sizeof(Source));
   1243 	s->type = type;
   1244 	s->str = null;
   1245 	s->areap = areap;
   1246 	if (type == SFILE || type == SSTDIN)
   1247 		XinitN(s->xs, 256, s->areap);
   1248 	return (s);
   1249 }
   1250 
   1251 static int
   1252 getsc_uu(void)
   1253 {
   1254 	Source *s = source;
   1255 	int c;
   1256 
   1257 	while ((c = ord(*s->str++)) == 0) {
   1258 		/* return 0 for EOF by default */
   1259 		s->str = NULL;
   1260 		switch (s->type) {
   1261 		case SEOF:
   1262 			s->str = null;
   1263 			return (0);
   1264 
   1265 		case SSTDIN:
   1266 		case SFILE:
   1267 			getsc_line(s);
   1268 			break;
   1269 
   1270 		case SWSTR:
   1271 			break;
   1272 
   1273 		case SSTRING:
   1274 		case SSTRINGCMDLINE:
   1275 			break;
   1276 
   1277 		case SWORDS:
   1278 			s->start = s->str = *s->u.strv++;
   1279 			s->type = SWORDSEP;
   1280 			break;
   1281 
   1282 		case SWORDSEP:
   1283 			if (*s->u.strv == NULL) {
   1284 				s->start = s->str = "\n";
   1285 				s->type = SEOF;
   1286 			} else {
   1287 				s->start = s->str = T1space;
   1288 				s->type = SWORDS;
   1289 			}
   1290 			break;
   1291 
   1292 		case SALIAS:
   1293 			if (s->flags & SF_ALIASEND) {
   1294 				/* pass on an unused SF_ALIAS flag */
   1295 				source = s->next;
   1296 				source->flags |= s->flags & SF_ALIAS;
   1297 				s = source;
   1298 			} else if (*s->u.tblp->val.s &&
   1299 			    ctype((c = strnul(s->u.tblp->val.s)[-1]), C_SPACE)) {
   1300 				/* pop source stack */
   1301 				source = s = s->next;
   1302 				/*
   1303 				 * Note that this alias ended with a
   1304 				 * space, enabling alias expansion on
   1305 				 * the following word.
   1306 				 */
   1307 				s->flags |= SF_ALIAS;
   1308 			} else {
   1309 				/*
   1310 				 * At this point, we need to keep the current
   1311 				 * alias in the source list so recursive
   1312 				 * aliases can be detected and we also need to
   1313 				 * return the next character. Do this by
   1314 				 * temporarily popping the alias to get the
   1315 				 * next character and then put it back in the
   1316 				 * source list with the SF_ALIASEND flag set.
   1317 				 */
   1318 				/* pop source stack */
   1319 				source = s->next;
   1320 				source->flags |= s->flags & SF_ALIAS;
   1321 				c = getsc_uu();
   1322 				if (c) {
   1323 					s->flags |= SF_ALIASEND;
   1324 					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
   1325 					s->start = s->str = s->ugbuf;
   1326 					s->next = source;
   1327 					source = s;
   1328 				} else {
   1329 					s = source;
   1330 					/* avoid reading EOF twice */
   1331 					s->str = NULL;
   1332 					break;
   1333 				}
   1334 			}
   1335 			continue;
   1336 
   1337 		case SREREAD:
   1338 			if (s->start != s->ugbuf)
   1339 				/* yuck */
   1340 				afree(s->u.freeme, ATEMP);
   1341 			source = s = s->next;
   1342 			continue;
   1343 		}
   1344 		if (s->str == NULL) {
   1345 			s->type = SEOF;
   1346 			s->start = s->str = null;
   1347 			return ('\0');
   1348 		}
   1349 		if (s->flags & SF_ECHO) {
   1350 			shf_puts(s->str, shl_out);
   1351 			shf_flush(shl_out);
   1352 		}
   1353 	}
   1354 	return (c);
   1355 }
   1356 
   1357 static void
   1358 getsc_line(Source *s)
   1359 {
   1360 	char *xp = Xstring(s->xs, xp), *cp;
   1361 	bool interactive = Flag(FTALKING) && s->type == SSTDIN;
   1362 	bool have_tty = interactive && (s->flags & SF_TTY) && tty_hasstate;
   1363 
   1364 	/* Done here to ensure nothing odd happens when a timeout occurs */
   1365 	XcheckN(s->xs, xp, LINE);
   1366 	*xp = '\0';
   1367 	s->start = s->str = xp;
   1368 
   1369 	if (have_tty && ksh_tmout) {
   1370 		ksh_tmout_state = TMOUT_READING;
   1371 		alarm(ksh_tmout);
   1372 	}
   1373 	if (interactive) {
   1374 		if (cur_prompt == PS1)
   1375 			histsave(&s->line, NULL, HIST_FLUSH, true);
   1376 		change_winsz();
   1377 	}
   1378 #ifndef MKSH_NO_CMDLINE_EDITING
   1379 	if (have_tty && (
   1380 #if !MKSH_S_NOVI
   1381 	    Flag(FVI) ||
   1382 #endif
   1383 	    Flag(FEMACS) || Flag(FGMACS))) {
   1384 		int nread;
   1385 
   1386 		nread = x_read(xp);
   1387 		if (nread < 0)
   1388 			/* read error */
   1389 			nread = 0;
   1390 		xp[nread] = '\0';
   1391 		xp += nread;
   1392 	} else
   1393 #endif
   1394 	  {
   1395 		if (interactive)
   1396 			pprompt(prompt, 0);
   1397 		else
   1398 			s->line++;
   1399 
   1400 		while (/* CONSTCOND */ 1) {
   1401 			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
   1402 
   1403 			if (!p && shf_error(s->u.shf) &&
   1404 			    shf_errno(s->u.shf) == EINTR) {
   1405 				shf_clearerr(s->u.shf);
   1406 				if (trap)
   1407 					runtraps(0);
   1408 				continue;
   1409 			}
   1410 			if (!p || (xp = p, xp[-1] == '\n'))
   1411 				break;
   1412 			/* double buffer size */
   1413 			/* move past NUL so doubling works... */
   1414 			xp++;
   1415 			XcheckN(s->xs, xp, Xlength(s->xs, xp));
   1416 			/* ...and move back again */
   1417 			xp--;
   1418 		}
   1419 		/*
   1420 		 * flush any unwanted input so other programs/builtins
   1421 		 * can read it. Not very optimal, but less error prone
   1422 		 * than flushing else where, dealing with redirections,
   1423 		 * etc.
   1424 		 * TODO: reduce size of shf buffer (~128?) if SSTDIN
   1425 		 */
   1426 		if (s->type == SSTDIN)
   1427 			shf_flush(s->u.shf);
   1428 	}
   1429 	/*
   1430 	 * XXX: temporary kludge to restore source after a
   1431 	 * trap may have been executed.
   1432 	 */
   1433 	source = s;
   1434 	if (have_tty && ksh_tmout) {
   1435 		ksh_tmout_state = TMOUT_EXECUTING;
   1436 		alarm(0);
   1437 	}
   1438 	cp = Xstring(s->xs, xp);
   1439 	rndpush(cp);
   1440 	s->start = s->str = cp;
   1441 	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
   1442 	/* Note: if input is all nulls, this is not eof */
   1443 	if (Xlength(s->xs, xp) == 0) {
   1444 		/* EOF */
   1445 		if (s->type == SFILE)
   1446 			shf_fdclose(s->u.shf);
   1447 		s->str = NULL;
   1448 	} else if (interactive && *s->str) {
   1449 		if (cur_prompt != PS1)
   1450 			histsave(&s->line, s->str, HIST_APPEND, true);
   1451 		else if (!ctype(*s->str, C_IFS | C_IFSWS))
   1452 			histsave(&s->line, s->str, HIST_QUEUE, true);
   1453 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
   1454 		else
   1455 			goto check_for_sole_return;
   1456 	} else if (interactive && cur_prompt == PS1) {
   1457  check_for_sole_return:
   1458 		cp = Xstring(s->xs, xp);
   1459 		while (ctype(*cp, C_IFSWS))
   1460 			++cp;
   1461 		if (!*cp) {
   1462 			histsave(&s->line, NULL, HIST_FLUSH, true);
   1463 			histsync();
   1464 		}
   1465 #endif
   1466 	}
   1467 	if (interactive)
   1468 		set_prompt(PS2, NULL);
   1469 }
   1470 
   1471 void
   1472 set_prompt(int to, Source *s)
   1473 {
   1474 	cur_prompt = (uint8_t)to;
   1475 
   1476 	switch (to) {
   1477 	/* command */
   1478 	case PS1:
   1479 		/*
   1480 		 * Substitute ! and !! here, before substitutions are done
   1481 		 * so ! in expanded variables are not expanded.
   1482 		 * NOTE: this is not what AT&T ksh does (it does it after
   1483 		 * substitutions, POSIX doesn't say which is to be done.
   1484 		 */
   1485 		{
   1486 			struct shf *shf;
   1487 			char * volatile ps1;
   1488 			Area *saved_atemp;
   1489 			int saved_lineno;
   1490 
   1491 			ps1 = str_val(global("PS1"));
   1492 			shf = shf_sopen(NULL, strlen(ps1) * 2,
   1493 			    SHF_WR | SHF_DYNAMIC, NULL);
   1494 			while (*ps1)
   1495 				if (*ps1 != '!' || *++ps1 == '!')
   1496 					shf_putchar(*ps1++, shf);
   1497 				else
   1498 					shf_fprintf(shf, Tf_lu, s ?
   1499 					    (unsigned long)s->line + 1 : 0UL);
   1500 			ps1 = shf_sclose(shf);
   1501 			saved_lineno = current_lineno;
   1502 			if (s)
   1503 				current_lineno = s->line + 1;
   1504 			saved_atemp = ATEMP;
   1505 			newenv(E_ERRH);
   1506 			if (kshsetjmp(e->jbuf)) {
   1507 				prompt = safe_prompt;
   1508 				/*
   1509 				 * Don't print an error - assume it has already
   1510 				 * been printed. Reason is we may have forked
   1511 				 * to run a command and the child may be
   1512 				 * unwinding its stack through this code as it
   1513 				 * exits.
   1514 				 */
   1515 			} else {
   1516 				char *cp = substitute(ps1, 0);
   1517 				strdupx(prompt, cp, saved_atemp);
   1518 			}
   1519 			current_lineno = saved_lineno;
   1520 			quitenv(NULL);
   1521 		}
   1522 		break;
   1523 	/* command continuation */
   1524 	case PS2:
   1525 		prompt = str_val(global("PS2"));
   1526 		break;
   1527 	}
   1528 }
   1529 
   1530 int
   1531 pprompt(const char *cp, int ntruncate)
   1532 {
   1533 	char delimiter = 0;
   1534 	bool doprint = (ntruncate != -1);
   1535 	bool indelimit = false;
   1536 	int columns = 0, lines = 0;
   1537 
   1538 	/*
   1539 	 * Undocumented AT&T ksh feature:
   1540 	 * If the second char in the prompt string is \r then the first
   1541 	 * char is taken to be a non-printing delimiter and any chars
   1542 	 * between two instances of the delimiter are not considered to
   1543 	 * be part of the prompt length
   1544 	 */
   1545 	if (*cp && cp[1] == '\r') {
   1546 		delimiter = *cp;
   1547 		cp += 2;
   1548 	}
   1549 	for (; *cp; cp++) {
   1550 		if (indelimit && *cp != delimiter)
   1551 			;
   1552 		else if (ctype(*cp, C_CR | C_LF)) {
   1553 			lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
   1554 			columns = 0;
   1555 		} else if (*cp == '\t') {
   1556 			columns = (columns | 7) + 1;
   1557 		} else if (*cp == '\b') {
   1558 			if (columns > 0)
   1559 				columns--;
   1560 		} else if (*cp == delimiter)
   1561 			indelimit = !indelimit;
   1562 		else if (UTFMODE && (rtt2asc(*cp) > 0x7F)) {
   1563 			const char *cp2;
   1564 			columns += utf_widthadj(cp, &cp2);
   1565 			if (doprint && (indelimit ||
   1566 			    (ntruncate < (x_cols * lines + columns))))
   1567 				shf_write(cp, cp2 - cp, shl_out);
   1568 			cp = cp2 - /* loop increment */ 1;
   1569 			continue;
   1570 		} else
   1571 			columns++;
   1572 		if (doprint && (*cp != delimiter) &&
   1573 		    (indelimit || (ntruncate < (x_cols * lines + columns))))
   1574 			shf_putc(*cp, shl_out);
   1575 	}
   1576 	if (doprint)
   1577 		shf_flush(shl_out);
   1578 	return (x_cols * lines + columns);
   1579 }
   1580 
   1581 /*
   1582  * Read the variable part of a ${...} expression (i.e. up to but not
   1583  * including the :[-+?=#%] or close-brace).
   1584  */
   1585 static char *
   1586 get_brace_var(XString *wsp, char *wp)
   1587 {
   1588 	char c;
   1589 	enum parse_state {
   1590 		PS_INITIAL, PS_SAW_PERCENT, PS_SAW_HASH, PS_SAW_BANG,
   1591 		PS_IDENT, PS_NUMBER, PS_VAR1
   1592 	} state = PS_INITIAL;
   1593 
   1594 	while (/* CONSTCOND */ 1) {
   1595 		c = getsc();
   1596 		/* State machine to figure out where the variable part ends. */
   1597 		switch (state) {
   1598 		case PS_SAW_HASH:
   1599 			if (ctype(c, C_VAR1)) {
   1600 				char c2;
   1601 
   1602 				c2 = getsc();
   1603 				ungetsc(c2);
   1604 				if (ord(c2) != ORD(/*{*/ '}')) {
   1605 					ungetsc(c);
   1606 					goto out;
   1607 				}
   1608 			}
   1609 			goto ps_common;
   1610 		case PS_SAW_BANG:
   1611 			switch (ord(c)) {
   1612 			case ORD('@'):
   1613 			case ORD('#'):
   1614 			case ORD('-'):
   1615 			case ORD('?'):
   1616 				goto out;
   1617 			}
   1618 			goto ps_common;
   1619 		case PS_INITIAL:
   1620 			switch (ord(c)) {
   1621 			case ORD('%'):
   1622 				state = PS_SAW_PERCENT;
   1623 				goto next;
   1624 			case ORD('#'):
   1625 				state = PS_SAW_HASH;
   1626 				goto next;
   1627 			case ORD('!'):
   1628 				state = PS_SAW_BANG;
   1629 				goto next;
   1630 			}
   1631 			/* FALLTHROUGH */
   1632 		case PS_SAW_PERCENT:
   1633  ps_common:
   1634 			if (ctype(c, C_ALPHX))
   1635 				state = PS_IDENT;
   1636 			else if (ctype(c, C_DIGIT))
   1637 				state = PS_NUMBER;
   1638 			else if (ctype(c, C_VAR1))
   1639 				state = PS_VAR1;
   1640 			else
   1641 				goto out;
   1642 			break;
   1643 		case PS_IDENT:
   1644 			if (!ctype(c, C_ALNUX)) {
   1645 				if (ord(c) == ORD('[')) {
   1646 					char *tmp, *p;
   1647 
   1648 					if (!arraysub(&tmp))
   1649 						yyerror("missing ]");
   1650 					*wp++ = c;
   1651 					p = tmp;
   1652 					while (*p) {
   1653 						Xcheck(*wsp, wp);
   1654 						*wp++ = *p++;
   1655 					}
   1656 					afree(tmp, ATEMP);
   1657 					/* the ] */
   1658 					c = getsc();
   1659 				}
   1660 				goto out;
   1661 			}
   1662  next:
   1663 			break;
   1664 		case PS_NUMBER:
   1665 			if (!ctype(c, C_DIGIT))
   1666 				goto out;
   1667 			break;
   1668 		case PS_VAR1:
   1669 			goto out;
   1670 		}
   1671 		Xcheck(*wsp, wp);
   1672 		*wp++ = c;
   1673 	}
   1674  out:
   1675 	/* end of variable part */
   1676 	*wp++ = '\0';
   1677 	ungetsc(c);
   1678 	return (wp);
   1679 }
   1680 
   1681 /*
   1682  * Save an array subscript - returns true if matching bracket found, false
   1683  * if eof or newline was found.
   1684  * (Returned string double null terminated)
   1685  */
   1686 static bool
   1687 arraysub(char **strp)
   1688 {
   1689 	XString ws;
   1690 	char *wp, c;
   1691 	/* we are just past the initial [ */
   1692 	unsigned int depth = 1;
   1693 
   1694 	Xinit(ws, wp, 32, ATEMP);
   1695 
   1696 	do {
   1697 		c = getsc();
   1698 		Xcheck(ws, wp);
   1699 		*wp++ = c;
   1700 		if (ord(c) == ORD('['))
   1701 			depth++;
   1702 		else if (ord(c) == ORD(']'))
   1703 			depth--;
   1704 	} while (depth > 0 && c && c != '\n');
   1705 
   1706 	*wp++ = '\0';
   1707 	*strp = Xclose(ws, wp);
   1708 
   1709 	return (tobool(depth == 0));
   1710 }
   1711 
   1712 /* Unget a char: handles case when we are already at the start of the buffer */
   1713 static void
   1714 ungetsc(int c)
   1715 {
   1716 	struct sretrace_info *rp = retrace_info;
   1717 
   1718 	if (backslash_skip)
   1719 		backslash_skip--;
   1720 	/* Don't unget EOF... */
   1721 	if (source->str == null && c == '\0')
   1722 		return;
   1723 	while (rp) {
   1724 		if (Xlength(rp->xs, rp->xp))
   1725 			rp->xp--;
   1726 		rp = rp->next;
   1727 	}
   1728 	ungetsc_i(c);
   1729 }
   1730 static void
   1731 ungetsc_i(int c)
   1732 {
   1733 	if (source->str > source->start)
   1734 		source->str--;
   1735 	else {
   1736 		Source *s;
   1737 
   1738 		s = pushs(SREREAD, source->areap);
   1739 		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
   1740 		s->start = s->str = s->ugbuf;
   1741 		s->next = source;
   1742 		source = s;
   1743 	}
   1744 }
   1745 
   1746 
   1747 /* Called to get a char that isn't a \newline sequence. */
   1748 static int
   1749 getsc_bn(void)
   1750 {
   1751 	int c, c2;
   1752 
   1753 	if (ignore_backslash_newline)
   1754 		return (o_getsc_u());
   1755 
   1756 	if (backslash_skip == 1) {
   1757 		backslash_skip = 2;
   1758 		return (o_getsc_u());
   1759 	}
   1760 
   1761 	backslash_skip = 0;
   1762 
   1763 	while (/* CONSTCOND */ 1) {
   1764 		c = o_getsc_u();
   1765 		if (c == '\\') {
   1766 			if ((c2 = o_getsc_u()) == '\n')
   1767 				/* ignore the \newline; get the next char... */
   1768 				continue;
   1769 			ungetsc_i(c2);
   1770 			backslash_skip = 1;
   1771 		}
   1772 		return (c);
   1773 	}
   1774 }
   1775 
   1776 void
   1777 yyskiputf8bom(void)
   1778 {
   1779 	int c;
   1780 
   1781 	if (rtt2asc((c = o_getsc_u())) != 0xEF) {
   1782 		ungetsc_i(c);
   1783 		return;
   1784 	}
   1785 	if (rtt2asc((c = o_getsc_u())) != 0xBB) {
   1786 		ungetsc_i(c);
   1787 		ungetsc_i(asc2rtt(0xEF));
   1788 		return;
   1789 	}
   1790 	if (rtt2asc((c = o_getsc_u())) != 0xBF) {
   1791 		ungetsc_i(c);
   1792 		ungetsc_i(asc2rtt(0xBB));
   1793 		ungetsc_i(asc2rtt(0xEF));
   1794 		return;
   1795 	}
   1796 	UTFMODE |= 8;
   1797 }
   1798 
   1799 static Lex_state *
   1800 push_state_i(State_info *si, Lex_state *old_end)
   1801 {
   1802 	Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);
   1803 
   1804 	news[0].ls_base = old_end;
   1805 	si->base = &news[0];
   1806 	si->end = &news[STATE_BSIZE];
   1807 	return (&news[1]);
   1808 }
   1809 
   1810 static Lex_state *
   1811 pop_state_i(State_info *si, Lex_state *old_end)
   1812 {
   1813 	Lex_state *old_base = si->base;
   1814 
   1815 	si->base = old_end->ls_base - STATE_BSIZE;
   1816 	si->end = old_end->ls_base;
   1817 
   1818 	afree(old_base, ATEMP);
   1819 
   1820 	return (si->base + STATE_BSIZE - 1);
   1821 }
   1822