Home | History | Annotate | Download | only in src
      1 /*	$OpenBSD: lex.c,v 1.51 2015/09/10 22:48:58 nicm Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
      5  *		 2011, 2012, 2013, 2014, 2015, 2016
      6  *	mirabilos <m (at) mirbsd.org>
      7  *
      8  * Provided that these terms and disclaimer and all copyright notices
      9  * are retained or reproduced in an accompanying document, permission
     10  * is granted to deal in this work without restriction, including un-
     11  * limited rights to use, publicly perform, distribute, sell, modify,
     12  * merge, give away, or sublicence.
     13  *
     14  * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
     15  * the utmost extent permitted by applicable law, neither express nor
     16  * implied; without malicious intent or gross negligence. In no event
     17  * may a licensor, author or contributor be held liable for indirect,
     18  * direct, other damage, loss, or other issues arising in any way out
     19  * of dealing in the work, even if advised of the possibility of such
     20  * damage or existence of a defect, except proven that it results out
     21  * of said person's immediate fault when using the work as intended.
     22  */
     23 
     24 #include "sh.h"
     25 
     26 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.218 2016/01/20 21:34:12 tg Exp $");
     27 
     28 /*
     29  * states while lexing word
     30  */
     31 #define SBASE		0	/* outside any lexical constructs */
     32 #define SWORD		1	/* implicit quoting for substitute() */
     33 #define SLETPAREN	2	/* inside (( )), implicit quoting */
     34 #define SSQUOTE		3	/* inside '' */
     35 #define SDQUOTE		4	/* inside "" */
     36 #define SEQUOTE		5	/* inside $'' */
     37 #define SBRACE		6	/* inside ${} */
     38 #define SQBRACE		7	/* inside "${}" */
     39 #define SBQUOTE		8	/* inside `` */
     40 #define SASPAREN	9	/* inside $(( )) */
     41 #define SHEREDELIM	10	/* parsing << or <<- delimiter */
     42 #define SHEREDQUOTE	11	/* parsing " in << or <<- delimiter */
     43 #define SPATTERN	12	/* parsing *(...|...) pattern (*+?@!) */
     44 #define SADELIM		13	/* like SBASE, looking for delimiter */
     45 #define STBRACEKORN	14	/* parsing ${...[#%]...} !FSH */
     46 #define STBRACEBOURNE	15	/* parsing ${...[#%]...} FSH */
     47 #define SINVALID	255	/* invalid state */
     48 
/*
 * One entry on the retrace_info list. While an entry is on the list,
 * the o_getsc_r() helper appends every character it returns to the
 * entry's buffer. PUSH_SRETRACE() creates an entry, POP_SRETRACE()
 * removes it and hands the collected text back as a string.
 */
struct sretrace_info {
	/* next entry on the list; the list ends with a null pointer */
	struct sretrace_info *next;
	/* buffer holding the recorded input characters */
	XString xs;
	/* current write position inside xs */
	char *xp;
};
     54 
/*
 * Structure to keep track of the lexing state and the various pieces of info
 * needed for each particular state.
 *
 * The per-state data lives in a union, so only the member matching the
 * entry's type is meaningful at any time. yylex() keeps these entries
 * in an array used as a stack (see PUSH_STATE/POP_STATE).
 */
typedef struct lex_state {
	union {
		/* point to the next state block */
		/* (yylex() sets this to NULL in slot 0 of its own array) */
		struct lex_state *base;
		/* marks start of state output in output string */
		/* (saved by PUSH_SRETRACE, restored by POP_SRETRACE) */
		size_t start;
		/* SBQUOTE: true if in double quotes: "`...`" */
		/* SEQUOTE: got NUL, ignore rest of string */
		bool abool;
		/* SADELIM information */
		struct {
			/* character to search for */
			unsigned char delimiter;
			/* max. number of delimiters */
			unsigned char num;
		} adelim;
	} u;
	/* count open parentheses */
	/* (maintained by the SADELIM, SASPAREN and SLETPAREN states) */
	short nparen;
	/* type of this state */
	/* (one of the S* lexer state constants, or SINVALID) */
	uint8_t type;
} Lex_state;
/* shorthand accessors for the members of the union above */
#define ls_base		u.base
#define ls_start	u.start
#define ls_bool		u.abool
#define ls_adelim	u.adelim
     85 
/*
 * Bounds of the block of Lex_state entries currently in use.
 * yylex() points base at its local array and end one past the last
 * element. PUSH_STATE calls push_state_i() when the state pointer
 * reaches end, and POP_STATE calls pop_state_i() when it reaches base.
 */
typedef struct {
	/* first entry of the current block */
	Lex_state *base;
	/* one past the last entry of the current block */
	Lex_state *end;
} State_info;
     90 
/* forward declarations of the helpers private to this file */
static void readhere(struct ioword *);
static void ungetsc(int);
static void ungetsc_i(int);
static int getsc_uu(void);
static void getsc_line(Source *);
static int getsc_bn(void);
static int getsc_i(void);
static char *get_brace_var(XString *, char *);
static bool arraysub(char **);
static void gethere(void);
static Lex_state *push_state_i(State_info *, Lex_state *);
static Lex_state *pop_state_i(State_info *, Lex_state *);

/*
 * Checked by the o_getsc() fast path: while non-zero, every read goes
 * through getsc_bn(). yylex() clears it at the start of each scan.
 */
static int backslash_skip;
/*
 * Counter that yylex() raises while skipping a # comment and while
 * inside '...' or $'...' quotes, and lowers again afterwards.
 * NOTE(review): presumably makes getsc_bn() leave backslash-newline
 * untouched while non-zero; confirm against getsc_bn().
 */
static int ignore_backslash_newline;
    106 
    107 /* optimised getsc_bn() */
    108 #define o_getsc()	(*source->str != '\0' && *source->str != '\\' && \
    109 			    !backslash_skip ? *source->str++ : getsc_bn())
    110 /* optimised getsc_uu() */
    111 #define	o_getsc_u()	((*source->str != '\0') ? *source->str++ : getsc_uu())
    112 
    113 /* retrace helper */
    114 #define o_getsc_r(carg)					\
    115 	int cev = (carg);				\
    116 	struct sretrace_info *rp = retrace_info;	\
    117 							\
    118 	while (rp) {					\
    119 		Xcheck(rp->xs, rp->xp);			\
    120 		*rp->xp++ = cev;			\
    121 		rp = rp->next;				\
    122 	}						\
    123 							\
    124 	return (cev);
    125 
/*
 * callback
 *
 * Reads the next input character via o_getsc() and records it in every
 * active retrace buffer (see o_getsc_r) before returning it. As a real
 * function it can be passed by pointer; yylex() hands it to unbksl().
 */
static int
getsc_i(void)
{
	o_getsc_r(o_getsc());
}
    132 
/*
 * Select the implementation of getsc(). In the MKSH_SMALL build (unless
 * MKSH_SMALL_BUT_FAST is also defined) it is simply getsc_i(). Otherwise
 * the o_getsc() fast path is expanded at each call site and only the
 * retrace recording is done in the getsc_r() function.
 */
#if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST)
#define getsc getsc_i
#else
static int getsc_r(int);

/* record the already-read character c in all retrace buffers; return c */
static int
getsc_r(int c)
{
	o_getsc_r(c);
}

#define getsc()		getsc_r(o_getsc())
#endif
    146 
/* number of Lex_state entries in one block (size of yylex()'s array) */
#define STATE_BSIZE	8

/*
 * The macros below operate on local variables of the function using
 * them (yylex()): statep, state_info and state; the SRETRACE variants
 * additionally use ws, wp, sp and dp.
 */

/*
 * Enter lexer state s: advance statep, calling push_state_i() when the
 * end of the current block is reached, and record s both in the new
 * entry and in the state variable.
 */
#define PUSH_STATE(s)	do {					\
	if (++statep == state_info.end)				\
		statep = push_state_i(&state_info, statep);	\
	state = statep->type = (s);				\
} while (/* CONSTCOND */ 0)

/*
 * Leave the current lexer state: step statep back, calling
 * pop_state_i() when the base of the current block is reached, and
 * reload the state variable from the entry now on top.
 */
#define POP_STATE()	do {					\
	if (--statep == state_info.base)			\
		statep = pop_state_i(&state_info, statep);	\
	state = statep->type;					\
} while (/* CONSTCOND */ 0)

/*
 * Enter state s and start recording the raw input: the current output
 * position is saved in the new state entry and a fresh sretrace_info
 * (allocated from ATEMP, 64 bytes initial buffer) is put at the head
 * of the retrace_info list.
 */
#define PUSH_SRETRACE(s) do {					\
	struct sretrace_info *ri;				\
								\
	PUSH_STATE(s);						\
	statep->ls_start = Xsavepos(ws, wp);			\
	ri = alloc(sizeof(struct sretrace_info), ATEMP);	\
	Xinit(ri->xs, ri->xp, 64, ATEMP);			\
	ri->next = retrace_info;				\
	retrace_info = ri;					\
} while (/* CONSTCOND */ 0)

/*
 * Stop recording and leave the state: wp is rewound to the position
 * saved by PUSH_SRETRACE, the recorded text is NUL-terminated and left
 * in sp, and the head sretrace_info entry is unlinked and freed (dp is
 * used as scratch). The string in sp is not freed here; yylex() either
 * frees it with afree() or hands it to a Source as u.freeme.
 */
#define POP_SRETRACE()	do {					\
	wp = Xrestpos(ws, wp, statep->ls_start);		\
	*retrace_info->xp = '\0';				\
	sp = Xstring(retrace_info->xs, retrace_info->xp);	\
	dp = (void *)retrace_info;				\
	retrace_info = retrace_info->next;			\
	afree(dp, ATEMP);					\
	POP_STATE();						\
} while (/* CONSTCOND */ 0)
    181 
    182 /**
    183  * Lexical analyser
    184  *
    185  * tokens are not regular expressions, they are LL(1).
    186  * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
    187  * hence the state stack. Note "$(...)" are now parsed recursively.
    188  */
    189 
    190 int
    191 yylex(int cf)
    192 {
    193 	Lex_state states[STATE_BSIZE], *statep, *s2, *base;
    194 	State_info state_info;
    195 	int c, c2, state;
    196 	size_t cz;
    197 	XString ws;		/* expandable output word */
    198 	char *wp;		/* output word pointer */
    199 	char *sp, *dp;
    200 
    201  Again:
    202 	states[0].type = SINVALID;
    203 	states[0].ls_base = NULL;
    204 	statep = &states[1];
    205 	state_info.base = states;
    206 	state_info.end = &state_info.base[STATE_BSIZE];
    207 
    208 	Xinit(ws, wp, 64, ATEMP);
    209 
    210 	backslash_skip = 0;
    211 	ignore_backslash_newline = 0;
    212 
    213 	if (cf & ONEWORD)
    214 		state = SWORD;
    215 	else if (cf & LETEXPR) {
    216 		/* enclose arguments in (double) quotes */
    217 		*wp++ = OQUOTE;
    218 		state = SLETPAREN;
    219 		statep->nparen = 0;
    220 	} else {
    221 		/* normal lexing */
    222 		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
    223 		while ((c = getsc()) == ' ' || c == '\t')
    224 			;
    225 		if (c == '#') {
    226 			ignore_backslash_newline++;
    227 			while ((c = getsc()) != '\0' && c != '\n')
    228 				;
    229 			ignore_backslash_newline--;
    230 		}
    231 		ungetsc(c);
    232 	}
    233 	if (source->flags & SF_ALIAS) {
    234 		/* trailing ' ' in alias definition */
    235 		source->flags &= ~SF_ALIAS;
    236 		/* POSIX: trailing space only counts if parsing simple cmd */
    237 		if (!Flag(FPOSIX) || (cf & CMDWORD))
    238 			cf |= ALIAS;
    239 	}
    240 
    241 	/* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
    242 	statep->type = state;
    243 
    244 	/* collect non-special or quoted characters to form word */
    245 	while (!((c = getsc()) == 0 ||
    246 	    ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
    247 		if (state == SBASE &&
    248 		    subshell_nesting_type == /*{*/ '}' &&
    249 		    c == /*{*/ '}')
    250 			/* possibly end ${ :;} */
    251 			break;
    252 		Xcheck(ws, wp);
    253 		switch (state) {
    254 		case SADELIM:
    255 			if (c == '(')
    256 				statep->nparen++;
    257 			else if (c == ')')
    258 				statep->nparen--;
    259 			else if (statep->nparen == 0 && (c == /*{*/ '}' ||
    260 			    c == (int)statep->ls_adelim.delimiter)) {
    261 				*wp++ = ADELIM;
    262 				*wp++ = c;
    263 				if (c == /*{*/ '}' || --statep->ls_adelim.num == 0)
    264 					POP_STATE();
    265 				if (c == /*{*/ '}')
    266 					POP_STATE();
    267 				break;
    268 			}
    269 			/* FALLTHROUGH */
    270 		case SBASE:
    271 			if (c == '[' && (cf & CMDASN)) {
    272 				/* temporary */
    273 				*wp = EOS;
    274 				if (is_wdvarname(Xstring(ws, wp), false)) {
    275 					char *p, *tmp;
    276 
    277 					if (arraysub(&tmp)) {
    278 						*wp++ = CHAR;
    279 						*wp++ = c;
    280 						for (p = tmp; *p; ) {
    281 							Xcheck(ws, wp);
    282 							*wp++ = CHAR;
    283 							*wp++ = *p++;
    284 						}
    285 						afree(tmp, ATEMP);
    286 						break;
    287 					} else {
    288 						Source *s;
    289 
    290 						s = pushs(SREREAD,
    291 						    source->areap);
    292 						s->start = s->str =
    293 						    s->u.freeme = tmp;
    294 						s->next = source;
    295 						source = s;
    296 					}
    297 				}
    298 				*wp++ = CHAR;
    299 				*wp++ = c;
    300 				break;
    301 			}
    302 			/* FALLTHROUGH */
    303  Sbase1:		/* includes *(...|...) pattern (*+?@!) */
    304 			if (c == '*' || c == '@' || c == '+' || c == '?' ||
    305 			    c == '!') {
    306 				c2 = getsc();
    307 				if (c2 == '(' /*)*/ ) {
    308 					*wp++ = OPAT;
    309 					*wp++ = c;
    310 					PUSH_STATE(SPATTERN);
    311 					break;
    312 				}
    313 				ungetsc(c2);
    314 			}
    315 			/* FALLTHROUGH */
    316  Sbase2:		/* doesn't include *(...|...) pattern (*+?@!) */
    317 			switch (c) {
    318 			case '\\':
    319  getsc_qchar:
    320 				if ((c = getsc())) {
    321 					/* trailing \ is lost */
    322 					*wp++ = QCHAR;
    323 					*wp++ = c;
    324 				}
    325 				break;
    326 			case '\'':
    327  open_ssquote_unless_heredoc:
    328 				if ((cf & HEREDOC))
    329 					goto store_char;
    330 				*wp++ = OQUOTE;
    331 				ignore_backslash_newline++;
    332 				PUSH_STATE(SSQUOTE);
    333 				break;
    334 			case '"':
    335  open_sdquote:
    336 				*wp++ = OQUOTE;
    337 				PUSH_STATE(SDQUOTE);
    338 				break;
    339 			case '$':
    340 				/*
    341 				 * processing of dollar sign belongs into
    342 				 * Subst, except for those which can open
    343 				 * a string: $'' and $""
    344 				 */
    345  subst_dollar_ex:
    346 				c = getsc();
    347 				switch (c) {
    348 				case '"':
    349 					goto open_sdquote;
    350 				case '\'':
    351 					goto open_sequote;
    352 				default:
    353 					goto SubstS;
    354 				}
    355 			default:
    356 				goto Subst;
    357 			}
    358 			break;
    359 
    360  Subst:
    361 			switch (c) {
    362 			case '\\':
    363 				c = getsc();
    364 				switch (c) {
    365 				case '"':
    366 					if ((cf & HEREDOC))
    367 						goto heredocquote;
    368 					/* FALLTHROUGH */
    369 				case '\\':
    370 				case '$': case '`':
    371  store_qchar:
    372 					*wp++ = QCHAR;
    373 					*wp++ = c;
    374 					break;
    375 				default:
    376  heredocquote:
    377 					Xcheck(ws, wp);
    378 					if (c) {
    379 						/* trailing \ is lost */
    380 						*wp++ = CHAR;
    381 						*wp++ = '\\';
    382 						*wp++ = CHAR;
    383 						*wp++ = c;
    384 					}
    385 					break;
    386 				}
    387 				break;
    388 			case '$':
    389 				c = getsc();
    390  SubstS:
    391 				if (c == '(') /*)*/ {
    392 					c = getsc();
    393 					if (c == '(') /*)*/ {
    394 						*wp++ = EXPRSUB;
    395 						PUSH_SRETRACE(SASPAREN);
    396 						statep->nparen = 2;
    397 						*retrace_info->xp++ = '(';
    398 					} else {
    399 						ungetsc(c);
    400  subst_command:
    401 						c = COMSUB;
    402  subst_command2:
    403 						sp = yyrecursive(c);
    404 						cz = strlen(sp) + 1;
    405 						XcheckN(ws, wp, cz);
    406 						*wp++ = c;
    407 						memcpy(wp, sp, cz);
    408 						wp += cz;
    409 					}
    410 				} else if (c == '{') /*}*/ {
    411 					if ((c = getsc()) == '|') {
    412 						/*
    413 						 * non-subenvironment
    414 						 * value substitution
    415 						 */
    416 						c = VALSUB;
    417 						goto subst_command2;
    418 					} else if (ctype(c, C_IFSWS)) {
    419 						/*
    420 						 * non-subenvironment
    421 						 * "command" substitution
    422 						 */
    423 						c = FUNSUB;
    424 						goto subst_command2;
    425 					}
    426 					ungetsc(c);
    427 					*wp++ = OSUBST;
    428 					*wp++ = '{'; /*}*/
    429 					wp = get_brace_var(&ws, wp);
    430 					c = getsc();
    431 					/* allow :# and :% (ksh88 compat) */
    432 					if (c == ':') {
    433 						*wp++ = CHAR;
    434 						*wp++ = c;
    435 						c = getsc();
    436 						if (c == ':') {
    437 							*wp++ = CHAR;
    438 							*wp++ = '0';
    439 							*wp++ = ADELIM;
    440 							*wp++ = ':';
    441 							PUSH_STATE(SBRACE);
    442 							PUSH_STATE(SADELIM);
    443 							statep->ls_adelim.delimiter = ':';
    444 							statep->ls_adelim.num = 1;
    445 							statep->nparen = 0;
    446 							break;
    447 						} else if (ksh_isdigit(c) ||
    448 						    c == '('/*)*/ || c == ' ' ||
    449 						    /*XXX what else? */
    450 						    c == '$') {
    451 							/* substring subst. */
    452 							if (c != ' ') {
    453 								*wp++ = CHAR;
    454 								*wp++ = ' ';
    455 							}
    456 							ungetsc(c);
    457 							PUSH_STATE(SBRACE);
    458 							PUSH_STATE(SADELIM);
    459 							statep->ls_adelim.delimiter = ':';
    460 							statep->ls_adelim.num = 2;
    461 							statep->nparen = 0;
    462 							break;
    463 						}
    464 					} else if (c == '/') {
    465 						*wp++ = CHAR;
    466 						*wp++ = c;
    467 						if ((c = getsc()) == '/') {
    468 							*wp++ = ADELIM;
    469 							*wp++ = c;
    470 						} else
    471 							ungetsc(c);
    472 						PUSH_STATE(SBRACE);
    473 						PUSH_STATE(SADELIM);
    474 						statep->ls_adelim.delimiter = '/';
    475 						statep->ls_adelim.num = 1;
    476 						statep->nparen = 0;
    477 						break;
    478 					}
    479 					/*
    480 					 * If this is a trim operation,
    481 					 * treat (,|,) specially in STBRACE.
    482 					 */
    483 					if (ctype(c, C_SUBOP2)) {
    484 						ungetsc(c);
    485 						if (Flag(FSH))
    486 							PUSH_STATE(STBRACEBOURNE);
    487 						else
    488 							PUSH_STATE(STBRACEKORN);
    489 					} else {
    490 						ungetsc(c);
    491 						if (state == SDQUOTE ||
    492 						    state == SQBRACE)
    493 							PUSH_STATE(SQBRACE);
    494 						else
    495 							PUSH_STATE(SBRACE);
    496 					}
    497 				} else if (ksh_isalphx(c)) {
    498 					*wp++ = OSUBST;
    499 					*wp++ = 'X';
    500 					do {
    501 						Xcheck(ws, wp);
    502 						*wp++ = c;
    503 						c = getsc();
    504 					} while (ksh_isalnux(c));
    505 					*wp++ = '\0';
    506 					*wp++ = CSUBST;
    507 					*wp++ = 'X';
    508 					ungetsc(c);
    509 				} else if (ctype(c, C_VAR1 | C_DIGIT)) {
    510 					Xcheck(ws, wp);
    511 					*wp++ = OSUBST;
    512 					*wp++ = 'X';
    513 					*wp++ = c;
    514 					*wp++ = '\0';
    515 					*wp++ = CSUBST;
    516 					*wp++ = 'X';
    517 				} else {
    518 					*wp++ = CHAR;
    519 					*wp++ = '$';
    520 					ungetsc(c);
    521 				}
    522 				break;
    523 			case '`':
    524  subst_gravis:
    525 				PUSH_STATE(SBQUOTE);
    526 				*wp++ = COMSUB;
    527 				/*
    528 				 * We need to know whether we are within double
    529 				 * quotes, since most shells translate \" to "
    530 				 * within "`\"`". This is not done in POSIX
    531 				 * mode (2.2.3 Double-Quotes: The backquote
    532 				 * shall retain its special meaning introducing
    533 				 * the other form of command substitution (see
    534 				 * Command Substitution). The portion of the
    535 				 * quoted string from the initial backquote and
    536 				 * the characters up to the next backquote that
    537 				 * is not preceded by a <backslash>, having
    538 				 * escape characters removed, defines that
    539 				 * command whose output replaces "`...`" when
    540 				 * the word is expanded.; 2.6.3 Command
    541 				 * Substitution: Within the backquoted style
    542 				 * of command substitution, <backslash> shall
    543 				 * retain its literal meaning, except when
    544 				 * followed by: '$', '`', or <backslash>. The
    545 				 * search for the matching backquote shall be
    546 				 * satisfied by the first unquoted non-escaped
    547 				 * backquote; during this search, if a
    548 				 * non-escaped backquote is encountered[],
    549 				 * undefined results occur.).
    550 				 */
    551 				statep->ls_bool = false;
    552 #ifdef austingroupbugs1015_is_still_not_resolved
    553 				if (Flag(FPOSIX))
    554 					break;
    555 #endif
    556 				s2 = statep;
    557 				base = state_info.base;
    558 				while (/* CONSTCOND */ 1) {
    559 					for (; s2 != base; s2--) {
    560 						if (s2->type == SDQUOTE) {
    561 							statep->ls_bool = true;
    562 							break;
    563 						}
    564 					}
    565 					if (s2 != base)
    566 						break;
    567 					if (!(s2 = s2->ls_base))
    568 						break;
    569 					base = s2-- - STATE_BSIZE;
    570 				}
    571 				break;
    572 			case QCHAR:
    573 				if (cf & LQCHAR) {
    574 					*wp++ = QCHAR;
    575 					*wp++ = getsc();
    576 					break;
    577 				}
    578 				/* FALLTHROUGH */
    579 			default:
    580  store_char:
    581 				*wp++ = CHAR;
    582 				*wp++ = c;
    583 			}
    584 			break;
    585 
    586 		case SEQUOTE:
    587 			if (c == '\'') {
    588 				POP_STATE();
    589 				*wp++ = CQUOTE;
    590 				ignore_backslash_newline--;
    591 			} else if (c == '\\') {
    592 				if ((c2 = unbksl(true, getsc_i, ungetsc)) == -1)
    593 					c2 = getsc();
    594 				if (c2 == 0)
    595 					statep->ls_bool = true;
    596 				if (!statep->ls_bool) {
    597 					char ts[4];
    598 
    599 					if ((unsigned int)c2 < 0x100) {
    600 						*wp++ = QCHAR;
    601 						*wp++ = c2;
    602 					} else {
    603 						cz = utf_wctomb(ts, c2 - 0x100);
    604 						ts[cz] = 0;
    605 						cz = 0;
    606 						do {
    607 							*wp++ = QCHAR;
    608 							*wp++ = ts[cz];
    609 						} while (ts[++cz]);
    610 					}
    611 				}
    612 			} else if (!statep->ls_bool) {
    613 				*wp++ = QCHAR;
    614 				*wp++ = c;
    615 			}
    616 			break;
    617 
    618 		case SSQUOTE:
    619 			if (c == '\'') {
    620 				POP_STATE();
    621 				if ((cf & HEREDOC) || state == SQBRACE)
    622 					goto store_char;
    623 				*wp++ = CQUOTE;
    624 				ignore_backslash_newline--;
    625 			} else {
    626 				*wp++ = QCHAR;
    627 				*wp++ = c;
    628 			}
    629 			break;
    630 
    631 		case SDQUOTE:
    632 			if (c == '"') {
    633 				POP_STATE();
    634 				*wp++ = CQUOTE;
    635 			} else
    636 				goto Subst;
    637 			break;
    638 
    639 		/* $(( ... )) */
    640 		case SASPAREN:
    641 			if (c == '(')
    642 				statep->nparen++;
    643 			else if (c == ')') {
    644 				statep->nparen--;
    645 				if (statep->nparen == 1) {
    646 					/* end of EXPRSUB */
    647 					POP_SRETRACE();
    648 
    649 					if ((c2 = getsc()) == /*(*/ ')') {
    650 						cz = strlen(sp) - 2;
    651 						XcheckN(ws, wp, cz);
    652 						memcpy(wp, sp + 1, cz);
    653 						wp += cz;
    654 						afree(sp, ATEMP);
    655 						*wp++ = '\0';
    656 						break;
    657 					} else {
    658 						Source *s;
    659 
    660 						ungetsc(c2);
    661 						/*
    662 						 * mismatched parenthesis -
    663 						 * assume we were really
    664 						 * parsing a $(...) expression
    665 						 */
    666 						--wp;
    667 						s = pushs(SREREAD,
    668 						    source->areap);
    669 						s->start = s->str =
    670 						    s->u.freeme = sp;
    671 						s->next = source;
    672 						source = s;
    673 						goto subst_command;
    674 					}
    675 				}
    676 			}
    677 			/* reuse existing state machine */
    678 			goto Sbase2;
    679 
    680 		case SQBRACE:
    681 			if (c == '\\') {
    682 				/*
    683 				 * perform POSIX "quote removal" if the back-
    684 				 * slash is "special", i.e. same cases as the
    685 				 * {case '\\':} in Subst: plus closing brace;
    686 				 * in mksh code "quote removal" on '\c' means
    687 				 * write QCHAR+c, otherwise CHAR+\+CHAR+c are
    688 				 * emitted (in heredocquote:)
    689 				 */
    690 				if ((c = getsc()) == '"' || c == '\\' ||
    691 				    c == '$' || c == '`' || c == /*{*/'}')
    692 					goto store_qchar;
    693 				goto heredocquote;
    694 			}
    695 			goto common_SQBRACE;
    696 
    697 		case SBRACE:
    698 			if (c == '\'')
    699 				goto open_ssquote_unless_heredoc;
    700 			else if (c == '\\')
    701 				goto getsc_qchar;
    702  common_SQBRACE:
    703 			if (c == '"')
    704 				goto open_sdquote;
    705 			else if (c == '$')
    706 				goto subst_dollar_ex;
    707 			else if (c == '`')
    708 				goto subst_gravis;
    709 			else if (c != /*{*/ '}')
    710 				goto store_char;
    711 			POP_STATE();
    712 			*wp++ = CSUBST;
    713 			*wp++ = /*{*/ '}';
    714 			break;
    715 
    716 		/* Same as SBASE, except (,|,) treated specially */
    717 		case STBRACEKORN:
    718 			if (c == '|')
    719 				*wp++ = SPAT;
    720 			else if (c == '(') {
    721 				*wp++ = OPAT;
    722 				/* simile for @ */
    723 				*wp++ = ' ';
    724 				PUSH_STATE(SPATTERN);
    725 			} else /* FALLTHROUGH */
    726 		case STBRACEBOURNE:
    727 			  if (c == /*{*/ '}') {
    728 				POP_STATE();
    729 				*wp++ = CSUBST;
    730 				*wp++ = /*{*/ '}';
    731 			} else
    732 				goto Sbase1;
    733 			break;
    734 
    735 		case SBQUOTE:
    736 			if (c == '`') {
    737 				*wp++ = 0;
    738 				POP_STATE();
    739 			} else if (c == '\\') {
    740 				switch (c = getsc()) {
    741 				case 0:
    742 					/* trailing \ is lost */
    743 					break;
    744 				case '$':
    745 				case '`':
    746 				case '\\':
    747 					*wp++ = c;
    748 					break;
    749 				case '"':
    750 					if (statep->ls_bool) {
    751 						*wp++ = c;
    752 						break;
    753 					}
    754 					/* FALLTHROUGH */
    755 				default:
    756 					*wp++ = '\\';
    757 					*wp++ = c;
    758 					break;
    759 				}
    760 			} else
    761 				*wp++ = c;
    762 			break;
    763 
    764 		/* ONEWORD */
    765 		case SWORD:
    766 			goto Subst;
    767 
    768 		/* LETEXPR: (( ... )) */
    769 		case SLETPAREN:
    770 			if (c == /*(*/ ')') {
    771 				if (statep->nparen > 0)
    772 					--statep->nparen;
    773 				else if ((c2 = getsc()) == /*(*/ ')') {
    774 					c = 0;
    775 					*wp++ = CQUOTE;
    776 					goto Done;
    777 				} else {
    778 					Source *s;
    779 
    780 					ungetsc(c2);
    781 					ungetsc(c);
    782 					/*
    783 					 * mismatched parenthesis -
    784 					 * assume we were really
    785 					 * parsing a (...) expression
    786 					 */
    787 					*wp = EOS;
    788 					sp = Xstring(ws, wp);
    789 					dp = wdstrip(sp + 1, WDS_TPUTS);
    790 					s = pushs(SREREAD, source->areap);
    791 					s->start = s->str = s->u.freeme = dp;
    792 					s->next = source;
    793 					source = s;
    794 					ungetsc('('/*)*/);
    795 					return ('('/*)*/);
    796 				}
    797 			} else if (c == '(')
    798 				/*
    799 				 * parentheses inside quotes and
    800 				 * backslashes are lost, but AT&T ksh
    801 				 * doesn't count them either
    802 				 */
    803 				++statep->nparen;
    804 			goto Sbase2;
    805 
    806 		/* << or <<- delimiter */
    807 		case SHEREDELIM:
    808 			/*
    809 			 * here delimiters need a special case since
    810 			 * $ and `...` are not to be treated specially
    811 			 */
    812 			switch (c) {
    813 			case '\\':
    814 				if ((c = getsc())) {
    815 					/* trailing \ is lost */
    816 					*wp++ = QCHAR;
    817 					*wp++ = c;
    818 				}
    819 				break;
    820 			case '\'':
    821 				goto open_ssquote_unless_heredoc;
    822 			case '$':
    823 				if ((c2 = getsc()) == '\'') {
    824  open_sequote:
    825 					*wp++ = OQUOTE;
    826 					ignore_backslash_newline++;
    827 					PUSH_STATE(SEQUOTE);
    828 					statep->ls_bool = false;
    829 					break;
    830 				} else if (c2 == '"') {
    831 					/* FALLTHROUGH */
    832 			case '"':
    833 					PUSH_SRETRACE(SHEREDQUOTE);
    834 					break;
    835 				}
    836 				ungetsc(c2);
    837 				/* FALLTHROUGH */
    838 			default:
    839 				*wp++ = CHAR;
    840 				*wp++ = c;
    841 			}
    842 			break;
    843 
    844 		/* " in << or <<- delimiter */
    845 		case SHEREDQUOTE:
    846 			if (c != '"')
    847 				goto Subst;
    848 			POP_SRETRACE();
    849 			dp = strnul(sp) - 1;
    850 			/* remove the trailing double quote */
    851 			*dp = '\0';
    852 			/* store the quoted string */
    853 			*wp++ = OQUOTE;
    854 			XcheckN(ws, wp, (dp - sp) * 2);
    855 			dp = sp;
    856 			while ((c = *dp++)) {
    857 				if (c == '\\') {
    858 					switch ((c = *dp++)) {
    859 					case '\\':
    860 					case '"':
    861 					case '$':
    862 					case '`':
    863 						break;
    864 					default:
    865 						*wp++ = CHAR;
    866 						*wp++ = '\\';
    867 						break;
    868 					}
    869 				}
    870 				*wp++ = CHAR;
    871 				*wp++ = c;
    872 			}
    873 			afree(sp, ATEMP);
    874 			*wp++ = CQUOTE;
    875 			state = statep->type = SHEREDELIM;
    876 			break;
    877 
    878 		/* in *(...|...) pattern (*+?@!) */
    879 		case SPATTERN:
    880 			if (c == /*(*/ ')') {
    881 				*wp++ = CPAT;
    882 				POP_STATE();
    883 			} else if (c == '|') {
    884 				*wp++ = SPAT;
    885 			} else if (c == '(') {
    886 				*wp++ = OPAT;
    887 				/* simile for @ */
    888 				*wp++ = ' ';
    889 				PUSH_STATE(SPATTERN);
    890 			} else
    891 				goto Sbase1;
    892 			break;
    893 		}
    894 	}
    895  Done:
    896 	Xcheck(ws, wp);
    897 	if (statep != &states[1])
    898 		/* XXX figure out what is missing */
    899 		yyerror("no closing quote\n");
    900 
    901 	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
    902 	if (state == SHEREDELIM)
    903 		state = SBASE;
    904 
    905 	dp = Xstring(ws, wp);
    906 	if (state == SBASE && (
    907 #ifndef MKSH_LEGACY_MODE
    908 	    (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) ||
    909 #endif
    910 	    c == '<' || c == '>')) {
    911 		struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
    912 
    913 		if (Xlength(ws, wp) == 0)
    914 			iop->unit = c == '<' ? 0 : 1;
    915 		else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) {
    916 			if (dp[c2] != CHAR)
    917 				goto no_iop;
    918 			if (!ksh_isdigit(dp[c2 + 1]))
    919 				goto no_iop;
    920 			iop->unit = iop->unit * 10 + ksh_numdig(dp[c2 + 1]);
    921 			if (iop->unit >= FDBASE)
    922 				goto no_iop;
    923 		}
    924 
    925 		if (c == '&') {
    926 			if ((c2 = getsc()) != '>') {
    927 				ungetsc(c2);
    928 				goto no_iop;
    929 			}
    930 			c = c2;
    931 			iop->ioflag = IOBASH;
    932 		} else
    933 			iop->ioflag = 0;
    934 
    935 		c2 = getsc();
    936 		/* <<, >>, <> are ok, >< is not */
    937 		if (c == c2 || (c == '<' && c2 == '>')) {
    938 			iop->ioflag |= c == c2 ?
    939 			    (c == '>' ? IOCAT : IOHERE) : IORDWR;
    940 			if (iop->ioflag == IOHERE) {
    941 				if ((c2 = getsc()) == '-')
    942 					iop->ioflag |= IOSKIP;
    943 				else if (c2 == '<')
    944 					iop->ioflag |= IOHERESTR;
    945 				else
    946 					ungetsc(c2);
    947 			}
    948 		} else if (c2 == '&')
    949 			iop->ioflag |= IODUP | (c == '<' ? IORDUP : 0);
    950 		else {
    951 			iop->ioflag |= c == '>' ? IOWRITE : IOREAD;
    952 			if (c == '>' && c2 == '|')
    953 				iop->ioflag |= IOCLOB;
    954 			else
    955 				ungetsc(c2);
    956 		}
    957 
    958 		iop->ioname = NULL;
    959 		iop->delim = NULL;
    960 		iop->heredoc = NULL;
    961 		/* free word */
    962 		Xfree(ws, wp);
    963 		yylval.iop = iop;
    964 		return (REDIR);
    965  no_iop:
    966 		afree(iop, ATEMP);
    967 	}
    968 
    969 	if (wp == dp && state == SBASE) {
    970 		/* free word */
    971 		Xfree(ws, wp);
    972 		/* no word, process LEX1 character */
    973 		if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) {
    974 			if ((c2 = getsc()) == c)
    975 				c = (c == ';') ? BREAK :
    976 				    (c == '|') ? LOGOR :
    977 				    (c == '&') ? LOGAND :
    978 				    /* c == '(' ) */ MDPAREN;
    979 			else if (c == '|' && c2 == '&')
    980 				c = COPROC;
    981 			else if (c == ';' && c2 == '|')
    982 				c = BRKEV;
    983 			else if (c == ';' && c2 == '&')
    984 				c = BRKFT;
    985 			else
    986 				ungetsc(c2);
    987 #ifndef MKSH_SMALL
    988 			if (c == BREAK) {
    989 				if ((c2 = getsc()) == '&')
    990 					c = BRKEV;
    991 				else
    992 					ungetsc(c2);
    993 			}
    994 #endif
    995 		} else if (c == '\n') {
    996 			if (cf & HEREDELIM)
    997 				ungetsc(c);
    998 			else {
    999 				gethere();
   1000 				if (cf & CONTIN)
   1001 					goto Again;
   1002 			}
   1003 		}
   1004 		return (c);
   1005 	}
   1006 
   1007 	/* terminate word */
   1008 	*wp++ = EOS;
   1009 	yylval.cp = Xclose(ws, wp);
   1010 	if (state == SWORD || state == SLETPAREN
   1011 	    /* XXX ONEWORD? */)
   1012 		return (LWORD);
   1013 
   1014 	/* unget terminator */
   1015 	ungetsc(c);
   1016 
   1017 	/*
   1018 	 * note: the alias-vs-function code below depends on several
   1019 	 * interna: starting from here, source->str is not modified;
   1020 	 * the way getsc() and ungetsc() operate; etc.
   1021 	 */
   1022 
   1023 	/* copy word to unprefixed string ident */
   1024 	sp = yylval.cp;
   1025 	dp = ident;
   1026 	while ((dp - ident) < IDENT && (c = *sp++) == CHAR)
   1027 		*dp++ = *sp++;
   1028 	if (c != EOS)
   1029 		/* word is not unquoted */
   1030 		dp = ident;
   1031 	/* make sure the ident array stays NUL padded */
   1032 	memset(dp, 0, (ident + IDENT) - dp + 1);
   1033 
   1034 	if (!(cf & (KEYWORD | ALIAS)))
   1035 		return (LWORD);
   1036 
   1037 	if (*ident != '\0') {
   1038 		struct tbl *p;
   1039 		uint32_t h = hash(ident);
   1040 
   1041 		if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
   1042 		    (!(cf & ESACONLY) || p->val.i == ESAC ||
   1043 		    p->val.i == /*{*/ '}')) {
   1044 			afree(yylval.cp, ATEMP);
   1045 			return (p->val.i);
   1046 		}
   1047 		if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
   1048 		    (p->flag & ISSET)) {
   1049 			/*
   1050 			 * this still points to the same character as the
   1051 			 * ungetsc'd terminator from above
   1052 			 */
   1053 			const char *cp = source->str;
   1054 
   1055 			/* prefer POSIX but not Korn functions over aliases */
   1056 			while (*cp == ' ' || *cp == '\t')
   1057 				/*
   1058 				 * this is like getsc() without skipping
   1059 				 * over Source boundaries (including not
   1060 				 * parsing ungetsc'd characters that got
   1061 				 * pushed into an SREREAD) which is what
   1062 				 * we want here anyway: find out whether
   1063 				 * the alias name is followed by a POSIX
   1064 				 * function definition
   1065 				 */
   1066 				++cp;
   1067 			/* prefer functions over aliases */
   1068 			if (cp[0] != '(' || cp[1] != ')') {
   1069 				Source *s = source;
   1070 
   1071 				while (s && (s->flags & SF_HASALIAS))
   1072 					if (s->u.tblp == p)
   1073 						return (LWORD);
   1074 					else
   1075 						s = s->next;
   1076 				/* push alias expansion */
   1077 				s = pushs(SALIAS, source->areap);
   1078 				s->start = s->str = p->val.s;
   1079 				s->u.tblp = p;
   1080 				s->flags |= SF_HASALIAS;
   1081 				s->next = source;
   1082 				if (source->type == SEOF) {
   1083 					/* prevent infinite recursion at EOS */
   1084 					source->u.tblp = p;
   1085 					source->flags |= SF_HASALIAS;
   1086 				}
   1087 				source = s;
   1088 				afree(yylval.cp, ATEMP);
   1089 				goto Again;
   1090 			}
   1091 		}
   1092 	} else if (cf & ALIAS) {
   1093 		/* retain typeset et al. even when quoted */
   1094 		if (assign_command((dp = wdstrip(yylval.cp, 0)), true))
   1095 			strlcpy(ident, dp, sizeof(ident));
   1096 		afree(dp, ATEMP);
   1097 	}
   1098 
   1099 	return (LWORD);
   1100 }
   1101 
   1102 static void
   1103 gethere(void)
   1104 {
   1105 	struct ioword **p;
   1106 
   1107 	for (p = heres; p < herep; p++)
   1108 		if (!((*p)->ioflag & IOHERESTR))
   1109 			readhere(*p);
   1110 	herep = heres;
   1111 }
   1112 
   1113 /*
   1114  * read "<<word" text into temp file
   1115  */
   1116 
   1117 static void
   1118 readhere(struct ioword *iop)
   1119 {
   1120 	int c;
   1121 	const char *eof, *eofp;
   1122 	XString xs;
   1123 	char *xp;
   1124 	size_t xpos;
   1125 
   1126 	eof = evalstr(iop->delim, 0);
   1127 
   1128 	if (!(iop->ioflag & IOEVAL))
   1129 		ignore_backslash_newline++;
   1130 
   1131 	Xinit(xs, xp, 256, ATEMP);
   1132 
   1133  heredoc_read_line:
   1134 	/* beginning of line */
   1135 	eofp = eof;
   1136 	xpos = Xsavepos(xs, xp);
   1137 	if (iop->ioflag & IOSKIP) {
   1138 		/* skip over leading tabs */
   1139 		while ((c = getsc()) == '\t')
   1140 			;	/* nothing */
   1141 		goto heredoc_parse_char;
   1142 	}
   1143  heredoc_read_char:
   1144 	c = getsc();
   1145  heredoc_parse_char:
   1146 	/* compare with here document marker */
   1147 	if (!*eofp) {
   1148 		/* end of here document marker, what to do? */
   1149 		switch (c) {
   1150 		case /*(*/ ')':
   1151 			if (!subshell_nesting_type)
   1152 				/*-
   1153 				 * not allowed outside $(...) or (...)
   1154 				 * => mismatch
   1155 				 */
   1156 				break;
   1157 			/* allow $(...) or (...) to close here */
   1158 			ungetsc(/*(*/ ')');
   1159 			/* FALLTHROUGH */
   1160 		case 0:
   1161 			/*
   1162 			 * Allow EOF here to commands without trailing
   1163 			 * newlines (mksh -c '...') will work as well.
   1164 			 */
   1165 		case '\n':
   1166 			/* Newline terminates here document marker */
   1167 			goto heredoc_found_terminator;
   1168 		}
   1169 	} else if (c == *eofp++)
   1170 		/* store; then read and compare next character */
   1171 		goto heredoc_store_and_loop;
   1172 	/* nope, mismatch; read until end of line */
   1173 	while (c != '\n') {
   1174 		if (!c)
   1175 			/* oops, reached EOF */
   1176 			yyerror("%s '%s' unclosed\n", "here document", eof);
   1177 		/* store character */
   1178 		Xcheck(xs, xp);
   1179 		Xput(xs, xp, c);
   1180 		/* read next character */
   1181 		c = getsc();
   1182 	}
   1183 	/* we read a newline as last character */
   1184  heredoc_store_and_loop:
   1185 	/* store character */
   1186 	Xcheck(xs, xp);
   1187 	Xput(xs, xp, c);
   1188 	if (c == '\n')
   1189 		goto heredoc_read_line;
   1190 	goto heredoc_read_char;
   1191 
   1192  heredoc_found_terminator:
   1193 	/* jump back to saved beginning of line */
   1194 	xp = Xrestpos(xs, xp, xpos);
   1195 	/* terminate, close and store */
   1196 	Xput(xs, xp, '\0');
   1197 	iop->heredoc = Xclose(xs, xp);
   1198 
   1199 	if (!(iop->ioflag & IOEVAL))
   1200 		ignore_backslash_newline--;
   1201 }
   1202 
   1203 void
   1204 yyerror(const char *fmt, ...)
   1205 {
   1206 	va_list va;
   1207 
   1208 	/* pop aliases and re-reads */
   1209 	while (source->type == SALIAS || source->type == SREREAD)
   1210 		source = source->next;
   1211 	/* zap pending input */
   1212 	source->str = null;
   1213 
   1214 	error_prefix(true);
   1215 	va_start(va, fmt);
   1216 	shf_vfprintf(shl_out, fmt, va);
   1217 	va_end(va);
   1218 	errorfz();
   1219 }
   1220 
   1221 /*
   1222  * input for yylex with alias expansion
   1223  */
   1224 
   1225 Source *
   1226 pushs(int type, Area *areap)
   1227 {
   1228 	Source *s;
   1229 
   1230 	s = alloc(sizeof(Source), areap);
   1231 	memset(s, 0, sizeof(Source));
   1232 	s->type = type;
   1233 	s->str = null;
   1234 	s->areap = areap;
   1235 	if (type == SFILE || type == SSTDIN)
   1236 		XinitN(s->xs, 256, s->areap);
   1237 	return (s);
   1238 }
   1239 
   1240 static int
   1241 getsc_uu(void)
   1242 {
   1243 	Source *s = source;
   1244 	int c;
   1245 
   1246 	while ((c = *s->str++) == 0) {
   1247 		/* return 0 for EOF by default */
   1248 		s->str = NULL;
   1249 		switch (s->type) {
   1250 		case SEOF:
   1251 			s->str = null;
   1252 			return (0);
   1253 
   1254 		case SSTDIN:
   1255 		case SFILE:
   1256 			getsc_line(s);
   1257 			break;
   1258 
   1259 		case SWSTR:
   1260 			break;
   1261 
   1262 		case SSTRING:
   1263 		case SSTRINGCMDLINE:
   1264 			break;
   1265 
   1266 		case SWORDS:
   1267 			s->start = s->str = *s->u.strv++;
   1268 			s->type = SWORDSEP;
   1269 			break;
   1270 
   1271 		case SWORDSEP:
   1272 			if (*s->u.strv == NULL) {
   1273 				s->start = s->str = "\n";
   1274 				s->type = SEOF;
   1275 			} else {
   1276 				s->start = s->str = " ";
   1277 				s->type = SWORDS;
   1278 			}
   1279 			break;
   1280 
   1281 		case SALIAS:
   1282 			if (s->flags & SF_ALIASEND) {
   1283 				/* pass on an unused SF_ALIAS flag */
   1284 				source = s->next;
   1285 				source->flags |= s->flags & SF_ALIAS;
   1286 				s = source;
   1287 			} else if (*s->u.tblp->val.s &&
   1288 			    (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) {
   1289 				/* pop source stack */
   1290 				source = s = s->next;
   1291 				/*
   1292 				 * Note that this alias ended with a
   1293 				 * space, enabling alias expansion on
   1294 				 * the following word.
   1295 				 */
   1296 				s->flags |= SF_ALIAS;
   1297 			} else {
   1298 				/*
   1299 				 * At this point, we need to keep the current
   1300 				 * alias in the source list so recursive
   1301 				 * aliases can be detected and we also need to
   1302 				 * return the next character. Do this by
   1303 				 * temporarily popping the alias to get the
   1304 				 * next character and then put it back in the
   1305 				 * source list with the SF_ALIASEND flag set.
   1306 				 */
   1307 				/* pop source stack */
   1308 				source = s->next;
   1309 				source->flags |= s->flags & SF_ALIAS;
   1310 				c = getsc_uu();
   1311 				if (c) {
   1312 					s->flags |= SF_ALIASEND;
   1313 					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
   1314 					s->start = s->str = s->ugbuf;
   1315 					s->next = source;
   1316 					source = s;
   1317 				} else {
   1318 					s = source;
   1319 					/* avoid reading EOF twice */
   1320 					s->str = NULL;
   1321 					break;
   1322 				}
   1323 			}
   1324 			continue;
   1325 
   1326 		case SREREAD:
   1327 			if (s->start != s->ugbuf)
   1328 				/* yuck */
   1329 				afree(s->u.freeme, ATEMP);
   1330 			source = s = s->next;
   1331 			continue;
   1332 		}
   1333 		if (s->str == NULL) {
   1334 			s->type = SEOF;
   1335 			s->start = s->str = null;
   1336 			return ('\0');
   1337 		}
   1338 		if (s->flags & SF_ECHO) {
   1339 			shf_puts(s->str, shl_out);
   1340 			shf_flush(shl_out);
   1341 		}
   1342 	}
   1343 	return (c);
   1344 }
   1345 
   1346 static void
   1347 getsc_line(Source *s)
   1348 {
   1349 	char *xp = Xstring(s->xs, xp), *cp;
   1350 	bool interactive = Flag(FTALKING) && s->type == SSTDIN;
   1351 	bool have_tty = tobool(interactive && (s->flags & SF_TTY));
   1352 
   1353 	/* Done here to ensure nothing odd happens when a timeout occurs */
   1354 	XcheckN(s->xs, xp, LINE);
   1355 	*xp = '\0';
   1356 	s->start = s->str = xp;
   1357 
   1358 	if (have_tty && ksh_tmout) {
   1359 		ksh_tmout_state = TMOUT_READING;
   1360 		alarm(ksh_tmout);
   1361 	}
   1362 	if (interactive)
   1363 		change_winsz();
   1364 #ifndef MKSH_NO_CMDLINE_EDITING
   1365 	if (have_tty && (
   1366 #if !MKSH_S_NOVI
   1367 	    Flag(FVI) ||
   1368 #endif
   1369 	    Flag(FEMACS) || Flag(FGMACS))) {
   1370 		int nread;
   1371 
   1372 		nread = x_read(xp);
   1373 		if (nread < 0)
   1374 			/* read error */
   1375 			nread = 0;
   1376 		xp[nread] = '\0';
   1377 		xp += nread;
   1378 	} else
   1379 #endif
   1380 	  {
   1381 		if (interactive)
   1382 			pprompt(prompt, 0);
   1383 		else
   1384 			s->line++;
   1385 
   1386 		while (/* CONSTCOND */ 1) {
   1387 			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
   1388 
   1389 			if (!p && shf_error(s->u.shf) &&
   1390 			    shf_errno(s->u.shf) == EINTR) {
   1391 				shf_clearerr(s->u.shf);
   1392 				if (trap)
   1393 					runtraps(0);
   1394 				continue;
   1395 			}
   1396 			if (!p || (xp = p, xp[-1] == '\n'))
   1397 				break;
   1398 			/* double buffer size */
   1399 			/* move past NUL so doubling works... */
   1400 			xp++;
   1401 			XcheckN(s->xs, xp, Xlength(s->xs, xp));
   1402 			/* ...and move back again */
   1403 			xp--;
   1404 		}
   1405 		/*
   1406 		 * flush any unwanted input so other programs/builtins
   1407 		 * can read it. Not very optimal, but less error prone
   1408 		 * than flushing else where, dealing with redirections,
   1409 		 * etc.
   1410 		 * TODO: reduce size of shf buffer (~128?) if SSTDIN
   1411 		 */
   1412 		if (s->type == SSTDIN)
   1413 			shf_flush(s->u.shf);
   1414 	}
   1415 	/*
   1416 	 * XXX: temporary kludge to restore source after a
   1417 	 * trap may have been executed.
   1418 	 */
   1419 	source = s;
   1420 	if (have_tty && ksh_tmout) {
   1421 		ksh_tmout_state = TMOUT_EXECUTING;
   1422 		alarm(0);
   1423 	}
   1424 	cp = Xstring(s->xs, xp);
   1425 	rndpush(cp);
   1426 	s->start = s->str = cp;
   1427 	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
   1428 	/* Note: if input is all nulls, this is not eof */
   1429 	if (Xlength(s->xs, xp) == 0) {
   1430 		/* EOF */
   1431 		if (s->type == SFILE)
   1432 			shf_fdclose(s->u.shf);
   1433 		s->str = NULL;
   1434 	} else if (interactive && *s->str) {
   1435 		if (cur_prompt != PS1)
   1436 			histsave(&s->line, s->str, HIST_APPEND, true);
   1437 		else if (!ctype(*s->str, C_IFS | C_IFSWS))
   1438 			histsave(&s->line, s->str, HIST_QUEUE, true);
   1439 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
   1440 		else
   1441 			goto check_for_sole_return;
   1442 	} else if (interactive && cur_prompt == PS1) {
   1443  check_for_sole_return:
   1444 		cp = Xstring(s->xs, xp);
   1445 		while (*cp && ctype(*cp, C_IFSWS))
   1446 			++cp;
   1447 		if (!*cp) {
   1448 			histsave(&s->line, NULL, HIST_FLUSH, true);
   1449 			histsync();
   1450 		}
   1451 #endif
   1452 	}
   1453 	if (interactive)
   1454 		set_prompt(PS2, NULL);
   1455 }
   1456 
   1457 void
   1458 set_prompt(int to, Source *s)
   1459 {
   1460 	cur_prompt = (uint8_t)to;
   1461 
   1462 	switch (to) {
   1463 	/* command */
   1464 	case PS1:
   1465 		/*
   1466 		 * Substitute ! and !! here, before substitutions are done
   1467 		 * so ! in expanded variables are not expanded.
   1468 		 * NOTE: this is not what AT&T ksh does (it does it after
   1469 		 * substitutions, POSIX doesn't say which is to be done.
   1470 		 */
   1471 		{
   1472 			struct shf *shf;
   1473 			char * volatile ps1;
   1474 			Area *saved_atemp;
   1475 			int saved_lineno;
   1476 
   1477 			ps1 = str_val(global("PS1"));
   1478 			shf = shf_sopen(NULL, strlen(ps1) * 2,
   1479 			    SHF_WR | SHF_DYNAMIC, NULL);
   1480 			while (*ps1)
   1481 				if (*ps1 != '!' || *++ps1 == '!')
   1482 					shf_putchar(*ps1++, shf);
   1483 				else
   1484 					shf_fprintf(shf, "%lu", s ?
   1485 					    (unsigned long)s->line + 1 : 0UL);
   1486 			ps1 = shf_sclose(shf);
   1487 			saved_lineno = current_lineno;
   1488 			if (s)
   1489 				current_lineno = s->line + 1;
   1490 			saved_atemp = ATEMP;
   1491 			newenv(E_ERRH);
   1492 			if (kshsetjmp(e->jbuf)) {
   1493 				prompt = safe_prompt;
   1494 				/*
   1495 				 * Don't print an error - assume it has already
   1496 				 * been printed. Reason is we may have forked
   1497 				 * to run a command and the child may be
   1498 				 * unwinding its stack through this code as it
   1499 				 * exits.
   1500 				 */
   1501 			} else {
   1502 				char *cp = substitute(ps1, 0);
   1503 				strdupx(prompt, cp, saved_atemp);
   1504 			}
   1505 			current_lineno = saved_lineno;
   1506 			quitenv(NULL);
   1507 		}
   1508 		break;
   1509 	/* command continuation */
   1510 	case PS2:
   1511 		prompt = str_val(global("PS2"));
   1512 		break;
   1513 	}
   1514 }
   1515 
   1516 int
   1517 pprompt(const char *cp, int ntruncate)
   1518 {
   1519 	char delimiter = 0;
   1520 	bool doprint = (ntruncate != -1);
   1521 	bool indelimit = false;
   1522 	int columns = 0, lines = 0;
   1523 
   1524 	/*
   1525 	 * Undocumented AT&T ksh feature:
   1526 	 * If the second char in the prompt string is \r then the first
   1527 	 * char is taken to be a non-printing delimiter and any chars
   1528 	 * between two instances of the delimiter are not considered to
   1529 	 * be part of the prompt length
   1530 	 */
   1531 	if (*cp && cp[1] == '\r') {
   1532 		delimiter = *cp;
   1533 		cp += 2;
   1534 	}
   1535 	for (; *cp; cp++) {
   1536 		if (indelimit && *cp != delimiter)
   1537 			;
   1538 		else if (*cp == '\n' || *cp == '\r') {
   1539 			lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
   1540 			columns = 0;
   1541 		} else if (*cp == '\t') {
   1542 			columns = (columns | 7) + 1;
   1543 		} else if (*cp == '\b') {
   1544 			if (columns > 0)
   1545 				columns--;
   1546 		} else if (*cp == delimiter)
   1547 			indelimit = !indelimit;
   1548 		else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
   1549 			const char *cp2;
   1550 			columns += utf_widthadj(cp, &cp2);
   1551 			if (doprint && (indelimit ||
   1552 			    (ntruncate < (x_cols * lines + columns))))
   1553 				shf_write(cp, cp2 - cp, shl_out);
   1554 			cp = cp2 - /* loop increment */ 1;
   1555 			continue;
   1556 		} else
   1557 			columns++;
   1558 		if (doprint && (*cp != delimiter) &&
   1559 		    (indelimit || (ntruncate < (x_cols * lines + columns))))
   1560 			shf_putc(*cp, shl_out);
   1561 	}
   1562 	if (doprint)
   1563 		shf_flush(shl_out);
   1564 	return (x_cols * lines + columns);
   1565 }
   1566 
   1567 /*
   1568  * Read the variable part of a ${...} expression (i.e. up to but not
   1569  * including the :[-+?=#%] or close-brace).
   1570  */
   1571 static char *
   1572 get_brace_var(XString *wsp, char *wp)
   1573 {
   1574 	char c;
   1575 	enum parse_state {
   1576 		PS_INITIAL, PS_SAW_HASH, PS_IDENT,
   1577 		PS_NUMBER, PS_VAR1
   1578 	} state = PS_INITIAL;
   1579 
   1580 	while (/* CONSTCOND */ 1) {
   1581 		c = getsc();
   1582 		/* State machine to figure out where the variable part ends. */
   1583 		switch (state) {
   1584 		case PS_INITIAL:
   1585 			if (c == '#' || c == '!' || c == '%') {
   1586 				state = PS_SAW_HASH;
   1587 				break;
   1588 			}
   1589 			/* FALLTHROUGH */
   1590 		case PS_SAW_HASH:
   1591 			if (ksh_isalphx(c))
   1592 				state = PS_IDENT;
   1593 			else if (ksh_isdigit(c))
   1594 				state = PS_NUMBER;
   1595 			else if (c == '#') {
   1596 				if (state == PS_SAW_HASH) {
   1597 					char c2;
   1598 
   1599 					c2 = getsc();
   1600 					ungetsc(c2);
   1601 					if (c2 != /*{*/ '}') {
   1602 						ungetsc(c);
   1603 						goto out;
   1604 					}
   1605 				}
   1606 				state = PS_VAR1;
   1607 			} else if (ctype(c, C_VAR1))
   1608 				state = PS_VAR1;
   1609 			else
   1610 				goto out;
   1611 			break;
   1612 		case PS_IDENT:
   1613 			if (!ksh_isalnux(c)) {
   1614 				if (c == '[') {
   1615 					char *tmp, *p;
   1616 
   1617 					if (!arraysub(&tmp))
   1618 						yyerror("missing ]\n");
   1619 					*wp++ = c;
   1620 					for (p = tmp; *p; ) {
   1621 						Xcheck(*wsp, wp);
   1622 						*wp++ = *p++;
   1623 					}
   1624 					afree(tmp, ATEMP);
   1625 					/* the ] */
   1626 					c = getsc();
   1627 				}
   1628 				goto out;
   1629 			}
   1630 			break;
   1631 		case PS_NUMBER:
   1632 			if (!ksh_isdigit(c))
   1633 				goto out;
   1634 			break;
   1635 		case PS_VAR1:
   1636 			goto out;
   1637 		}
   1638 		Xcheck(*wsp, wp);
   1639 		*wp++ = c;
   1640 	}
   1641  out:
   1642 	/* end of variable part */
   1643 	*wp++ = '\0';
   1644 	ungetsc(c);
   1645 	return (wp);
   1646 }
   1647 
   1648 /*
   1649  * Save an array subscript - returns true if matching bracket found, false
   1650  * if eof or newline was found.
   1651  * (Returned string double null terminated)
   1652  */
   1653 static bool
   1654 arraysub(char **strp)
   1655 {
   1656 	XString ws;
   1657 	char *wp, c;
   1658 	/* we are just past the initial [ */
   1659 	unsigned int depth = 1;
   1660 
   1661 	Xinit(ws, wp, 32, ATEMP);
   1662 
   1663 	do {
   1664 		c = getsc();
   1665 		Xcheck(ws, wp);
   1666 		*wp++ = c;
   1667 		if (c == '[')
   1668 			depth++;
   1669 		else if (c == ']')
   1670 			depth--;
   1671 	} while (depth > 0 && c && c != '\n');
   1672 
   1673 	*wp++ = '\0';
   1674 	*strp = Xclose(ws, wp);
   1675 
   1676 	return (tobool(depth == 0));
   1677 }
   1678 
   1679 /* Unget a char: handles case when we are already at the start of the buffer */
   1680 static void
   1681 ungetsc(int c)
   1682 {
   1683 	struct sretrace_info *rp = retrace_info;
   1684 
   1685 	if (backslash_skip)
   1686 		backslash_skip--;
   1687 	/* Don't unget EOF... */
   1688 	if (source->str == null && c == '\0')
   1689 		return;
   1690 	while (rp) {
   1691 		if (Xlength(rp->xs, rp->xp))
   1692 			rp->xp--;
   1693 		rp = rp->next;
   1694 	}
   1695 	ungetsc_i(c);
   1696 }
   1697 static void
   1698 ungetsc_i(int c)
   1699 {
   1700 	if (source->str > source->start)
   1701 		source->str--;
   1702 	else {
   1703 		Source *s;
   1704 
   1705 		s = pushs(SREREAD, source->areap);
   1706 		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
   1707 		s->start = s->str = s->ugbuf;
   1708 		s->next = source;
   1709 		source = s;
   1710 	}
   1711 }
   1712 
   1713 
   1714 /* Called to get a char that isn't a \newline sequence. */
   1715 static int
   1716 getsc_bn(void)
   1717 {
   1718 	int c, c2;
   1719 
   1720 	if (ignore_backslash_newline)
   1721 		return (o_getsc_u());
   1722 
   1723 	if (backslash_skip == 1) {
   1724 		backslash_skip = 2;
   1725 		return (o_getsc_u());
   1726 	}
   1727 
   1728 	backslash_skip = 0;
   1729 
   1730 	while (/* CONSTCOND */ 1) {
   1731 		c = o_getsc_u();
   1732 		if (c == '\\') {
   1733 			if ((c2 = o_getsc_u()) == '\n')
   1734 				/* ignore the \newline; get the next char... */
   1735 				continue;
   1736 			ungetsc_i(c2);
   1737 			backslash_skip = 1;
   1738 		}
   1739 		return (c);
   1740 	}
   1741 }
   1742 
   1743 void
   1744 yyskiputf8bom(void)
   1745 {
   1746 	int c;
   1747 
   1748 	if ((unsigned char)(c = o_getsc_u()) != 0xEF) {
   1749 		ungetsc_i(c);
   1750 		return;
   1751 	}
   1752 	if ((unsigned char)(c = o_getsc_u()) != 0xBB) {
   1753 		ungetsc_i(c);
   1754 		ungetsc_i(0xEF);
   1755 		return;
   1756 	}
   1757 	if ((unsigned char)(c = o_getsc_u()) != 0xBF) {
   1758 		ungetsc_i(c);
   1759 		ungetsc_i(0xBB);
   1760 		ungetsc_i(0xEF);
   1761 		return;
   1762 	}
   1763 	UTFMODE |= 8;
   1764 }
   1765 
   1766 static Lex_state *
   1767 push_state_i(State_info *si, Lex_state *old_end)
   1768 {
   1769 	Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);
   1770 
   1771 	news[0].ls_base = old_end;
   1772 	si->base = &news[0];
   1773 	si->end = &news[STATE_BSIZE];
   1774 	return (&news[1]);
   1775 }
   1776 
   1777 static Lex_state *
   1778 pop_state_i(State_info *si, Lex_state *old_end)
   1779 {
   1780 	Lex_state *old_base = si->base;
   1781 
   1782 	si->base = old_end->ls_base - STATE_BSIZE;
   1783 	si->end = old_end->ls_base;
   1784 
   1785 	afree(old_base, ATEMP);
   1786 
   1787 	return (si->base + STATE_BSIZE - 1);
   1788 }
   1789