Home | History | Annotate | Download | only in src
      1 /*	$OpenBSD: lex.c,v 1.45 2011/03/09 09:30:39 okan Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
      5  *	Thorsten Glaser <tg (at) mirbsd.org>
      6  *
      7  * Provided that these terms and disclaimer and all copyright notices
      8  * are retained or reproduced in an accompanying document, permission
      9  * is granted to deal in this work without restriction, including un-
     10  * limited rights to use, publicly perform, distribute, sell, modify,
     11  * merge, give away, or sublicence.
     12  *
     13  * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
     14  * the utmost extent permitted by applicable law, neither express nor
     15  * implied; without malicious intent or gross negligence. In no event
     16  * may a licensor, author or contributor be held liable for indirect,
     17  * direct, other damage, loss, or other issues arising in any way out
     18  * of dealing in the work, even if advised of the possibility of such
     19  * damage or existence of a defect, except proven that it results out
     20  * of said person's immediate fault when using the work as intended.
     21  */
     22 
     23 #include "sh.h"
     24 
     25 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.156 2011/09/07 15:24:16 tg Exp $");
     26 
     27 /*
     28  * states while lexing word
         *
         * Each constant names one lexical context. The current one is kept in
         * Lex_state.type (a uint8_t, so the values have to fit in 0..255) and
         * mirrored in yylex()'s local "state", which yylex() switches on for
         * every character it reads.
     29  */
     30 #define SBASE		0	/* outside any lexical constructs */
     31 #define SWORD		1	/* ONEWORD: implicit quoting for substitute() */
     32 #define SLETPAREN	2	/* LETEXPR: inside (( )), implicit quoting */
     33 #define SSQUOTE		3	/* inside '' */
     34 #define SDQUOTE		4	/* inside "" */
     35 #define SEQUOTE		5	/* inside $'' */
     36 #define SBRACE		6	/* inside ${} */
     37 #define SQBRACE		7	/* inside "${}" */
     38 #define SBQUOTE		8	/* inside `` */
     39 #define SASPAREN	9	/* inside $(( )) */
     40 #define SHEREDELIM	10	/* parsing <<,<<- delimiter */
     41 #define SHEREDQUOTE	11	/* parsing " in <<,<<- delimiter or <<< string */
     42 #define SPATTERN	12	/* parsing *(...|...) pattern (*+?@!) */
     43 #define SADELIM		13	/* like SBASE, looking for delimiter */
     44 #define SHERESTRING	14	/* parsing <<< string */
     45 #define STBRACEKORN	15	/* parsing ${...[#%]...}, Flag(FSH) unset */
     46 #define STBRACEBOURNE	16	/* parsing ${...[#%]...}, Flag(FSH) set */
     47 #define SINVALID	255	/* invalid state; yylex() puts it in slot 0 of its state array */
     48 
     49 struct sretrace_info {
        	/*
        	 * One record per PUSH_SRETRACE() that has not been popped yet.
        	 * While records exist, o_getsc_r() appends every character it
        	 * hands out to the buffer of each of them.
        	 */
        	/* record that was pushed before this one; NULL ends the list */
     50 	struct sretrace_info *next;
        	/* expandable buffer holding the characters recorded so far */
     51 	XString xs;
        	/* position inside xs where the next character gets stored */
     52 	char *xp;
     53 };
     54 
     55 /*
     56  * Structure to keep track of the lexing state and the various pieces of info
     57  * needed for each particular state.
         *
         * yylex() keeps an array of these and moves a pointer through it like a
         * stack (PUSH_STATE/POP_STATE). The members of the union are
         * alternatives; which one is meaningful depends on the role of the slot
         * and on its state type, as noted on each member.
     58  */
     59 typedef struct lex_state {
     60 	union {
     61 		/*
        		 * link to another state block; yylex() stores NULL here
        		 * in slot 0 of its own array and follows the link when
        		 * it searches the enclosing states (SBQUOTE setup)
        		 */
     62 		struct lex_state *base;
     63 		/*
        		 * marks start of state output in output string; written
        		 * by PUSH_SRETRACE() and read back by POP_SRETRACE()
        		 */
     64 		int start;
     65 		/* SBQUOTE: true if in double quotes: "`...`" */
     66 		/* SEQUOTE: got NUL, ignore rest of string */
     67 		bool abool;
     68 		/* SADELIM information */
     69 		struct {
     70 			/* character to search for (a closing brace ends the search too) */
     71 			unsigned char delimiter;
     72 			/* max. number of delimiters; counted down, state is left at 0 */
     73 			unsigned char num;
     74 		} adelim;
     75 	} u;
     76 	/* count open parentheses (used by SADELIM, SASPAREN and SLETPAREN) */
     77 	short nparen;
     78 	/* type of this state: one of the S* state constants */
     79 	uint8_t type;
     80 } Lex_state;
        /* shorthands for the members of the union u */
     81 #define ls_base		u.base
     82 #define ls_start	u.start
     83 #define ls_bool		u.abool
     84 #define ls_adelim	u.adelim
     85 
     86 typedef struct {
        	/*
        	 * Bounds of the block of Lex_state slots currently in use;
        	 * yylex() initialises them to its local array of STATE_BSIZE
        	 * slots.
        	 */
        	/* first slot of the block; POP_STATE() calls pop_state_() on reaching it */
     87 	Lex_state *base;
        	/* one past the last slot; PUSH_STATE() calls push_state_() on reaching it */
     88 	Lex_state *end;
     89 } State_info;
     90 
     91 static void readhere(struct ioword *);
        /* give a character back to the input; yylex() uses it after lookahead */
     92 static void ungetsc(int);
     93 static void ungetsc_(int);
        /* slow path taken by o_getsc_u() when the fast path does not apply */
     94 static int getsc_uu(void);
     95 static void getsc_line(Source *);
        /* slow path taken by o_getsc() when the fast path does not apply */
     96 static int getsc_bn(void);
        /*
         * character source and push-back callbacks that yylex() passes to
         * unbksl() inside $'...'; s_get() is also called directly there
         */
     97 static int s_get(void);
     98 static void s_put(int);
        /*
         * called by yylex() right after "${": receives the output word and
         * the output pointer and returns the output pointer to continue with
         */
     99 static char *get_brace_var(XString *, char *);
        /*
         * called by yylex() on '[' following a variable name; stores a string
         * through its argument, which yylex() copies into the word when the
         * result is true and pushes back as input when it is false
         */
    100 static bool arraysub(char **);
        /* called by yylex() with false at a newline and with true at end of input */
    101 static void gethere(bool);
        /*
         * called by PUSH_STATE()/POP_STATE() when statep reaches
         * state_info.end/state_info.base; the result becomes the new statep
         */
    102 static Lex_state *push_state_(State_info *, Lex_state *);
    103 static Lex_state *pop_state_(State_info *, Lex_state *);
    104 
    105 static int dopprompt(const char *, int, bool);
        /* the only declaration in this group without "static" */
    106 void yyskiputf8bom(void);
    107 
    108 static int backslash_skip;	/* reset by yylex(); while non-zero, o_getsc() always calls getsc_bn() */
        /*
         * nesting counter, reset by yylex() and raised by it while reading a
         * # comment or the inside of '...' and $'...'
         */
    109 static int ignore_backslash_newline;
        /* head of the list of active retrace records; NULL when nothing is recorded */
    110 static struct sretrace_info *retrace_info;
        /* has external linkage (not static); starts out as 0 */
    111 short subshell_nesting_level = 0;
    112 
    113 /*
         * optimised getsc_bn(): when the next byte of the current source is
         * neither NUL nor a backslash and backslash_skip is zero, yield that
         * byte and advance source->str; in every other case call getsc_bn()
         */
    114 #define o_getsc()	(*source->str != '\0' && *source->str != '\\' && \
    115 			    !backslash_skip ? *source->str++ : getsc_bn())
    116 /*
         * optimised getsc_uu(): yield the next byte of the current source and
         * advance source->str unless that byte is NUL, else call getsc_uu()
         */
    117 #define	o_getsc_u()	((*source->str != '\0') ? *source->str++ : getsc_uu())
    118 
    119 /*
         * retrace helper
         *
         * Expands to a complete brace-enclosed function body ending in a
         * return statement, so it can only be used as the whole body of a
         * function returning int (see getsc() and getsc_r()).
         *
         * carg is evaluated exactly once, before retrace_info is read. Its
         * value is appended to the buffer of every record on the retrace_info
         * list (Xcheck() is called on the buffer before each store) and is
         * then returned.
         */
    120 #define o_getsc_r(carg)	{				\
    121 	int cev = (carg);				\
    122 	struct sretrace_info *rp = retrace_info;	\
    123 							\
    124 	while (rp) {					\
    125 		Xcheck(rp->xs, rp->xp);			\
    126 		*rp->xp++ = cev;			\
    127 		rp = rp->next;				\
    128 	}						\
    129 							\
    130 	return (cev);					\
    131 }
    132 
    133 #ifdef MKSH_SMALL
        /*
         * With MKSH_SMALL, getsc() is a real function: it fetches one
         * character through o_getsc() and records it in all active retrace
         * buffers, both inside the body generated by o_getsc_r().
         */
    134 static int getsc(void);
    135 
    136 static int
    137 getsc(void)
    138 {
    139 	o_getsc_r(o_getsc());
    140 }
    141 #else
        /*
         * Otherwise getsc() is a macro: the o_getsc() fetch is expanded at
         * every call site and only the recording step, getsc_r(), is a
         * function call. getsc_r() records its argument in all active retrace
         * buffers and returns it unchanged.
         */
    142 static int getsc_r(int);
    143 
    144 static int
    145 getsc_r(int c)
    146 {
    147 	o_getsc_r(c);
    148 }
    149 
    150 #define getsc()		getsc_r(o_getsc())
    151 #endif
    152 
    153 #define STATE_BSIZE	8	/* number of Lex_state slots in yylex()'s state array */
    154 
        /*
         * The four macros below expand in the scope of the caller and use its
         * local variables directly; yylex() declares all of them.
         */
        /*
         * Enter state s: advance statep to the next slot (when it reaches
         * state_info.end, push_state_() supplies the slot to use instead) and
         * store s both in that slot's type and in "state".
         * Uses the caller's statep, state_info and state.
         */
    155 #define PUSH_STATE(s)	do {					\
    156 	if (++statep == state_info.end)				\
    157 		statep = push_state_(&state_info, statep);	\
    158 	state = statep->type = (s);				\
    159 } while (/* CONSTCOND */ 0)
    160 
        /*
         * Leave the current state: step statep back by one slot (when it
         * reaches state_info.base, pop_state_() supplies the slot to use
         * instead) and reload "state" from that slot's type.
         * Uses the caller's statep, state_info and state.
         */
    161 #define POP_STATE()	do {					\
    162 	if (--statep == state_info.base)			\
    163 		statep = pop_state_(&state_info, statep);	\
    164 	state = statep->type;					\
    165 } while (/* CONSTCOND */ 0)
    166 
        /*
         * Start recording the input read from now on: save the current
         * position of the output word in statep->ls_start, allocate a
         * retrace record from ATEMP, initialise its buffer with Xinit() (size
         * argument 64) and make it the new head of the retrace_info list.
         * Uses the caller's statep, ws and wp.
         */
    167 #define PUSH_SRETRACE()	do {					\
    168 	struct sretrace_info *ri;				\
    169 								\
    170 	statep->ls_start = Xsavepos(ws, wp);			\
    171 	ri = alloc(sizeof(struct sretrace_info), ATEMP);	\
    172 	Xinit(ri->xs, ri->xp, 64, ATEMP);			\
    173 	ri->next = retrace_info;				\
    174 	retrace_info = ri;					\
    175 } while (/* CONSTCOND */ 0)
    176 
        /*
         * Stop the innermost recording: set wp back to the output position
         * saved in statep->ls_start, NUL-terminate the recorded text and leave
         * it in the caller's sp, then unlink the head record and free the
         * record itself with afree(). The recorded text is not freed here;
         * yylex() later frees sp or hands it to a Source as freeme.
         * Reads statep->ls_start, so it has to run while statep still points
         * at the slot PUSH_SRETRACE() was used on (yylex() calls it before
         * POP_STATE()). Uses the caller's statep, ws, wp and sp, and dp as
         * scratch.
         */
    177 #define POP_SRETRACE()	do {					\
    178 	wp = Xrestpos(ws, wp, statep->ls_start);		\
    179 	*retrace_info->xp = '\0';				\
    180 	sp = Xstring(retrace_info->xs, retrace_info->xp);	\
    181 	dp = (void *)retrace_info;				\
    182 	retrace_info = retrace_info->next;			\
    183 	afree(dp, ATEMP);					\
    184 } while (/* CONSTCOND */ 0)
    185 
    186 /**
    187  * Lexical analyser
    188  *
    189  * Tokens are not regular expressions, they are LL(1),
    190  * for example "${var:-${PWD}}" and "$(size $(whence ksh))";
    191  * hence the state stack. Note that "$(...)" is now parsed recursively.
    192  */
    193 
    194 int
    195 yylex(int cf)
    196 {
    197 	Lex_state states[STATE_BSIZE], *statep, *s2, *base;
    198 	State_info state_info;
    199 	int c, c2, state;
    200 	size_t cz;
    201 	XString ws;		/* expandable output word */
    202 	char *wp;		/* output word pointer */
    203 	char *sp, *dp;
    204 
    205  Again:
    206 	states[0].type = SINVALID;
    207 	states[0].ls_base = NULL;
    208 	statep = &states[1];
    209 	state_info.base = states;
    210 	state_info.end = &state_info.base[STATE_BSIZE];
    211 
    212 	Xinit(ws, wp, 64, ATEMP);
    213 
    214 	backslash_skip = 0;
    215 	ignore_backslash_newline = 0;
    216 
    217 	if (cf & ONEWORD)
    218 		state = SWORD;
    219 	else if (cf & LETEXPR) {
    220 		/* enclose arguments in (double) quotes */
    221 		*wp++ = OQUOTE;
    222 		state = SLETPAREN;
    223 		statep->nparen = 0;
    224 	} else {
    225 		/* normal lexing */
    226 		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
    227 		while ((c = getsc()) == ' ' || c == '\t')
    228 			;
    229 		if (c == '#') {
    230 			ignore_backslash_newline++;
    231 			while ((c = getsc()) != '\0' && c != '\n')
    232 				;
    233 			ignore_backslash_newline--;
    234 		}
    235 		ungetsc(c);
    236 	}
    237 	if (source->flags & SF_ALIAS) {
    238 		/* trailing ' ' in alias definition */
    239 		source->flags &= ~SF_ALIAS;
    240 		cf |= ALIAS;
    241 	}
    242 
    243 	/* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
    244 	statep->type = state;
    245 
    246 	/* check for here string */
    247 	if (state == SHEREDELIM) {
    248 		c = getsc();
    249 		if (c == '<') {
    250 			state = SHERESTRING;
    251 			while ((c = getsc()) == ' ' || c == '\t')
    252 				;
    253 			ungetsc(c);
    254 			c = '<';
    255 			goto accept_nonword;
    256 		}
    257 		ungetsc(c);
    258 	}
    259 
    260 	/* collect non-special or quoted characters to form word */
    261 	while (!((c = getsc()) == 0 ||
    262 	    ((state == SBASE || state == SHEREDELIM || state == SHERESTRING) &&
    263 	    ctype(c, C_LEX1)))) {
    264  accept_nonword:
    265 		Xcheck(ws, wp);
    266 		switch (state) {
    267 		case SADELIM:
    268 			if (c == '(')
    269 				statep->nparen++;
    270 			else if (c == ')')
    271 				statep->nparen--;
    272 			else if (statep->nparen == 0 &&
    273 			    (c == /*{*/ '}' || c == statep->ls_adelim.delimiter)) {
    274 				*wp++ = ADELIM;
    275 				*wp++ = c;
    276 				if (c == /*{*/ '}' || --statep->ls_adelim.num == 0)
    277 					POP_STATE();
    278 				if (c == /*{*/ '}')
    279 					POP_STATE();
    280 				break;
    281 			}
    282 			/* FALLTHROUGH */
    283 		case SBASE:
    284 			if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
    285 				/* temporary */
    286 				*wp = EOS;
    287 				if (is_wdvarname(Xstring(ws, wp), false)) {
    288 					char *p, *tmp;
    289 
    290 					if (arraysub(&tmp)) {
    291 						*wp++ = CHAR;
    292 						*wp++ = c;
    293 						for (p = tmp; *p; ) {
    294 							Xcheck(ws, wp);
    295 							*wp++ = CHAR;
    296 							*wp++ = *p++;
    297 						}
    298 						afree(tmp, ATEMP);
    299 						break;
    300 					} else {
    301 						Source *s;
    302 
    303 						s = pushs(SREREAD,
    304 						    source->areap);
    305 						s->start = s->str =
    306 						    s->u.freeme = tmp;
    307 						s->next = source;
    308 						source = s;
    309 					}
    310 				}
    311 				*wp++ = CHAR;
    312 				*wp++ = c;
    313 				break;
    314 			}
    315 			/* FALLTHROUGH */
    316  Sbase1:		/* includes *(...|...) pattern (*+?@!) */
    317 			if (c == '*' || c == '@' || c == '+' || c == '?' ||
    318 			    c == '!') {
    319 				c2 = getsc();
    320 				if (c2 == '(' /*)*/ ) {
    321 					*wp++ = OPAT;
    322 					*wp++ = c;
    323 					PUSH_STATE(SPATTERN);
    324 					break;
    325 				}
    326 				ungetsc(c2);
    327 			}
    328 			/* FALLTHROUGH */
    329  Sbase2:		/* doesn't include *(...|...) pattern (*+?@!) */
    330 			switch (c) {
    331 			case '\\':
    332  getsc_qchar:
    333 				if ((c = getsc())) {
    334 					/* trailing \ is lost */
    335 					*wp++ = QCHAR;
    336 					*wp++ = c;
    337 				}
    338 				break;
    339 			case '\'':
    340  open_ssquote:
    341 				*wp++ = OQUOTE;
    342 				ignore_backslash_newline++;
    343 				PUSH_STATE(SSQUOTE);
    344 				break;
    345 			case '"':
    346  open_sdquote:
    347 				*wp++ = OQUOTE;
    348 				PUSH_STATE(SDQUOTE);
    349 				break;
    350 			default:
    351 				goto Subst;
    352 			}
    353 			break;
    354 
    355  Subst:
    356 			switch (c) {
    357 			case '\\':
    358 				c = getsc();
    359 				switch (c) {
    360 				case '"':
    361 					if ((cf & HEREDOC))
    362 						goto heredocquote;
    363 					/* FALLTHROUGH */
    364 				case '\\':
    365 				case '$': case '`':
    366  store_qchar:
    367 					*wp++ = QCHAR;
    368 					*wp++ = c;
    369 					break;
    370 				default:
    371  heredocquote:
    372 					Xcheck(ws, wp);
    373 					if (c) {
    374 						/* trailing \ is lost */
    375 						*wp++ = CHAR;
    376 						*wp++ = '\\';
    377 						*wp++ = CHAR;
    378 						*wp++ = c;
    379 					}
    380 					break;
    381 				}
    382 				break;
    383 			case '$':
    384  subst_dollar:
    385 				c = getsc();
    386 				if (c == '(') /*)*/ {
    387 					c = getsc();
    388 					if (c == '(') /*)*/ {
    389 						*wp++ = EXPRSUB;
    390 						PUSH_STATE(SASPAREN);
    391 						statep->nparen = 2;
    392 						PUSH_SRETRACE();
    393 						*retrace_info->xp++ = '(';
    394 					} else {
    395 						ungetsc(c);
    396  subst_command:
    397 						sp = yyrecursive();
    398 						cz = strlen(sp) + 1;
    399 						XcheckN(ws, wp, cz);
    400 						*wp++ = COMSUB;
    401 						memcpy(wp, sp, cz);
    402 						wp += cz;
    403 					}
    404 				} else if (c == '{') /*}*/ {
    405 					*wp++ = OSUBST;
    406 					*wp++ = '{'; /*}*/
    407 					wp = get_brace_var(&ws, wp);
    408 					c = getsc();
    409 					/* allow :# and :% (ksh88 compat) */
    410 					if (c == ':') {
    411 						*wp++ = CHAR;
    412 						*wp++ = c;
    413 						c = getsc();
    414 						if (c == ':') {
    415 							*wp++ = CHAR;
    416 							*wp++ = '0';
    417 							*wp++ = ADELIM;
    418 							*wp++ = ':';
    419 							PUSH_STATE(SBRACE);
    420 							PUSH_STATE(SADELIM);
    421 							statep->ls_adelim.delimiter = ':';
    422 							statep->ls_adelim.num = 1;
    423 							statep->nparen = 0;
    424 							break;
    425 						} else if (ksh_isdigit(c) ||
    426 						    c == '('/*)*/ || c == ' ' ||
    427 						    /*XXX what else? */
    428 						    c == '$') {
    429 							/* substring subst. */
    430 							if (c != ' ') {
    431 								*wp++ = CHAR;
    432 								*wp++ = ' ';
    433 							}
    434 							ungetsc(c);
    435 							PUSH_STATE(SBRACE);
    436 							PUSH_STATE(SADELIM);
    437 							statep->ls_adelim.delimiter = ':';
    438 							statep->ls_adelim.num = 2;
    439 							statep->nparen = 0;
    440 							break;
    441 						}
    442 					} else if (c == '/') {
    443 						*wp++ = CHAR;
    444 						*wp++ = c;
    445 						if ((c = getsc()) == '/') {
    446 							*wp++ = ADELIM;
    447 							*wp++ = c;
    448 						} else
    449 							ungetsc(c);
    450 						PUSH_STATE(SBRACE);
    451 						PUSH_STATE(SADELIM);
    452 						statep->ls_adelim.delimiter = '/';
    453 						statep->ls_adelim.num = 1;
    454 						statep->nparen = 0;
    455 						break;
    456 					}
    457 					/*
    458 					 * If this is a trim operation,
    459 					 * treat (,|,) specially in STBRACE.
    460 					 */
    461 					if (ctype(c, C_SUBOP2)) {
    462 						ungetsc(c);
    463 						if (Flag(FSH))
    464 							PUSH_STATE(STBRACEBOURNE);
    465 						else
    466 							PUSH_STATE(STBRACEKORN);
    467 					} else {
    468 						ungetsc(c);
    469 						if (state == SDQUOTE)
    470 							PUSH_STATE(SQBRACE);
    471 						else
    472 							PUSH_STATE(SBRACE);
    473 					}
    474 				} else if (ksh_isalphx(c)) {
    475 					*wp++ = OSUBST;
    476 					*wp++ = 'X';
    477 					do {
    478 						Xcheck(ws, wp);
    479 						*wp++ = c;
    480 						c = getsc();
    481 					} while (ksh_isalnux(c));
    482 					*wp++ = '\0';
    483 					*wp++ = CSUBST;
    484 					*wp++ = 'X';
    485 					ungetsc(c);
    486 				} else if (ctype(c, C_VAR1 | C_DIGIT)) {
    487 					Xcheck(ws, wp);
    488 					*wp++ = OSUBST;
    489 					*wp++ = 'X';
    490 					*wp++ = c;
    491 					*wp++ = '\0';
    492 					*wp++ = CSUBST;
    493 					*wp++ = 'X';
    494 				} else if (c == '\'' && (state == SBASE)) {
    495 					/* XXX which other states are valid? */
    496 					*wp++ = OQUOTE;
    497 					ignore_backslash_newline++;
    498 					PUSH_STATE(SEQUOTE);
    499 					statep->ls_bool = false;
    500 					break;
    501 				} else if (c == '"' && (state == SBASE)) {
    502 					/* XXX which other states are valid? */
    503 					goto DEQUOTE;
    504 				} else {
    505 					*wp++ = CHAR;
    506 					*wp++ = '$';
    507  DEQUOTE:
    508 					ungetsc(c);
    509 				}
    510 				break;
    511 			case '`':
    512  subst_gravis:
    513 				PUSH_STATE(SBQUOTE);
    514 				*wp++ = COMSUB;
    515 				/*
    516 				 * Need to know if we are inside double quotes
    517 				 * since sh/AT&T-ksh translate the \" to " in
    518 				 * "`...\"...`".
    519 				 * This is not done in POSIX mode (section
    520 				 * 3.2.3, Double Quotes: "The backquote shall
    521 				 * retain its special meaning introducing the
    522 				 * other form of command substitution (see
    523 				 * 3.6.3). The portion of the quoted string
    524 				 * from the initial backquote and the
    525 				 * characters up to the next backquote that
    526 				 * is not preceded by a backslash (having
    527 				 * escape characters removed) defines that
    528 				 * command whose output replaces `...` when
    529 				 * the word is expanded."
    530 				 * Section 3.6.3, Command Substitution:
    531 				 * "Within the backquoted style of command
    532 				 * substitution, backslash shall retain its
    533 				 * literal meaning, except when followed by
    534 				 * $ ` \.").
    535 				 */
    536 				statep->ls_bool = false;
    537 				s2 = statep;
    538 				base = state_info.base;
    539 				while (/* CONSTCOND */ 1) {
    540 					for (; s2 != base; s2--) {
    541 						if (s2->type == SDQUOTE) {
    542 							statep->ls_bool = true;
    543 							break;
    544 						}
    545 					}
    546 					if (s2 != base)
    547 						break;
    548 					if (!(s2 = s2->ls_base))
    549 						break;
    550 					base = s2-- - STATE_BSIZE;
    551 				}
    552 				break;
    553 			case QCHAR:
    554 				if (cf & LQCHAR) {
    555 					*wp++ = QCHAR;
    556 					*wp++ = getsc();
    557 					break;
    558 				}
    559 				/* FALLTHROUGH */
    560 			default:
    561  store_char:
    562 				*wp++ = CHAR;
    563 				*wp++ = c;
    564 			}
    565 			break;
    566 
    567 		case SEQUOTE:
    568 			if (c == '\'') {
    569 				POP_STATE();
    570 				*wp++ = CQUOTE;
    571 				ignore_backslash_newline--;
    572 			} else if (c == '\\') {
    573 				if ((c2 = unbksl(true, s_get, s_put)) == -1)
    574 					c2 = s_get();
    575 				if (c2 == 0)
    576 					statep->ls_bool = true;
    577 				if (!statep->ls_bool) {
    578 					char ts[4];
    579 
    580 					if ((unsigned int)c2 < 0x100) {
    581 						*wp++ = QCHAR;
    582 						*wp++ = c2;
    583 					} else {
    584 						cz = utf_wctomb(ts, c2 - 0x100);
    585 						ts[cz] = 0;
    586 						for (cz = 0; ts[cz]; ++cz) {
    587 							*wp++ = QCHAR;
    588 							*wp++ = ts[cz];
    589 						}
    590 					}
    591 				}
    592 			} else if (!statep->ls_bool) {
    593 				*wp++ = QCHAR;
    594 				*wp++ = c;
    595 			}
    596 			break;
    597 
    598 		case SSQUOTE:
    599 			if (c == '\'') {
    600 				POP_STATE();
    601 				*wp++ = CQUOTE;
    602 				ignore_backslash_newline--;
    603 			} else {
    604 				*wp++ = QCHAR;
    605 				*wp++ = c;
    606 			}
    607 			break;
    608 
    609 		case SDQUOTE:
    610 			if (c == '"') {
    611 				POP_STATE();
    612 				*wp++ = CQUOTE;
    613 			} else
    614 				goto Subst;
    615 			break;
    616 
    617 		/* $(( ... )) */
    618 		case SASPAREN:
    619 			if (c == '(')
    620 				statep->nparen++;
    621 			else if (c == ')') {
    622 				statep->nparen--;
    623 				if (statep->nparen == 1) {
    624 					/* end of EXPRSUB */
    625 					POP_SRETRACE();
    626 					POP_STATE();
    627 
    628 					if ((c2 = getsc()) == /*(*/ ')') {
    629 						cz = strlen(sp) - 2;
    630 						XcheckN(ws, wp, cz);
    631 						memcpy(wp, sp + 1, cz);
    632 						wp += cz;
    633 						afree(sp, ATEMP);
    634 						*wp++ = '\0';
    635 						break;
    636 					} else {
    637 						Source *s;
    638 
    639 						ungetsc(c2);
    640 						/*
    641 						 * mismatched parenthesis -
    642 						 * assume we were really
    643 						 * parsing a $(...) expression
    644 						 */
    645 						--wp;
    646 						s = pushs(SREREAD,
    647 						    source->areap);
    648 						s->start = s->str =
    649 						    s->u.freeme = sp;
    650 						s->next = source;
    651 						source = s;
    652 						goto subst_command;
    653 					}
    654 				}
    655 			}
    656 			/* reuse existing state machine */
    657 			goto Sbase2;
    658 
    659 		case SQBRACE:
    660 			if (c == '\\') {
    661 				/*
    662 				 * perform POSIX "quote removal" if the back-
    663 				 * slash is "special", i.e. same cases as the
    664 				 * {case '\\':} in Subst: plus closing brace;
    665 				 * in mksh code "quote removal" on '\c' means
    666 				 * write QCHAR+c, otherwise CHAR+\+CHAR+c are
    667 				 * emitted (in heredocquote:)
    668 				 */
    669 				if ((c = getsc()) == '"' || c == '\\' ||
    670 				    c == '$' || c == '`' || c == /*{*/'}')
    671 					goto store_qchar;
    672 				goto heredocquote;
    673 			}
    674 			goto common_SQBRACE;
    675 
    676 		case SBRACE:
    677 			if (c == '\'')
    678 				goto open_ssquote;
    679 			else if (c == '\\')
    680 				goto getsc_qchar;
    681  common_SQBRACE:
    682 			if (c == '"')
    683 				goto open_sdquote;
    684 			else if (c == '$')
    685 				goto subst_dollar;
    686 			else if (c == '`')
    687 				goto subst_gravis;
    688 			else if (c != /*{*/ '}')
    689 				goto store_char;
    690 			POP_STATE();
    691 			*wp++ = CSUBST;
    692 			*wp++ = /*{*/ '}';
    693 			break;
    694 
    695 		/* Same as SBASE, except (,|,) treated specially */
    696 		case STBRACEKORN:
    697 			if (c == '|')
    698 				*wp++ = SPAT;
    699 			else if (c == '(') {
    700 				*wp++ = OPAT;
    701 				/* simile for @ */
    702 				*wp++ = ' ';
    703 				PUSH_STATE(SPATTERN);
    704 			} else /* FALLTHROUGH */
    705 		case STBRACEBOURNE:
    706 			  if (c == /*{*/ '}') {
    707 				POP_STATE();
    708 				*wp++ = CSUBST;
    709 				*wp++ = /*{*/ '}';
    710 			} else
    711 				goto Sbase1;
    712 			break;
    713 
    714 		case SBQUOTE:
    715 			if (c == '`') {
    716 				*wp++ = 0;
    717 				POP_STATE();
    718 			} else if (c == '\\') {
    719 				switch (c = getsc()) {
    720 				case 0:
    721 					/* trailing \ is lost */
    722 					break;
    723 				case '\\':
    724 				case '$': case '`':
    725 					*wp++ = c;
    726 					break;
    727 				case '"':
    728 					if (statep->ls_bool) {
    729 						*wp++ = c;
    730 						break;
    731 					}
    732 					/* FALLTHROUGH */
    733 				default:
    734 					*wp++ = '\\';
    735 					*wp++ = c;
    736 					break;
    737 				}
    738 			} else
    739 				*wp++ = c;
    740 			break;
    741 
    742 		/* ONEWORD */
    743 		case SWORD:
    744 			goto Subst;
    745 
    746 		/* LETEXPR: (( ... )) */
    747 		case SLETPAREN:
    748 			if (c == /*(*/ ')') {
    749 				if (statep->nparen > 0)
    750 					--statep->nparen;
    751 				else if ((c2 = getsc()) == /*(*/ ')') {
    752 					c = 0;
    753 					*wp++ = CQUOTE;
    754 					goto Done;
    755 				} else {
    756 					Source *s;
    757 
    758 					ungetsc(c2);
    759 					/*
    760 					 * mismatched parenthesis -
    761 					 * assume we were really
    762 					 * parsing a (...) expression
    763 					 */
    764 					*wp = EOS;
    765 					sp = Xstring(ws, wp);
    766 					dp = wdstrip(sp, WDS_KEEPQ);
    767 					s = pushs(SREREAD, source->areap);
    768 					s->start = s->str = s->u.freeme = dp;
    769 					s->next = source;
    770 					source = s;
    771 					return ('('/*)*/);
    772 				}
    773 			} else if (c == '(')
    774 				/*
    775 				 * parentheses inside quotes and
    776 				 * backslashes are lost, but AT&T ksh
    777 				 * doesn't count them either
    778 				 */
    779 				++statep->nparen;
    780 			goto Sbase2;
    781 
    782 		/* <<< delimiter */
    783 		case SHERESTRING:
    784 			if (c == '\\') {
    785 				c = getsc();
    786 				if (c) {
    787 					/* trailing \ is lost */
    788 					*wp++ = QCHAR;
    789 					*wp++ = c;
    790 				}
    791 			} else if (c == '$') {
    792 				if ((c2 = getsc()) == '\'') {
    793 					PUSH_STATE(SEQUOTE);
    794 					statep->ls_bool = false;
    795 					goto sherestring_quoted;
    796 				} else if (c2 == '"')
    797 					goto sherestring_dquoted;
    798 				ungetsc(c2);
    799 				goto sherestring_regular;
    800 			} else if (c == '\'') {
    801 				PUSH_STATE(SSQUOTE);
    802  sherestring_quoted:
    803 				*wp++ = OQUOTE;
    804 				ignore_backslash_newline++;
    805 			} else if (c == '"') {
    806  sherestring_dquoted:
    807 				state = statep->type = SHEREDQUOTE;
    808 				*wp++ = OQUOTE;
    809 				/* just don't IFS split; no quoting mode */
    810 			} else {
    811  sherestring_regular:
    812 				*wp++ = CHAR;
    813 				*wp++ = c;
    814 			}
    815 			break;
    816 
    817 		/* <<,<<- delimiter */
    818 		case SHEREDELIM:
    819 			/*
    820 			 * XXX chuck this state (and the next) - use
    821 			 * the existing states ($ and \`...` should be
    822 			 * stripped of their specialness after the
    823 			 * fact).
    824 			 */
    825 			/*
    826 			 * here delimiters need a special case since
    827 			 * $ and `...` are not to be treated specially
    828 			 */
    829 			if (c == '\\') {
    830 				c = getsc();
    831 				if (c) {
    832 					/* trailing \ is lost */
    833 					*wp++ = QCHAR;
    834 					*wp++ = c;
    835 				}
    836 			} else if (c == '$') {
    837 				if ((c2 = getsc()) == '\'') {
    838 					PUSH_STATE(SEQUOTE);
    839 					statep->ls_bool = false;
    840 					goto sheredelim_quoted;
    841 				} else if (c2 == '"')
    842 					goto sheredelim_dquoted;
    843 				ungetsc(c2);
    844 				goto sheredelim_regular;
    845 			} else if (c == '\'') {
    846 				PUSH_STATE(SSQUOTE);
    847  sheredelim_quoted:
    848 				*wp++ = OQUOTE;
    849 				ignore_backslash_newline++;
    850 			} else if (c == '"') {
    851  sheredelim_dquoted:
    852 				state = statep->type = SHEREDQUOTE;
    853 				*wp++ = OQUOTE;
    854 			} else {
    855  sheredelim_regular:
    856 				*wp++ = CHAR;
    857 				*wp++ = c;
    858 			}
    859 			break;
    860 
    861 		/* " in <<,<<- delimiter */
    862 		case SHEREDQUOTE:
    863 			if (c == '"') {
    864 				*wp++ = CQUOTE;
    865 				state = statep->type =
    866 				    /* dp[1] == '<' means here string */
    867 				    Xstring(ws, wp)[1] == '<' ?
    868 				    SHERESTRING : SHEREDELIM;
    869 			} else {
    870 				if (c == '\\') {
    871 					switch (c = getsc()) {
    872 					case 0:
    873 						/* trailing \ is lost */
    874 					case '\\':
    875 					case '"':
    876 					case '$':
    877 					case '`':
    878 						break;
    879 					default:
    880 						*wp++ = CHAR;
    881 						*wp++ = '\\';
    882 						break;
    883 					}
    884 				}
    885 				*wp++ = CHAR;
    886 				*wp++ = c;
    887 			}
    888 			break;
    889 
    890 		/* in *(...|...) pattern (*+?@!) */
    891 		case SPATTERN:
    892 			if (c == /*(*/ ')') {
    893 				*wp++ = CPAT;
    894 				POP_STATE();
    895 			} else if (c == '|') {
    896 				*wp++ = SPAT;
    897 			} else if (c == '(') {
    898 				*wp++ = OPAT;
    899 				/* simile for @ */
    900 				*wp++ = ' ';
    901 				PUSH_STATE(SPATTERN);
    902 			} else
    903 				goto Sbase1;
    904 			break;
    905 		}
    906 	}
    907  Done:
    908 	Xcheck(ws, wp);
    909 	if (statep != &states[1])
    910 		/* XXX figure out what is missing */
    911 		yyerror("no closing quote\n");
    912 
    913 	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
    914 	if (state == SHEREDELIM || state == SHERESTRING)
    915 		state = SBASE;
    916 
    917 	dp = Xstring(ws, wp);
    918 	if ((c == '<' || c == '>' || c == '&') && state == SBASE) {
    919 		struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
    920 
    921 		if (Xlength(ws, wp) == 0)
    922 			iop->unit = c == '<' ? 0 : 1;
    923 		else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) {
    924 			if (dp[c2] != CHAR)
    925 				goto no_iop;
    926 			if (!ksh_isdigit(dp[c2 + 1]))
    927 				goto no_iop;
    928 			iop->unit = (iop->unit * 10) + dp[c2 + 1] - '0';
    929 		}
    930 
    931 		if (iop->unit >= FDBASE)
    932 			goto no_iop;
    933 
    934 		if (c == '&') {
    935 			if ((c2 = getsc()) != '>') {
    936 				ungetsc(c2);
    937 				goto no_iop;
    938 			}
    939 			c = c2;
    940 			iop->flag = IOBASH;
    941 		} else
    942 			iop->flag = 0;
    943 
    944 		c2 = getsc();
    945 		/* <<, >>, <> are ok, >< is not */
    946 		if (c == c2 || (c == '<' && c2 == '>')) {
    947 			iop->flag |= c == c2 ?
    948 			    (c == '>' ? IOCAT : IOHERE) : IORDWR;
    949 			if (iop->flag == IOHERE) {
    950 				if ((c2 = getsc()) == '-') {
    951 					iop->flag |= IOSKIP;
    952 					c2 = getsc();
    953 				} else if (c2 == '<')
    954 					iop->flag |= IOHERESTR;
    955 				ungetsc(c2);
    956 				if (c2 == '\n')
    957 					iop->flag |= IONDELIM;
    958 			}
    959 		} else if (c2 == '&')
    960 			iop->flag |= IODUP | (c == '<' ? IORDUP : 0);
    961 		else {
    962 			iop->flag |= c == '>' ? IOWRITE : IOREAD;
    963 			if (c == '>' && c2 == '|')
    964 				iop->flag |= IOCLOB;
    965 			else
    966 				ungetsc(c2);
    967 		}
    968 
    969 		iop->name = NULL;
    970 		iop->delim = NULL;
    971 		iop->heredoc = NULL;
    972 		/* free word */
    973 		Xfree(ws, wp);
    974 		yylval.iop = iop;
    975 		return (REDIR);
    976  no_iop:
    977 		afree(iop, ATEMP);
    978 	}
    979 
    980 	if (wp == dp && state == SBASE) {
    981 		/* free word */
    982 		Xfree(ws, wp);
    983 		/* no word, process LEX1 character */
    984 		if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) {
    985 			if ((c2 = getsc()) == c)
    986 				c = (c == ';') ? BREAK :
    987 				    (c == '|') ? LOGOR :
    988 				    (c == '&') ? LOGAND :
    989 				    /* c == '(' ) */ MDPAREN;
    990 			else if (c == '|' && c2 == '&')
    991 				c = COPROC;
    992 			else if (c == ';' && c2 == '|')
    993 				c = BRKEV;
    994 			else if (c == ';' && c2 == '&')
    995 				c = BRKFT;
    996 			else
    997 				ungetsc(c2);
    998 #ifndef MKSH_SMALL
    999 			if (c == BREAK) {
   1000 				if ((c2 = getsc()) == '&')
   1001 					c = BRKEV;
   1002 				else
   1003 					ungetsc(c2);
   1004 			}
   1005 #endif
   1006 		} else if (c == '\n') {
   1007 			gethere(false);
   1008 			if (cf & CONTIN)
   1009 				goto Again;
   1010 		} else if (c == '\0')
   1011 			/* need here strings at EOF */
   1012 			gethere(true);
   1013 		return (c);
   1014 	}
   1015 
   1016 	/* terminate word */
   1017 	*wp++ = EOS;
   1018 	yylval.cp = Xclose(ws, wp);
   1019 	if (state == SWORD || state == SLETPAREN
   1020 	    /* XXX ONEWORD? */)
   1021 		return (LWORD);
   1022 
   1023 	/* unget terminator */
   1024 	ungetsc(c);
   1025 
   1026 	/*
   1027 	 * note: the alias-vs-function code below depends on several
   1028 	 * interna: starting from here, source->str is not modified;
   1029 	 * the way getsc() and ungetsc() operate; etc.
   1030 	 */
   1031 
   1032 	/* copy word to unprefixed string ident */
   1033 	sp = yylval.cp;
   1034 	dp = ident;
   1035 	if ((cf & HEREDELIM) && (sp[1] == '<'))
   1036 		while (dp < ident+IDENT) {
   1037 			if ((c = *sp++) == CHAR)
   1038 				*dp++ = *sp++;
   1039 			else if ((c != OQUOTE) && (c != CQUOTE))
   1040 				break;
   1041 		}
   1042 	else
   1043 		while (dp < ident+IDENT && (c = *sp++) == CHAR)
   1044 			*dp++ = *sp++;
   1045 	/* Make sure the ident array stays '\0' padded */
   1046 	memset(dp, 0, (ident+IDENT) - dp + 1);
   1047 	if (c != EOS)
   1048 		/* word is not unquoted */
   1049 		*ident = '\0';
   1050 
   1051 	if (*ident != '\0' && (cf & (KEYWORD | ALIAS))) {
   1052 		struct tbl *p;
   1053 		uint32_t h = hash(ident);
   1054 
   1055 		if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
   1056 		    (!(cf & ESACONLY) || p->val.i == ESAC ||
   1057 		    p->val.i == /*{*/ '}')) {
   1058 			afree(yylval.cp, ATEMP);
   1059 			return (p->val.i);
   1060 		}
   1061 		if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
   1062 		    (p->flag & ISSET)) {
   1063 			/*
   1064 			 * this still points to the same character as the
   1065 			 * ungetsc'd terminator from above
   1066 			 */
   1067 			const char *cp = source->str;
   1068 
   1069 			/* prefer POSIX but not Korn functions over aliases */
   1070 			while (*cp == ' ' || *cp == '\t')
   1071 				/*
   1072 				 * this is like getsc() without skipping
   1073 				 * over Source boundaries (including not
   1074 				 * parsing ungetsc'd characters that got
   1075 				 * pushed into an SREREAD) which is what
   1076 				 * we want here anyway: find out whether
   1077 				 * the alias name is followed by a POSIX
   1078 				 * function definition (only the opening
   1079 				 * parenthesis is checked though)
   1080 				 */
   1081 				++cp;
   1082 			/* prefer functions over aliases */
   1083 			if (cp[0] != '(' || cp[1] != ')') {
   1084 				Source *s = source;
   1085 
   1086 				while (s && (s->flags & SF_HASALIAS))
   1087 					if (s->u.tblp == p)
   1088 						return (LWORD);
   1089 					else
   1090 						s = s->next;
   1091 				/* push alias expansion */
   1092 				s = pushs(SALIAS, source->areap);
   1093 				s->start = s->str = p->val.s;
   1094 				s->u.tblp = p;
   1095 				s->flags |= SF_HASALIAS;
   1096 				s->next = source;
   1097 				if (source->type == SEOF) {
   1098 					/* prevent infinite recursion at EOS */
   1099 					source->u.tblp = p;
   1100 					source->flags |= SF_HASALIAS;
   1101 				}
   1102 				source = s;
   1103 				afree(yylval.cp, ATEMP);
   1104 				goto Again;
   1105 			}
   1106 		}
   1107 	}
   1108 
   1109 	return (LWORD);
   1110 }
   1111 
   1112 static void
   1113 gethere(bool iseof)
   1114 {
   1115 	struct ioword **p;
   1116 
   1117 	for (p = heres; p < herep; p++)
   1118 		if (iseof && !((*p)->flag & IOHERESTR))
   1119 			/* only here strings at EOF */
   1120 			return;
   1121 		else
   1122 			readhere(*p);
   1123 	herep = heres;
   1124 }
   1125 
   1126 /*
   1127  * read "<<word" text into temp file
   1128  */
   1129 
   1130 static void
   1131 readhere(struct ioword *iop)
   1132 {
   1133 	int c;
   1134 	const char *eof, *eofp;
   1135 	XString xs;
   1136 	char *xp;
   1137 	int xpos;
   1138 
   1139 	if (iop->flag & IOHERESTR) {
   1140 		/* process the here string */
   1141 		iop->heredoc = xp = evalstr(iop->delim, DOBLANK);
   1142 		xpos = strlen(xp) - 1;
   1143 		memmove(xp, xp + 1, xpos);
   1144 		xp[xpos] = '\n';
   1145 		return;
   1146 	}
   1147 
   1148 	eof = iop->flag & IONDELIM ? "<<" : evalstr(iop->delim, 0);
   1149 
   1150 	if (!(iop->flag & IOEVAL))
   1151 		ignore_backslash_newline++;
   1152 
   1153 	Xinit(xs, xp, 256, ATEMP);
   1154 
   1155  heredoc_read_line:
   1156 	/* beginning of line */
   1157 	eofp = eof;
   1158 	xpos = Xsavepos(xs, xp);
   1159 	if (iop->flag & IOSKIP) {
   1160 		/* skip over leading tabs */
   1161 		while ((c = getsc()) == '\t')
   1162 			/* nothing */;
   1163 		goto heredoc_parse_char;
   1164 	}
   1165  heredoc_read_char:
   1166 	c = getsc();
   1167  heredoc_parse_char:
   1168 	/* compare with here document marker */
   1169 	if (!*eofp) {
   1170 		/* end of here document marker, what to do? */
   1171 		switch (c) {
   1172 		case /*(*/ ')':
   1173 			if (!subshell_nesting_level)
   1174 				/*-
   1175 				 * not allowed outside $(...) or (...)
   1176 				 * => mismatch
   1177 				 */
   1178 				break;
   1179 			/* allow $(...) or (...) to close here */
   1180 			ungetsc(/*(*/ ')');
   1181 			/* FALLTHROUGH */
   1182 		case 0:
   1183 			/*
   1184 			 * Allow EOF here to commands without trailing
   1185 			 * newlines (mksh -c '...') will work as well.
   1186 			 */
   1187 		case '\n':
   1188 			/* Newline terminates here document marker */
   1189 			goto heredoc_found_terminator;
   1190 		}
   1191 	} else if (c == *eofp++)
   1192 		/* store; then read and compare next character */
   1193 		goto heredoc_store_and_loop;
   1194 	/* nope, mismatch; read until end of line */
   1195 	while (c != '\n') {
   1196 		if (!c)
   1197 			/* oops, reached EOF */
   1198 			yyerror("%s '%s' unclosed\n", "here document", eof);
   1199 		/* store character */
   1200 		Xcheck(xs, xp);
   1201 		Xput(xs, xp, c);
   1202 		/* read next character */
   1203 		c = getsc();
   1204 	}
   1205 	/* we read a newline as last character */
   1206  heredoc_store_and_loop:
   1207 	/* store character */
   1208 	Xcheck(xs, xp);
   1209 	Xput(xs, xp, c);
   1210 	if (c == '\n')
   1211 		goto heredoc_read_line;
   1212 	goto heredoc_read_char;
   1213 
   1214  heredoc_found_terminator:
   1215 	/* jump back to saved beginning of line */
   1216 	xp = Xrestpos(xs, xp, xpos);
   1217 	/* terminate, close and store */
   1218 	Xput(xs, xp, '\0');
   1219 	iop->heredoc = Xclose(xs, xp);
   1220 
   1221 	if (!(iop->flag & IOEVAL))
   1222 		ignore_backslash_newline--;
   1223 }
   1224 
   1225 void
   1226 yyerror(const char *fmt, ...)
   1227 {
   1228 	va_list va;
   1229 
   1230 	/* pop aliases and re-reads */
   1231 	while (source->type == SALIAS || source->type == SREREAD)
   1232 		source = source->next;
   1233 	/* zap pending input */
   1234 	source->str = null;
   1235 
   1236 	error_prefix(true);
   1237 	va_start(va, fmt);
   1238 	shf_vfprintf(shl_out, fmt, va);
   1239 	va_end(va);
   1240 	errorfz();
   1241 }
   1242 
   1243 /*
   1244  * input for yylex with alias expansion
   1245  */
   1246 
   1247 Source *
   1248 pushs(int type, Area *areap)
   1249 {
   1250 	Source *s;
   1251 
   1252 	s = alloc(sizeof(Source), areap);
   1253 	memset(s, 0, sizeof(Source));
   1254 	s->type = type;
   1255 	s->str = null;
   1256 	s->areap = areap;
   1257 	if (type == SFILE || type == SSTDIN)
   1258 		XinitN(s->xs, 256, s->areap);
   1259 	return (s);
   1260 }
   1261 
   1262 static int
   1263 getsc_uu(void)
   1264 {
   1265 	Source *s = source;
   1266 	int c;
   1267 
   1268 	while ((c = *s->str++) == 0) {
   1269 		/* return 0 for EOF by default */
   1270 		s->str = NULL;
   1271 		switch (s->type) {
   1272 		case SEOF:
   1273 			s->str = null;
   1274 			return (0);
   1275 
   1276 		case SSTDIN:
   1277 		case SFILE:
   1278 			getsc_line(s);
   1279 			break;
   1280 
   1281 		case SWSTR:
   1282 			break;
   1283 
   1284 		case SSTRING:
   1285 			break;
   1286 
   1287 		case SWORDS:
   1288 			s->start = s->str = *s->u.strv++;
   1289 			s->type = SWORDSEP;
   1290 			break;
   1291 
   1292 		case SWORDSEP:
   1293 			if (*s->u.strv == NULL) {
   1294 				s->start = s->str = "\n";
   1295 				s->type = SEOF;
   1296 			} else {
   1297 				s->start = s->str = " ";
   1298 				s->type = SWORDS;
   1299 			}
   1300 			break;
   1301 
   1302 		case SALIAS:
   1303 			if (s->flags & SF_ALIASEND) {
   1304 				/* pass on an unused SF_ALIAS flag */
   1305 				source = s->next;
   1306 				source->flags |= s->flags & SF_ALIAS;
   1307 				s = source;
   1308 			} else if (*s->u.tblp->val.s &&
   1309 			    (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) {
   1310 				/* pop source stack */
   1311 				source = s = s->next;
   1312 				/*
   1313 				 * Note that this alias ended with a
   1314 				 * space, enabling alias expansion on
   1315 				 * the following word.
   1316 				 */
   1317 				s->flags |= SF_ALIAS;
   1318 			} else {
   1319 				/*
   1320 				 * At this point, we need to keep the current
   1321 				 * alias in the source list so recursive
   1322 				 * aliases can be detected and we also need to
   1323 				 * return the next character. Do this by
   1324 				 * temporarily popping the alias to get the
   1325 				 * next character and then put it back in the
   1326 				 * source list with the SF_ALIASEND flag set.
   1327 				 */
   1328 				/* pop source stack */
   1329 				source = s->next;
   1330 				source->flags |= s->flags & SF_ALIAS;
   1331 				c = getsc_uu();
   1332 				if (c) {
   1333 					s->flags |= SF_ALIASEND;
   1334 					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
   1335 					s->start = s->str = s->ugbuf;
   1336 					s->next = source;
   1337 					source = s;
   1338 				} else {
   1339 					s = source;
   1340 					/* avoid reading EOF twice */
   1341 					s->str = NULL;
   1342 					break;
   1343 				}
   1344 			}
   1345 			continue;
   1346 
   1347 		case SREREAD:
   1348 			if (s->start != s->ugbuf)
   1349 				/* yuck */
   1350 				afree(s->u.freeme, ATEMP);
   1351 			source = s = s->next;
   1352 			continue;
   1353 		}
   1354 		if (s->str == NULL) {
   1355 			s->type = SEOF;
   1356 			s->start = s->str = null;
   1357 			return ('\0');
   1358 		}
   1359 		if (s->flags & SF_ECHO) {
   1360 			shf_puts(s->str, shl_out);
   1361 			shf_flush(shl_out);
   1362 		}
   1363 	}
   1364 	return (c);
   1365 }
   1366 
   1367 static void
   1368 getsc_line(Source *s)
   1369 {
   1370 	char *xp = Xstring(s->xs, xp), *cp;
   1371 	bool interactive = Flag(FTALKING) && s->type == SSTDIN;
   1372 	int have_tty = interactive && (s->flags & SF_TTY);
   1373 
   1374 	/* Done here to ensure nothing odd happens when a timeout occurs */
   1375 	XcheckN(s->xs, xp, LINE);
   1376 	*xp = '\0';
   1377 	s->start = s->str = xp;
   1378 
   1379 	if (have_tty && ksh_tmout) {
   1380 		ksh_tmout_state = TMOUT_READING;
   1381 		alarm(ksh_tmout);
   1382 	}
   1383 	if (interactive)
   1384 		change_winsz();
   1385 	if (have_tty && (
   1386 #if !MKSH_S_NOVI
   1387 	    Flag(FVI) ||
   1388 #endif
   1389 	    Flag(FEMACS) || Flag(FGMACS))) {
   1390 		int nread;
   1391 
   1392 		nread = x_read(xp, LINE);
   1393 		if (nread < 0)
   1394 			/* read error */
   1395 			nread = 0;
   1396 		xp[nread] = '\0';
   1397 		xp += nread;
   1398 	} else {
   1399 		if (interactive)
   1400 			pprompt(prompt, 0);
   1401 		else
   1402 			s->line++;
   1403 
   1404 		while (/* CONSTCOND */ 1) {
   1405 			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
   1406 
   1407 			if (!p && shf_error(s->u.shf) &&
   1408 			    shf_errno(s->u.shf) == EINTR) {
   1409 				shf_clearerr(s->u.shf);
   1410 				if (trap)
   1411 					runtraps(0);
   1412 				continue;
   1413 			}
   1414 			if (!p || (xp = p, xp[-1] == '\n'))
   1415 				break;
   1416 			/* double buffer size */
   1417 			/* move past NUL so doubling works... */
   1418 			xp++;
   1419 			XcheckN(s->xs, xp, Xlength(s->xs, xp));
   1420 			/* ...and move back again */
   1421 			xp--;
   1422 		}
   1423 		/*
   1424 		 * flush any unwanted input so other programs/builtins
   1425 		 * can read it. Not very optimal, but less error prone
   1426 		 * than flushing else where, dealing with redirections,
   1427 		 * etc.
   1428 		 * TODO: reduce size of shf buffer (~128?) if SSTDIN
   1429 		 */
   1430 		if (s->type == SSTDIN)
   1431 			shf_flush(s->u.shf);
   1432 	}
   1433 	/*
   1434 	 * XXX: temporary kludge to restore source after a
   1435 	 * trap may have been executed.
   1436 	 */
   1437 	source = s;
   1438 	if (have_tty && ksh_tmout) {
   1439 		ksh_tmout_state = TMOUT_EXECUTING;
   1440 		alarm(0);
   1441 	}
   1442 	cp = Xstring(s->xs, xp);
   1443 #ifndef MKSH_SMALL
   1444 	if (interactive && *cp == '!' && cur_prompt == PS1) {
   1445 		int linelen;
   1446 
   1447 		linelen = Xlength(s->xs, xp);
   1448 		XcheckN(s->xs, xp, Zfc_e_dash + /* NUL */ 1);
   1449 		/* reload after potential realloc */
   1450 		cp = Xstring(s->xs, xp);
   1451 		/* change initial '!' into space */
   1452 		*cp = ' ';
   1453 		/* NUL terminate the current string */
   1454 		*xp = '\0';
   1455 		/* move the actual string forward */
   1456 		memmove(cp + Zfc_e_dash, cp, linelen + /* NUL */ 1);
   1457 		xp += Zfc_e_dash;
   1458 		/* prepend it with "fc -e -" */
   1459 		memcpy(cp, Tfc_e_dash, Zfc_e_dash);
   1460 	}
   1461 #endif
   1462 	s->start = s->str = cp;
   1463 	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
   1464 	/* Note: if input is all nulls, this is not eof */
   1465 	if (Xlength(s->xs, xp) == 0) {
   1466 		/* EOF */
   1467 		if (s->type == SFILE)
   1468 			shf_fdclose(s->u.shf);
   1469 		s->str = NULL;
   1470 	} else if (interactive && *s->str &&
   1471 	    (cur_prompt != PS1 || !ctype(*s->str, C_IFS | C_IFSWS))) {
   1472 		histsave(&s->line, s->str, true, true);
   1473 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
   1474 	} else if (interactive && cur_prompt == PS1) {
   1475 		cp = Xstring(s->xs, xp);
   1476 		while (*cp && ctype(*cp, C_IFSWS))
   1477 			++cp;
   1478 		if (!*cp)
   1479 			histsync();
   1480 #endif
   1481 	}
   1482 	if (interactive)
   1483 		set_prompt(PS2, NULL);
   1484 }
   1485 
   1486 void
   1487 set_prompt(int to, Source *s)
   1488 {
   1489 	cur_prompt = to;
   1490 
   1491 	switch (to) {
   1492 	/* command */
   1493 	case PS1:
   1494 		/*
   1495 		 * Substitute ! and !! here, before substitutions are done
   1496 		 * so ! in expanded variables are not expanded.
   1497 		 * NOTE: this is not what AT&T ksh does (it does it after
   1498 		 * substitutions, POSIX doesn't say which is to be done.
   1499 		 */
   1500 		{
   1501 			struct shf *shf;
   1502 			char * volatile ps1;
   1503 			Area *saved_atemp;
   1504 
   1505 			ps1 = str_val(global("PS1"));
   1506 			shf = shf_sopen(NULL, strlen(ps1) * 2,
   1507 			    SHF_WR | SHF_DYNAMIC, NULL);
   1508 			while (*ps1)
   1509 				if (*ps1 != '!' || *++ps1 == '!')
   1510 					shf_putchar(*ps1++, shf);
   1511 				else
   1512 					shf_fprintf(shf, "%d",
   1513 						s ? s->line + 1 : 0);
   1514 			ps1 = shf_sclose(shf);
   1515 			saved_atemp = ATEMP;
   1516 			newenv(E_ERRH);
   1517 			if (sigsetjmp(e->jbuf, 0)) {
   1518 				prompt = safe_prompt;
   1519 				/*
   1520 				 * Don't print an error - assume it has already
   1521 				 * been printed. Reason is we may have forked
   1522 				 * to run a command and the child may be
   1523 				 * unwinding its stack through this code as it
   1524 				 * exits.
   1525 				 */
   1526 			} else {
   1527 				char *cp = substitute(ps1, 0);
   1528 				strdupx(prompt, cp, saved_atemp);
   1529 			}
   1530 			quitenv(NULL);
   1531 		}
   1532 		break;
   1533 	/* command continuation */
   1534 	case PS2:
   1535 		prompt = str_val(global("PS2"));
   1536 		break;
   1537 	}
   1538 }
   1539 
   1540 static int
   1541 dopprompt(const char *cp, int ntruncate, bool doprint)
   1542 {
   1543 	int columns = 0, lines = 0, indelimit = 0;
   1544 	char delimiter = 0;
   1545 
   1546 	/*
   1547 	 * Undocumented AT&T ksh feature:
   1548 	 * If the second char in the prompt string is \r then the first
   1549 	 * char is taken to be a non-printing delimiter and any chars
   1550 	 * between two instances of the delimiter are not considered to
   1551 	 * be part of the prompt length
   1552 	 */
   1553 	if (*cp && cp[1] == '\r') {
   1554 		delimiter = *cp;
   1555 		cp += 2;
   1556 	}
   1557 	for (; *cp; cp++) {
   1558 		if (indelimit && *cp != delimiter)
   1559 			;
   1560 		else if (*cp == '\n' || *cp == '\r') {
   1561 			lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
   1562 			columns = 0;
   1563 		} else if (*cp == '\t') {
   1564 			columns = (columns | 7) + 1;
   1565 		} else if (*cp == '\b') {
   1566 			if (columns > 0)
   1567 				columns--;
   1568 		} else if (*cp == delimiter)
   1569 			indelimit = !indelimit;
   1570 		else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
   1571 			const char *cp2;
   1572 			columns += utf_widthadj(cp, &cp2);
   1573 			if (doprint && (indelimit ||
   1574 			    (ntruncate < (x_cols * lines + columns))))
   1575 				shf_write(cp, cp2 - cp, shl_out);
   1576 			cp = cp2 - /* loop increment */ 1;
   1577 			continue;
   1578 		} else
   1579 			columns++;
   1580 		if (doprint && (*cp != delimiter) &&
   1581 		    (indelimit || (ntruncate < (x_cols * lines + columns))))
   1582 			shf_putc(*cp, shl_out);
   1583 	}
   1584 	if (doprint)
   1585 		shf_flush(shl_out);
   1586 	return (x_cols * lines + columns);
   1587 }
   1588 
   1589 
   1590 void
   1591 pprompt(const char *cp, int ntruncate)
   1592 {
   1593 	dopprompt(cp, ntruncate, true);
   1594 }
   1595 
   1596 int
   1597 promptlen(const char *cp)
   1598 {
   1599 	return (dopprompt(cp, 0, false));
   1600 }
   1601 
   1602 /*
   1603  * Read the variable part of a ${...} expression (i.e. up to but not
   1604  * including the :[-+?=#%] or close-brace).
   1605  */
   1606 static char *
   1607 get_brace_var(XString *wsp, char *wp)
   1608 {
   1609 	char c;
   1610 	enum parse_state {
   1611 		PS_INITIAL, PS_SAW_HASH, PS_IDENT,
   1612 		PS_NUMBER, PS_VAR1
   1613 	} state = PS_INITIAL;
   1614 
   1615 	while (/* CONSTCOND */ 1) {
   1616 		c = getsc();
   1617 		/* State machine to figure out where the variable part ends. */
   1618 		switch (state) {
   1619 		case PS_INITIAL:
   1620 			if (c == '#' || c == '!' || c == '%') {
   1621 				state = PS_SAW_HASH;
   1622 				break;
   1623 			}
   1624 			/* FALLTHROUGH */
   1625 		case PS_SAW_HASH:
   1626 			if (ksh_isalphx(c))
   1627 				state = PS_IDENT;
   1628 			else if (ksh_isdigit(c))
   1629 				state = PS_NUMBER;
   1630 			else if (c == '#') {
   1631 				if (state == PS_SAW_HASH) {
   1632 					char c2;
   1633 
   1634 					c2 = getsc();
   1635 					ungetsc(c2);
   1636 					if (c2 != '}') {
   1637 						ungetsc(c);
   1638 						goto out;
   1639 					}
   1640 				}
   1641 				state = PS_VAR1;
   1642 			} else if (ctype(c, C_VAR1))
   1643 				state = PS_VAR1;
   1644 			else
   1645 				goto out;
   1646 			break;
   1647 		case PS_IDENT:
   1648 			if (!ksh_isalnux(c)) {
   1649 				if (c == '[') {
   1650 					char *tmp, *p;
   1651 
   1652 					if (!arraysub(&tmp))
   1653 						yyerror("missing ]\n");
   1654 					*wp++ = c;
   1655 					for (p = tmp; *p; ) {
   1656 						Xcheck(*wsp, wp);
   1657 						*wp++ = *p++;
   1658 					}
   1659 					afree(tmp, ATEMP);
   1660 					/* the ] */
   1661 					c = getsc();
   1662 				}
   1663 				goto out;
   1664 			}
   1665 			break;
   1666 		case PS_NUMBER:
   1667 			if (!ksh_isdigit(c))
   1668 				goto out;
   1669 			break;
   1670 		case PS_VAR1:
   1671 			goto out;
   1672 		}
   1673 		Xcheck(*wsp, wp);
   1674 		*wp++ = c;
   1675 	}
   1676  out:
   1677 	/* end of variable part */
   1678 	*wp++ = '\0';
   1679 	ungetsc(c);
   1680 	return (wp);
   1681 }
   1682 
   1683 /*
   1684  * Save an array subscript - returns true if matching bracket found, false
   1685  * if eof or newline was found.
   1686  * (Returned string double null terminated)
   1687  */
   1688 static bool
   1689 arraysub(char **strp)
   1690 {
   1691 	XString ws;
   1692 	char *wp, c;
   1693 	/* we are just past the initial [ */
   1694 	int depth = 1;
   1695 
   1696 	Xinit(ws, wp, 32, ATEMP);
   1697 
   1698 	do {
   1699 		c = getsc();
   1700 		Xcheck(ws, wp);
   1701 		*wp++ = c;
   1702 		if (c == '[')
   1703 			depth++;
   1704 		else if (c == ']')
   1705 			depth--;
   1706 	} while (depth > 0 && c && c != '\n');
   1707 
   1708 	*wp++ = '\0';
   1709 	*strp = Xclose(ws, wp);
   1710 
   1711 	return (tobool(depth == 0));
   1712 }
   1713 
   1714 /* Unget a char: handles case when we are already at the start of the buffer */
   1715 static void
   1716 ungetsc(int c)
   1717 {
   1718 	struct sretrace_info *rp = retrace_info;
   1719 
   1720 	if (backslash_skip)
   1721 		backslash_skip--;
   1722 	/* Don't unget EOF... */
   1723 	if (source->str == null && c == '\0')
   1724 		return;
   1725 	while (rp) {
   1726 		if (Xlength(rp->xs, rp->xp))
   1727 			rp->xp--;
   1728 		rp = rp->next;
   1729 	}
   1730 	ungetsc_(c);
   1731 }
   1732 static void
   1733 ungetsc_(int c)
   1734 {
   1735 	if (source->str > source->start)
   1736 		source->str--;
   1737 	else {
   1738 		Source *s;
   1739 
   1740 		s = pushs(SREREAD, source->areap);
   1741 		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
   1742 		s->start = s->str = s->ugbuf;
   1743 		s->next = source;
   1744 		source = s;
   1745 	}
   1746 }
   1747 
   1748 
   1749 /* Called to get a char that isn't a \newline sequence. */
   1750 static int
   1751 getsc_bn(void)
   1752 {
   1753 	int c, c2;
   1754 
   1755 	if (ignore_backslash_newline)
   1756 		return (o_getsc_u());
   1757 
   1758 	if (backslash_skip == 1) {
   1759 		backslash_skip = 2;
   1760 		return (o_getsc_u());
   1761 	}
   1762 
   1763 	backslash_skip = 0;
   1764 
   1765 	while (/* CONSTCOND */ 1) {
   1766 		c = o_getsc_u();
   1767 		if (c == '\\') {
   1768 			if ((c2 = o_getsc_u()) == '\n')
   1769 				/* ignore the \newline; get the next char... */
   1770 				continue;
   1771 			ungetsc_(c2);
   1772 			backslash_skip = 1;
   1773 		}
   1774 		return (c);
   1775 	}
   1776 }
   1777 
   1778 void
   1779 yyskiputf8bom(void)
   1780 {
   1781 	int c;
   1782 
   1783 	if ((unsigned char)(c = o_getsc_u()) != 0xEF) {
   1784 		ungetsc_(c);
   1785 		return;
   1786 	}
   1787 	if ((unsigned char)(c = o_getsc_u()) != 0xBB) {
   1788 		ungetsc_(c);
   1789 		ungetsc_(0xEF);
   1790 		return;
   1791 	}
   1792 	if ((unsigned char)(c = o_getsc_u()) != 0xBF) {
   1793 		ungetsc_(c);
   1794 		ungetsc_(0xBB);
   1795 		ungetsc_(0xEF);
   1796 		return;
   1797 	}
   1798 	UTFMODE |= 8;
   1799 }
   1800 
   1801 static Lex_state *
   1802 push_state_(State_info *si, Lex_state *old_end)
   1803 {
   1804 	Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);
   1805 
   1806 	news[0].ls_base = old_end;
   1807 	si->base = &news[0];
   1808 	si->end = &news[STATE_BSIZE];
   1809 	return (&news[1]);
   1810 }
   1811 
   1812 static Lex_state *
   1813 pop_state_(State_info *si, Lex_state *old_end)
   1814 {
   1815 	Lex_state *old_base = si->base;
   1816 
   1817 	si->base = old_end->ls_base - STATE_BSIZE;
   1818 	si->end = old_end->ls_base;
   1819 
   1820 	afree(old_base, ATEMP);
   1821 
   1822 	return (si->base + STATE_BSIZE - 1);
   1823 }
   1824 
   1825 static int
   1826 s_get(void)
   1827 {
   1828 	return (getsc());
   1829 }
   1830 
   1831 static void
   1832 s_put(int c)
   1833 {
   1834 	ungetsc(c);
   1835 }
   1836