Home | History | Annotate | Download | only in src
      1 /*	$OpenBSD: lex.c,v 1.51 2015/09/10 22:48:58 nicm Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
      5  *		 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018
      6  *	mirabilos <m (at) mirbsd.org>
      7  *
      8  * Provided that these terms and disclaimer and all copyright notices
      9  * are retained or reproduced in an accompanying document, permission
     10  * is granted to deal in this work without restriction, including un-
     11  * limited rights to use, publicly perform, distribute, sell, modify,
     12  * merge, give away, or sublicence.
     13  *
     14  * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
     15  * the utmost extent permitted by applicable law, neither express nor
     16  * implied; without malicious intent or gross negligence. In no event
     17  * may a licensor, author or contributor be held liable for indirect,
     18  * direct, other damage, loss, or other issues arising in any way out
     19  * of dealing in the work, even if advised of the possibility of such
     20  * damage or existence of a defect, except proven that it results out
     21  * of said person's immediate fault when using the work as intended.
     22  */
     23 
     24 #include "sh.h"
     25 
     26 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.247 2018/01/14 01:44:01 tg Exp $");
     27 
     28 /*
     29  * Lexer states used while collecting a word. The current state is
         * kept in the type member of the topmost Lex_state entry (see the
         * PUSH_STATE and POP_STATE macros), so every value has to fit into
         * a uint8_t.
     30  */
     31 #define SBASE		0	/* outside any lexical constructs */
     32 #define SWORD		1	/* implicit quoting for substitute() */
     33 #define SLETPAREN	2	/* inside (( )), implicit quoting */
     34 #define SSQUOTE		3	/* inside '' */
     35 #define SDQUOTE		4	/* inside "" */
     36 #define SEQUOTE		5	/* inside $'' */
     37 #define SBRACE		6	/* inside ${} */
     38 #define SQBRACE		7	/* inside "${}" */
     39 #define SBQUOTE		8	/* inside `` */
     40 #define SASPAREN	9	/* inside $(( )) */
     41 #define SHEREDELIM	10	/* parsing << or <<- delimiter */
     42 #define SHEREDQUOTE	11	/* parsing " in << or <<- delimiter */
     43 #define SPATTERN	12	/* parsing *(...|...) pattern (*+?@!) */
     44 #define SADELIM		13	/* like SBASE, looking for delimiter */
     45 #define STBRACEKORN	14	/* parsing ${...[#%]...} !FSH */
     46 #define STBRACEBOURNE	15	/* parsing ${...[#%]...} FSH */
     47 #define SINVALID	255	/* invalid state; yylex() tags its states[0] slot with it */
     48 
     49 struct sretrace_info {	/* one entry of the retrace_info list */
        	/* entry that headed the list when this one was pushed */
     50 	struct sretrace_info *next;
        	/* growable buffer; o_getsc_r() appends every character read */
     51 	XString xs;
        	/* current write position inside xs */
     52 	char *xp;
     53 };
     54 
     55 /*
     56  * One entry of the lexer state stack: the state type plus the extra
     57  * information needed for that particular state. The members of u
         * share storage, so only the one belonging to the entry's type is
         * meaningful at any time.
     58  */
     59 typedef struct lex_state {
     60 	union {
     61 		/* link to another state block; NULL in yylex()'s states[0] */
     62 		struct lex_state *base;
     63 		/* output position saved by PUSH_SRETRACE, restored by POP_SRETRACE */
     64 		size_t start;
     65 		/* SBQUOTE: true if in double quotes: "`...`" */
     66 		/* SEQUOTE: got NUL, ignore rest of string */
     67 		bool abool;
     68 		/* SADELIM information */
     69 		struct {
     70 			/* character to search for */
     71 			unsigned char delimiter;
     72 			/* max. number of delimiters */
     73 			unsigned char num;
     74 		} adelim;
     75 	} u;
     76 	/* count open parentheses (SADELIM, SASPAREN and SLETPAREN use it) */
     77 	short nparen;
     78 	/* type of this state: one of the S* constants */
     79 	uint8_t type;
     80 } Lex_state;
        /* shorthands for reaching the union members through a Lex_state */
     81 #define ls_base		u.base
     82 #define ls_start	u.start
     83 #define ls_bool		u.abool
     84 #define ls_adelim	u.adelim
     85 
     86 typedef struct {	/* bounds of the state block currently in use */
        	/* first entry of the block; POP_STATE stops when statep gets here */
     87 	Lex_state *base;
        	/* one past the last entry; yylex() sets it to &base[STATE_BSIZE] */
     88 	Lex_state *end;
     89 } State_info;
     90 
     91 static void readhere(struct ioword *);
     92 static void ungetsc(int);
     93 static void ungetsc_i(int);
     94 static int getsc_uu(void);	/* slow path behind o_getsc_u() */
     95 static void getsc_line(Source *);
     96 static int getsc_bn(void);	/* slow path behind o_getsc() */
     97 static int getsc_i(void);
     98 static char *get_brace_var(XString *, char *);
     99 static bool arraysub(char **);
    100 static void gethere(void);
        /* called by PUSH_STATE / POP_STATE when statep hits a block boundary */
    101 static Lex_state *push_state_i(State_info *, Lex_state *);
    102 static Lex_state *pop_state_i(State_info *, Lex_state *);
    103 
        /* non-zero sends o_getsc() to getsc_bn(); yylex() resets it to 0 */
    104 static int backslash_skip;
        /*
         * Counter that yylex() raises while reading a # comment or the inside
         * of '' and $'' and lowers again afterwards; reset to 0 on entry to
         * yylex(). NOTE(review): presumably read by getsc_bn() to leave
         * backslash-newline untouched in those contexts; confirm there.
         */
    105 static int ignore_backslash_newline;
    106 
    107 /*
         * optimised getsc_bn(): when the next byte of source->str is neither
         * NUL nor a backslash and backslash_skip is zero, consume that byte
         * directly; in every other case call getsc_bn()
         */
    108 #define o_getsc()	(*source->str != '\0' && *source->str != '\\' && \
    109 			    !backslash_skip ? *source->str++ : getsc_bn())
    110 /* optimised getsc_uu(): only a NUL byte diverts to the function call */
    111 #define	o_getsc_u()	((*source->str != '\0') ? *source->str++ : getsc_uu())
    112 
    113 /*
         * retrace helper: evaluates carg once, appends the value to the buffer
         * of every entry on the retrace_info list (growing it via Xcheck when
         * needed) and returns the value. The expansion declares locals and
         * ends in a return statement, so it can only be used as the entire
         * body of a function returning int.
         */
    114 #define o_getsc_r(carg)					\
    115 	int cev = (carg);				\
    116 	struct sretrace_info *rp = retrace_info;	\
    117 							\
    118 	while (rp) {					\
    119 		Xcheck(rp->xs, rp->xp);			\
    120 		*rp->xp++ = cev;			\
    121 		rp = rp->next;				\
    122 	}						\
    123 							\
    124 	return (cev);
    125 
    126 /*
         * callback: reads one character through o_getsc(), converted via
         * unsigned char so the result is never negative, records it in all
         * active retrace buffers and returns it. Being a real function it can
         * be passed around by pointer; yylex() hands it to unbksl().
         */
    127 static int
    128 getsc_i(void)
    129 {
    130 	o_getsc_r((unsigned int)(unsigned char)o_getsc());
    131 }
    132 
    133 #if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST)
        /* small build: every getsc() is a plain call to getsc_i() */
    134 #define getsc()		getsc_i()
    135 #else
    136 static int getsc_r(int);
    137 
        /*
         * Records the already-read character c in all active retrace buffers
         * and returns it unchanged.
         */
    138 static int
    139 getsc_r(int c)
    140 {
    141 	o_getsc_r(c);
    142 }
    143 
        /*
         * other builds: the o_getsc() fast path is expanded at each call
         * site and only the retrace bookkeeping goes through getsc_r()
         */
    144 #define getsc()		getsc_r((unsigned int)(unsigned char)o_getsc())
    145 #endif
    146 
    147 #define STATE_BSIZE	8	/* number of Lex_state entries per state block */
    148 
        /*
         * Enter state s: advance statep by one entry and, when that reaches
         * state_info.end, let push_state_i() supply the entry to use instead;
         * then store s both in that entry's type and in state. Expects
         * statep, state and state_info to be in scope at the point of use.
         */
    149 #define PUSH_STATE(s)	do {					\
    150 	if (++statep == state_info.end)				\
    151 		statep = push_state_i(&state_info, statep);	\
    152 	state = statep->type = (s);				\
    153 } while (/* CONSTCOND */ 0)
    154 
        /*
         * Leave the current state: step statep back by one entry and, when
         * that reaches state_info.base, let pop_state_i() pick the entry to
         * continue with; then reload state from the entry's type.
         */
    155 #define POP_STATE()	do {					\
    156 	if (--statep == state_info.base)			\
    157 		statep = pop_state_i(&state_info, statep);	\
    158 	state = statep->type;					\
    159 } while (/* CONSTCOND */ 0)
    160 
        /*
         * Enter state s and start recording the raw input: saves the current
         * output position (ws, wp) in ls_start and puts a freshly allocated
         * sretrace_info, set up with Xinit(..., 64, ATEMP), at the head of
         * the retrace_info list. Expects ws and wp to be in scope as well.
         */
    161 #define PUSH_SRETRACE(s) do {					\
    162 	struct sretrace_info *ri;				\
    163 								\
    164 	PUSH_STATE(s);						\
    165 	statep->ls_start = Xsavepos(ws, wp);			\
    166 	ri = alloc(sizeof(struct sretrace_info), ATEMP);	\
    167 	Xinit(ri->xs, ri->xp, 64, ATEMP);			\
    168 	ri->next = retrace_info;				\
    169 	retrace_info = ri;					\
    170 } while (/* CONSTCOND */ 0)
    171 
        /*
         * Undo PUSH_SRETRACE: rewinds wp to the saved output position,
         * NUL-terminates the newest retrace buffer and leaves its string in
         * sp, unlinks and frees the list node (through dp), then pops the
         * state. The string in sp is not freed here; that is left to the
         * code using the macro. Overwrites sp and dp.
         */
    172 #define POP_SRETRACE()	do {					\
    173 	wp = Xrestpos(ws, wp, statep->ls_start);		\
    174 	*retrace_info->xp = '\0';				\
    175 	sp = Xstring(retrace_info->xs, retrace_info->xp);	\
    176 	dp = (void *)retrace_info;				\
    177 	retrace_info = retrace_info->next;			\
    178 	afree(dp, ATEMP);					\
    179 	POP_STATE();						\
    180 } while (/* CONSTCOND */ 0)
    181 
    182 /**
    183  * Lexical analyser
    184  *
    185  * tokens are not regular expressions, they are LL(1).
    186  * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
    187  * hence the state stack. Note "$(...)" are now parsed recursively.
    188  */
    189 
    190 int
    191 yylex(int cf)
    192 {
    193 	Lex_state states[STATE_BSIZE], *statep, *s2, *base;
    194 	State_info state_info;
    195 	int c, c2, state;
    196 	size_t cz;
    197 	XString ws;		/* expandable output word */
    198 	char *wp;		/* output word pointer */
    199 	char *sp, *dp;
    200 
    201  Again:
    202 	states[0].type = SINVALID;
    203 	states[0].ls_base = NULL;
    204 	statep = &states[1];
    205 	state_info.base = states;
    206 	state_info.end = &state_info.base[STATE_BSIZE];
    207 
    208 	Xinit(ws, wp, 64, ATEMP);
    209 
    210 	backslash_skip = 0;
    211 	ignore_backslash_newline = 0;
    212 
    213 	if (cf & ONEWORD)
    214 		state = SWORD;
    215 	else if (cf & LETEXPR) {
    216 		/* enclose arguments in (double) quotes */
    217 		*wp++ = OQUOTE;
    218 		state = SLETPAREN;
    219 		statep->nparen = 0;
    220 	} else {
    221 		/* normal lexing */
    222 		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
    223 		do {
    224 			c = getsc();
    225 		} while (ctype(c, C_BLANK));
    226 		if (c == '#') {
    227 			ignore_backslash_newline++;
    228 			do {
    229 				c = getsc();
    230 			} while (!ctype(c, C_NUL | C_LF));
    231 			ignore_backslash_newline--;
    232 		}
    233 		ungetsc(c);
    234 	}
    235 	if (source->flags & SF_ALIAS) {
    236 		/* trailing ' ' in alias definition */
    237 		source->flags &= ~SF_ALIAS;
    238 		/* POSIX: trailing space only counts if parsing simple cmd */
    239 		if (!Flag(FPOSIX) || (cf & CMDWORD))
    240 			cf |= ALIAS;
    241 	}
    242 
    243 	/* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
    244 	statep->type = state;
    245 
    246 	/* collect non-special or quoted characters to form word */
    247 	while (!((c = getsc()) == 0 ||
    248 	    ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
    249 		if (state == SBASE &&
    250 		    subshell_nesting_type == ORD(/*{*/ '}') &&
    251 		    (unsigned int)c == ORD(/*{*/ '}'))
    252 			/* possibly end ${ :;} */
    253 			break;
    254 		Xcheck(ws, wp);
    255 		switch (state) {
    256 		case SADELIM:
    257 			if ((unsigned int)c == ORD('('))
    258 				statep->nparen++;
    259 			else if ((unsigned int)c == ORD(')'))
    260 				statep->nparen--;
    261 			else if (statep->nparen == 0 &&
    262 			    ((unsigned int)c == ORD(/*{*/ '}') ||
    263 			    c == (int)statep->ls_adelim.delimiter)) {
    264 				*wp++ = ADELIM;
    265 				*wp++ = c;
    266 				if ((unsigned int)c == ORD(/*{*/ '}') ||
    267 				    --statep->ls_adelim.num == 0)
    268 					POP_STATE();
    269 				if ((unsigned int)c == ORD(/*{*/ '}'))
    270 					POP_STATE();
    271 				break;
    272 			}
    273 			/* FALLTHROUGH */
    274 		case SBASE:
    275 			if ((unsigned int)c == ORD('[') && (cf & CMDASN)) {
    276 				/* temporary */
    277 				*wp = EOS;
    278 				if (is_wdvarname(Xstring(ws, wp), false)) {
    279 					char *p, *tmp;
    280 
    281 					if (arraysub(&tmp)) {
    282 						*wp++ = CHAR;
    283 						*wp++ = c;
    284 						for (p = tmp; *p; ) {
    285 							Xcheck(ws, wp);
    286 							*wp++ = CHAR;
    287 							*wp++ = *p++;
    288 						}
    289 						afree(tmp, ATEMP);
    290 						break;
    291 					}
    292 				}
    293 				*wp++ = CHAR;
    294 				*wp++ = c;
    295 				break;
    296 			}
    297 			/* FALLTHROUGH */
    298  Sbase1:		/* includes *(...|...) pattern (*+?@!) */
    299 			if (ctype(c, C_PATMO)) {
    300 				c2 = getsc();
    301 				if ((unsigned int)c2 == ORD('(' /*)*/)) {
    302 					*wp++ = OPAT;
    303 					*wp++ = c;
    304 					PUSH_STATE(SPATTERN);
    305 					break;
    306 				}
    307 				ungetsc(c2);
    308 			}
    309 			/* FALLTHROUGH */
    310  Sbase2:		/* doesn't include *(...|...) pattern (*+?@!) */
    311 			switch (c) {
    312 			case ORD('\\'):
    313  getsc_qchar:
    314 				if ((c = getsc())) {
    315 					/* trailing \ is lost */
    316 					*wp++ = QCHAR;
    317 					*wp++ = c;
    318 				}
    319 				break;
    320 			case ORD('\''):
    321  open_ssquote_unless_heredoc:
    322 				if ((cf & HEREDOC))
    323 					goto store_char;
    324 				*wp++ = OQUOTE;
    325 				ignore_backslash_newline++;
    326 				PUSH_STATE(SSQUOTE);
    327 				break;
    328 			case ORD('"'):
    329  open_sdquote:
    330 				*wp++ = OQUOTE;
    331 				PUSH_STATE(SDQUOTE);
    332 				break;
    333 			case ORD('$'):
    334 				/*
    335 				 * processing of dollar sign belongs into
    336 				 * Subst, except for those which can open
    337 				 * a string: $'' and $""
    338 				 */
    339  subst_dollar_ex:
    340 				c = getsc();
    341 				switch (c) {
    342 				case ORD('"'):
    343 					goto open_sdquote;
    344 				case ORD('\''):
    345 					goto open_sequote;
    346 				default:
    347 					goto SubstS;
    348 				}
    349 			default:
    350 				goto Subst;
    351 			}
    352 			break;
    353 
    354  Subst:
    355 			switch (c) {
    356 			case ORD('\\'):
    357 				c = getsc();
    358 				switch (c) {
    359 				case ORD('"'):
    360 					if ((cf & HEREDOC))
    361 						goto heredocquote;
    362 					/* FALLTHROUGH */
    363 				case ORD('\\'):
    364 				case ORD('$'):
    365 				case ORD('`'):
    366  store_qchar:
    367 					*wp++ = QCHAR;
    368 					*wp++ = c;
    369 					break;
    370 				default:
    371  heredocquote:
    372 					Xcheck(ws, wp);
    373 					if (c) {
    374 						/* trailing \ is lost */
    375 						*wp++ = CHAR;
    376 						*wp++ = '\\';
    377 						*wp++ = CHAR;
    378 						*wp++ = c;
    379 					}
    380 					break;
    381 				}
    382 				break;
    383 			case ORD('$'):
    384 				c = getsc();
    385  SubstS:
    386 				if ((unsigned int)c == ORD('(' /*)*/)) {
    387 					c = getsc();
    388 					if ((unsigned int)c == ORD('(' /*)*/)) {
    389 						*wp++ = EXPRSUB;
    390 						PUSH_SRETRACE(SASPAREN);
    391 						statep->nparen = 2;
    392 						*retrace_info->xp++ = '(';
    393 					} else {
    394 						ungetsc(c);
    395  subst_command:
    396 						c = COMSUB;
    397  subst_command2:
    398 						sp = yyrecursive(c);
    399 						cz = strlen(sp) + 1;
    400 						XcheckN(ws, wp, cz);
    401 						*wp++ = c;
    402 						memcpy(wp, sp, cz);
    403 						wp += cz;
    404 					}
    405 				} else if ((unsigned int)c == ORD('{' /*}*/)) {
    406 					if ((unsigned int)(c = getsc()) == ORD('|')) {
    407 						/*
    408 						 * non-subenvironment
    409 						 * value substitution
    410 						 */
    411 						c = VALSUB;
    412 						goto subst_command2;
    413 					} else if (ctype(c, C_IFSWS)) {
    414 						/*
    415 						 * non-subenvironment
    416 						 * "command" substitution
    417 						 */
    418 						c = FUNSUB;
    419 						goto subst_command2;
    420 					}
    421 					ungetsc(c);
    422 					*wp++ = OSUBST;
    423 					*wp++ = '{' /*}*/;
    424 					wp = get_brace_var(&ws, wp);
    425 					c = getsc();
    426 					/* allow :# and :% (ksh88 compat) */
    427 					if ((unsigned int)c == ORD(':')) {
    428 						*wp++ = CHAR;
    429 						*wp++ = c;
    430 						c = getsc();
    431 						if ((unsigned int)c == ORD(':')) {
    432 							*wp++ = CHAR;
    433 							*wp++ = '0';
    434 							*wp++ = ADELIM;
    435 							*wp++ = ':';
    436 							PUSH_STATE(SBRACE);
    437 							PUSH_STATE(SADELIM);
    438 							statep->ls_adelim.delimiter = ':';
    439 							statep->ls_adelim.num = 1;
    440 							statep->nparen = 0;
    441 							break;
    442 						} else if (ctype(c, C_DIGIT | C_DOLAR | C_SPC) ||
    443 						    /*XXX what else? */
    444 						    c == '(' /*)*/) {
    445 							/* substring subst. */
    446 							if (c != ' ') {
    447 								*wp++ = CHAR;
    448 								*wp++ = ' ';
    449 							}
    450 							ungetsc(c);
    451 							PUSH_STATE(SBRACE);
    452 							PUSH_STATE(SADELIM);
    453 							statep->ls_adelim.delimiter = ':';
    454 							statep->ls_adelim.num = 2;
    455 							statep->nparen = 0;
    456 							break;
    457 						}
    458 					} else if (c == '/') {
    459 						c2 = ADELIM;
    460  parse_adelim_slash:
    461 						*wp++ = CHAR;
    462 						*wp++ = c;
    463 						if ((unsigned int)(c = getsc()) == ORD('/')) {
    464 							*wp++ = c2;
    465 							*wp++ = c;
    466 						} else
    467 							ungetsc(c);
    468 						PUSH_STATE(SBRACE);
    469 						PUSH_STATE(SADELIM);
    470 						statep->ls_adelim.delimiter = '/';
    471 						statep->ls_adelim.num = 1;
    472 						statep->nparen = 0;
    473 						break;
    474 					} else if (c == '@') {
    475 						c2 = getsc();
    476 						ungetsc(c2);
    477 						if ((unsigned int)c2 == ORD('/')) {
    478 							c2 = CHAR;
    479 							goto parse_adelim_slash;
    480 						}
    481 					}
    482 					/*
    483 					 * If this is a trim operation,
    484 					 * treat (,|,) specially in STBRACE.
    485 					 */
    486 					if (ctype(c, C_SUB2)) {
    487 						ungetsc(c);
    488 						if (Flag(FSH))
    489 							PUSH_STATE(STBRACEBOURNE);
    490 						else
    491 							PUSH_STATE(STBRACEKORN);
    492 					} else {
    493 						ungetsc(c);
    494 						if (state == SDQUOTE ||
    495 						    state == SQBRACE)
    496 							PUSH_STATE(SQBRACE);
    497 						else
    498 							PUSH_STATE(SBRACE);
    499 					}
    500 				} else if (ctype(c, C_ALPHX)) {
    501 					*wp++ = OSUBST;
    502 					*wp++ = 'X';
    503 					do {
    504 						Xcheck(ws, wp);
    505 						*wp++ = c;
    506 						c = getsc();
    507 					} while (ctype(c, C_ALNUX));
    508 					*wp++ = '\0';
    509 					*wp++ = CSUBST;
    510 					*wp++ = 'X';
    511 					ungetsc(c);
    512 				} else if (ctype(c, C_VAR1 | C_DIGIT)) {
    513 					Xcheck(ws, wp);
    514 					*wp++ = OSUBST;
    515 					*wp++ = 'X';
    516 					*wp++ = c;
    517 					*wp++ = '\0';
    518 					*wp++ = CSUBST;
    519 					*wp++ = 'X';
    520 				} else {
    521 					*wp++ = CHAR;
    522 					*wp++ = '$';
    523 					ungetsc(c);
    524 				}
    525 				break;
    526 			case ORD('`'):
    527  subst_gravis:
    528 				PUSH_STATE(SBQUOTE);
    529 				*wp++ = COMASUB;
    530 				/*
    531 				 * We need to know whether we are within double
    532 				 * quotes in order to translate \" to " within
    533 				 * "`\"`" because, unlike for COMSUBs, the
    534 				 * outer double quoting changes the backslash
    535 				 * meaning for the inside. For more details:
    536 				 * http://austingroupbugs.net/view.php?id=1015
    537 				 */
    538 				statep->ls_bool = false;
    539 				s2 = statep;
    540 				base = state_info.base;
    541 				while (/* CONSTCOND */ 1) {
    542 					for (; s2 != base; s2--) {
    543 						if (s2->type == SDQUOTE) {
    544 							statep->ls_bool = true;
    545 							break;
    546 						}
    547 					}
    548 					if (s2 != base)
    549 						break;
    550 					if (!(s2 = s2->ls_base))
    551 						break;
    552 					base = s2-- - STATE_BSIZE;
    553 				}
    554 				break;
    555 			case QCHAR:
    556 				if (cf & LQCHAR) {
    557 					*wp++ = QCHAR;
    558 					*wp++ = getsc();
    559 					break;
    560 				}
    561 				/* FALLTHROUGH */
    562 			default:
    563  store_char:
    564 				*wp++ = CHAR;
    565 				*wp++ = c;
    566 			}
    567 			break;
    568 
    569 		case SEQUOTE:
    570 			if ((unsigned int)c == ORD('\'')) {
    571 				POP_STATE();
    572 				*wp++ = CQUOTE;
    573 				ignore_backslash_newline--;
    574 			} else if ((unsigned int)c == ORD('\\')) {
    575 				if ((c2 = unbksl(true, getsc_i, ungetsc)) == -1)
    576 					c2 = getsc();
    577 				if (c2 == 0)
    578 					statep->ls_bool = true;
    579 				if (!statep->ls_bool) {
    580 					char ts[4];
    581 
    582 					if ((unsigned int)c2 < 0x100) {
    583 						*wp++ = QCHAR;
    584 						*wp++ = c2;
    585 					} else {
    586 						cz = utf_wctomb(ts, c2 - 0x100);
    587 						ts[cz] = 0;
    588 						cz = 0;
    589 						do {
    590 							*wp++ = QCHAR;
    591 							*wp++ = ts[cz];
    592 						} while (ts[++cz]);
    593 					}
    594 				}
    595 			} else if (!statep->ls_bool) {
    596 				*wp++ = QCHAR;
    597 				*wp++ = c;
    598 			}
    599 			break;
    600 
    601 		case SSQUOTE:
    602 			if ((unsigned int)c == ORD('\'')) {
    603 				POP_STATE();
    604 				if ((cf & HEREDOC) || state == SQBRACE)
    605 					goto store_char;
    606 				*wp++ = CQUOTE;
    607 				ignore_backslash_newline--;
    608 			} else {
    609 				*wp++ = QCHAR;
    610 				*wp++ = c;
    611 			}
    612 			break;
    613 
    614 		case SDQUOTE:
    615 			if ((unsigned int)c == ORD('"')) {
    616 				POP_STATE();
    617 				*wp++ = CQUOTE;
    618 			} else
    619 				goto Subst;
    620 			break;
    621 
    622 		/* $(( ... )) */
    623 		case SASPAREN:
    624 			if ((unsigned int)c == ORD('('))
    625 				statep->nparen++;
    626 			else if ((unsigned int)c == ORD(')')) {
    627 				statep->nparen--;
    628 				if (statep->nparen == 1) {
    629 					/* end of EXPRSUB */
    630 					POP_SRETRACE();
    631 
    632 					if ((unsigned int)(c2 = getsc()) == ORD(/*(*/ ')')) {
    633 						cz = strlen(sp) - 2;
    634 						XcheckN(ws, wp, cz);
    635 						memcpy(wp, sp + 1, cz);
    636 						wp += cz;
    637 						afree(sp, ATEMP);
    638 						*wp++ = '\0';
    639 						break;
    640 					} else {
    641 						Source *s;
    642 
    643 						ungetsc(c2);
    644 						/*
    645 						 * mismatched parenthesis -
    646 						 * assume we were really
    647 						 * parsing a $(...) expression
    648 						 */
    649 						--wp;
    650 						s = pushs(SREREAD,
    651 						    source->areap);
    652 						s->start = s->str =
    653 						    s->u.freeme = sp;
    654 						s->next = source;
    655 						source = s;
    656 						goto subst_command;
    657 					}
    658 				}
    659 			}
    660 			/* reuse existing state machine */
    661 			goto Sbase2;
    662 
    663 		case SQBRACE:
    664 			if ((unsigned int)c == ORD('\\')) {
    665 				/*
    666 				 * perform POSIX "quote removal" if the back-
    667 				 * slash is "special", i.e. same cases as the
    668 				 * {case '\\':} in Subst: plus closing brace;
    669 				 * in mksh code "quote removal" on '\c' means
    670 				 * write QCHAR+c, otherwise CHAR+\+CHAR+c are
    671 				 * emitted (in heredocquote:)
    672 				 */
    673 				if ((unsigned int)(c = getsc()) == ORD('"') ||
    674 				    (unsigned int)c == ORD('\\') ||
    675 				    ctype(c, C_DOLAR | C_GRAVE) ||
    676 				    (unsigned int)c == ORD(/*{*/ '}'))
    677 					goto store_qchar;
    678 				goto heredocquote;
    679 			}
    680 			goto common_SQBRACE;
    681 
    682 		case SBRACE:
    683 			if ((unsigned int)c == ORD('\''))
    684 				goto open_ssquote_unless_heredoc;
    685 			else if ((unsigned int)c == ORD('\\'))
    686 				goto getsc_qchar;
    687  common_SQBRACE:
    688 			if ((unsigned int)c == ORD('"'))
    689 				goto open_sdquote;
    690 			else if ((unsigned int)c == ORD('$'))
    691 				goto subst_dollar_ex;
    692 			else if ((unsigned int)c == ORD('`'))
    693 				goto subst_gravis;
    694 			else if ((unsigned int)c != ORD(/*{*/ '}'))
    695 				goto store_char;
    696 			POP_STATE();
    697 			*wp++ = CSUBST;
    698 			*wp++ = /*{*/ '}';
    699 			break;
    700 
    701 		/* Same as SBASE, except (,|,) treated specially */
    702 		case STBRACEKORN:
    703 			if ((unsigned int)c == ORD('|'))
    704 				*wp++ = SPAT;
    705 			else if ((unsigned int)c == ORD('(')) {
    706 				*wp++ = OPAT;
    707 				/* simile for @ */
    708 				*wp++ = ' ';
    709 				PUSH_STATE(SPATTERN);
    710 			} else /* FALLTHROUGH */
    711 		case STBRACEBOURNE:
    712 			  if ((unsigned int)c == ORD(/*{*/ '}')) {
    713 				POP_STATE();
    714 				*wp++ = CSUBST;
    715 				*wp++ = /*{*/ '}';
    716 			} else
    717 				goto Sbase1;
    718 			break;
    719 
    720 		case SBQUOTE:
    721 			if ((unsigned int)c == ORD('`')) {
    722 				*wp++ = 0;
    723 				POP_STATE();
    724 			} else if ((unsigned int)c == ORD('\\')) {
    725 				switch (c = getsc()) {
    726 				case 0:
    727 					/* trailing \ is lost */
    728 					break;
    729 				case ORD('$'):
    730 				case ORD('`'):
    731 				case ORD('\\'):
    732 					*wp++ = c;
    733 					break;
    734 				case ORD('"'):
    735 					if (statep->ls_bool) {
    736 						*wp++ = c;
    737 						break;
    738 					}
    739 					/* FALLTHROUGH */
    740 				default:
    741 					*wp++ = '\\';
    742 					*wp++ = c;
    743 					break;
    744 				}
    745 			} else
    746 				*wp++ = c;
    747 			break;
    748 
    749 		/* ONEWORD */
    750 		case SWORD:
    751 			goto Subst;
    752 
    753 		/* LETEXPR: (( ... )) */
    754 		case SLETPAREN:
    755 			if ((unsigned int)c == ORD(/*(*/ ')')) {
    756 				if (statep->nparen > 0)
    757 					--statep->nparen;
    758 				else if ((unsigned int)(c2 = getsc()) == ORD(/*(*/ ')')) {
    759 					c = 0;
    760 					*wp++ = CQUOTE;
    761 					goto Done;
    762 				} else {
    763 					Source *s;
    764 
    765 					ungetsc(c2);
    766 					ungetsc(c);
    767 					/*
    768 					 * mismatched parenthesis -
    769 					 * assume we were really
    770 					 * parsing a (...) expression
    771 					 */
    772 					*wp = EOS;
    773 					sp = Xstring(ws, wp);
    774 					dp = wdstrip(sp + 1, WDS_TPUTS);
    775 					s = pushs(SREREAD, source->areap);
    776 					s->start = s->str = s->u.freeme = dp;
    777 					s->next = source;
    778 					source = s;
    779 					ungetsc('(' /*)*/);
    780 					return (ORD('(' /*)*/));
    781 				}
    782 			} else if ((unsigned int)c == ORD('('))
    783 				/*
    784 				 * parentheses inside quotes and
    785 				 * backslashes are lost, but AT&T ksh
    786 				 * doesn't count them either
    787 				 */
    788 				++statep->nparen;
    789 			goto Sbase2;
    790 
    791 		/* << or <<- delimiter */
    792 		case SHEREDELIM:
    793 			/*
    794 			 * here delimiters need a special case since
    795 			 * $ and `...` are not to be treated specially
    796 			 */
    797 			switch (c) {
    798 			case ORD('\\'):
    799 				if ((c = getsc())) {
    800 					/* trailing \ is lost */
    801 					*wp++ = QCHAR;
    802 					*wp++ = c;
    803 				}
    804 				break;
    805 			case ORD('\''):
    806 				goto open_ssquote_unless_heredoc;
    807 			case ORD('$'):
    808 				if ((unsigned int)(c2 = getsc()) == ORD('\'')) {
    809  open_sequote:
    810 					*wp++ = OQUOTE;
    811 					ignore_backslash_newline++;
    812 					PUSH_STATE(SEQUOTE);
    813 					statep->ls_bool = false;
    814 					break;
    815 				} else if ((unsigned int)c2 == ORD('"')) {
    816 					/* FALLTHROUGH */
    817 			case ORD('"'):
    818 					PUSH_SRETRACE(SHEREDQUOTE);
    819 					break;
    820 				}
    821 				ungetsc(c2);
    822 				/* FALLTHROUGH */
    823 			default:
    824 				*wp++ = CHAR;
    825 				*wp++ = c;
    826 			}
    827 			break;
    828 
    829 		/* " in << or <<- delimiter */
    830 		case SHEREDQUOTE:
    831 			if ((unsigned int)c != ORD('"'))
    832 				goto Subst;
    833 			POP_SRETRACE();
    834 			dp = strnul(sp) - 1;
    835 			/* remove the trailing double quote */
    836 			*dp = '\0';
    837 			/* store the quoted string */
    838 			*wp++ = OQUOTE;
    839 			XcheckN(ws, wp, (dp - sp) * 2);
    840 			dp = sp;
    841 			while ((c = *dp++)) {
    842 				if (c == '\\') {
    843 					switch ((c = *dp++)) {
    844 					case ORD('\\'):
    845 					case ORD('"'):
    846 					case ORD('$'):
    847 					case ORD('`'):
    848 						break;
    849 					default:
    850 						*wp++ = CHAR;
    851 						*wp++ = '\\';
    852 						break;
    853 					}
    854 				}
    855 				*wp++ = CHAR;
    856 				*wp++ = c;
    857 			}
    858 			afree(sp, ATEMP);
    859 			*wp++ = CQUOTE;
    860 			state = statep->type = SHEREDELIM;
    861 			break;
    862 
    863 		/* in *(...|...) pattern (*+?@!) */
    864 		case SPATTERN:
    865 			if ((unsigned int)c == ORD(/*(*/ ')')) {
    866 				*wp++ = CPAT;
    867 				POP_STATE();
    868 			} else if ((unsigned int)c == ORD('|')) {
    869 				*wp++ = SPAT;
    870 			} else if ((unsigned int)c == ORD('(')) {
    871 				*wp++ = OPAT;
    872 				/* simile for @ */
    873 				*wp++ = ' ';
    874 				PUSH_STATE(SPATTERN);
    875 			} else
    876 				goto Sbase1;
    877 			break;
    878 		}
    879 	}
    880  Done:
    881 	Xcheck(ws, wp);
    882 	if (statep != &states[1])
    883 		/* XXX figure out what is missing */
    884 		yyerror("no closing quote");
    885 
    886 	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
    887 	if (state == SHEREDELIM)
    888 		state = SBASE;
    889 
    890 	dp = Xstring(ws, wp);
    891 	if (state == SBASE && (
    892 	    (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) ||
    893 	    ctype(c, C_ANGLE)) && ((c2 = Xlength(ws, wp)) == 0 ||
    894 	    (c2 == 2 && dp[0] == CHAR && ctype(dp[1], C_DIGIT)))) {
    895 		struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
    896 
    897 		iop->unit = c2 == 2 ? ksh_numdig(dp[1]) : c == '<' ? 0 : 1;
    898 
    899 		if (c == '&') {
    900 			if ((unsigned int)(c2 = getsc()) != ORD('>')) {
    901 				ungetsc(c2);
    902 				goto no_iop;
    903 			}
    904 			c = c2;
    905 			iop->ioflag = IOBASH;
    906 		} else
    907 			iop->ioflag = 0;
    908 
    909 		c2 = getsc();
    910 		/* <<, >>, <> are ok, >< is not */
    911 		if (c == c2 || ((unsigned int)c == ORD('<') &&
    912 		    (unsigned int)c2 == ORD('>'))) {
    913 			iop->ioflag |= c == c2 ?
    914 			    ((unsigned int)c == ORD('>') ? IOCAT : IOHERE) : IORDWR;
    915 			if (iop->ioflag == IOHERE) {
    916 				if ((unsigned int)(c2 = getsc()) == ORD('-'))
    917 					iop->ioflag |= IOSKIP;
    918 				else if ((unsigned int)c2 == ORD('<'))
    919 					iop->ioflag |= IOHERESTR;
    920 				else
    921 					ungetsc(c2);
    922 			}
    923 		} else if ((unsigned int)c2 == ORD('&'))
    924 			iop->ioflag |= IODUP | ((unsigned int)c == ORD('<') ? IORDUP : 0);
    925 		else {
    926 			iop->ioflag |= (unsigned int)c == ORD('>') ? IOWRITE : IOREAD;
    927 			if ((unsigned int)c == ORD('>') && (unsigned int)c2 == ORD('|'))
    928 				iop->ioflag |= IOCLOB;
    929 			else
    930 				ungetsc(c2);
    931 		}
    932 
    933 		iop->ioname = NULL;
    934 		iop->delim = NULL;
    935 		iop->heredoc = NULL;
    936 		/* free word */
    937 		Xfree(ws, wp);
    938 		yylval.iop = iop;
    939 		return (REDIR);
    940  no_iop:
    941 		afree(iop, ATEMP);
    942 	}
    943 
    944 	if (wp == dp && state == SBASE) {
    945 		/* free word */
    946 		Xfree(ws, wp);
    947 		/* no word, process LEX1 character */
    948 		if (((unsigned int)c == ORD('|')) ||
    949 		    ((unsigned int)c == ORD('&')) ||
    950 		    ((unsigned int)c == ORD(';')) ||
    951 		    ((unsigned int)c == ORD('(' /*)*/))) {
    952 			if ((c2 = getsc()) == c)
    953 				c = ((unsigned int)c == ORD(';')) ? BREAK :
    954 				    ((unsigned int)c == ORD('|')) ? LOGOR :
    955 				    ((unsigned int)c == ORD('&')) ? LOGAND :
    956 				    /* (unsigned int)c == ORD('(' )) */ MDPAREN;
    957 			else if ((unsigned int)c == ORD('|') && (unsigned int)c2 == ORD('&'))
    958 				c = COPROC;
    959 			else if ((unsigned int)c == ORD(';') && (unsigned int)c2 == ORD('|'))
    960 				c = BRKEV;
    961 			else if ((unsigned int)c == ORD(';') && (unsigned int)c2 == ORD('&'))
    962 				c = BRKFT;
    963 			else
    964 				ungetsc(c2);
    965 #ifndef MKSH_SMALL
    966 			if (c == BREAK) {
    967 				if ((unsigned int)(c2 = getsc()) == ORD('&'))
    968 					c = BRKEV;
    969 				else
    970 					ungetsc(c2);
    971 			}
    972 #endif
    973 		} else if ((unsigned int)c == ORD('\n')) {
    974 			if (cf & HEREDELIM)
    975 				ungetsc(c);
    976 			else {
    977 				gethere();
    978 				if (cf & CONTIN)
    979 					goto Again;
    980 			}
    981 		} else if (c == '\0' && !(cf & HEREDELIM)) {
    982 			struct ioword **p = heres;
    983 
    984 			while (p < herep)
    985 				if ((*p)->ioflag & IOHERESTR)
    986 					++p;
    987 				else
    988 					/* ksh -c 'cat <<EOF' can cause this */
    989 					yyerror(Tf_heredoc,
    990 					    evalstr((*p)->delim, 0));
    991 		}
    992 		return (c);
    993 	}
    994 
    995 	/* terminate word */
    996 	*wp++ = EOS;
    997 	yylval.cp = Xclose(ws, wp);
    998 	if (state == SWORD || state == SLETPAREN
    999 	    /* XXX ONEWORD? */)
   1000 		return (LWORD);
   1001 
   1002 	/* unget terminator */
   1003 	ungetsc(c);
   1004 
   1005 	/*
   1006 	 * note: the alias-vs-function code below depends on several
   1007 	 * interna: starting from here, source->str is not modified;
   1008 	 * the way getsc() and ungetsc() operate; etc.
   1009 	 */
   1010 
   1011 	/* copy word to unprefixed string ident */
   1012 	sp = yylval.cp;
   1013 	dp = ident;
   1014 	while ((dp - ident) < IDENT && (c = *sp++) == CHAR)
   1015 		*dp++ = *sp++;
   1016 	if (c != EOS)
   1017 		/* word is not unquoted, or space ran out */
   1018 		dp = ident;
   1019 	/* make sure the ident array stays NUL padded */
   1020 	memset(dp, 0, (ident + IDENT) - dp + 1);
   1021 
   1022 	if (*ident != '\0' && (cf & (KEYWORD | ALIAS))) {
   1023 		struct tbl *p;
   1024 		uint32_t h = hash(ident);
   1025 
   1026 		if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
   1027 		    (!(cf & ESACONLY) || p->val.i == ESAC ||
   1028 		    (unsigned int)p->val.i == ORD(/*{*/ '}'))) {
   1029 			afree(yylval.cp, ATEMP);
   1030 			return (p->val.i);
   1031 		}
   1032 		if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
   1033 		    (p->flag & ISSET)) {
   1034 			/*
   1035 			 * this still points to the same character as the
   1036 			 * ungetsc'd terminator from above
   1037 			 */
   1038 			const char *cp = source->str;
   1039 
   1040 			/* prefer POSIX but not Korn functions over aliases */
   1041 			while (ctype(*cp, C_BLANK))
   1042 				/*
   1043 				 * this is like getsc() without skipping
   1044 				 * over Source boundaries (including not
   1045 				 * parsing ungetsc'd characters that got
   1046 				 * pushed into an SREREAD) which is what
   1047 				 * we want here anyway: find out whether
   1048 				 * the alias name is followed by a POSIX
   1049 				 * function definition
   1050 				 */
   1051 				++cp;
   1052 			/* prefer functions over aliases */
   1053 			if (cp[0] != '(' || cp[1] != ')') {
   1054 				Source *s = source;
   1055 
   1056 				while (s && (s->flags & SF_HASALIAS))
   1057 					if (s->u.tblp == p)
   1058 						return (LWORD);
   1059 					else
   1060 						s = s->next;
   1061 				/* push alias expansion */
   1062 				s = pushs(SALIAS, source->areap);
   1063 				s->start = s->str = p->val.s;
   1064 				s->u.tblp = p;
   1065 				s->flags |= SF_HASALIAS;
   1066 				s->line = source->line;
   1067 				s->next = source;
   1068 				if (source->type == SEOF) {
   1069 					/* prevent infinite recursion at EOS */
   1070 					source->u.tblp = p;
   1071 					source->flags |= SF_HASALIAS;
   1072 				}
   1073 				source = s;
   1074 				afree(yylval.cp, ATEMP);
   1075 				goto Again;
   1076 			}
   1077 		}
   1078 	} else if (*ident == '\0') {
   1079 		/* retain typeset et al. even when quoted */
   1080 		struct tbl *tt = get_builtin((dp = wdstrip(yylval.cp, 0)));
   1081 		uint32_t flag = tt ? tt->flag : 0;
   1082 
   1083 		if (flag & (DECL_UTIL | DECL_FWDR))
   1084 			strlcpy(ident, dp, sizeof(ident));
   1085 		afree(dp, ATEMP);
   1086 	}
   1087 
   1088 	return (LWORD);
   1089 }
   1090 
   1091 static void
   1092 gethere(void)
   1093 {
   1094 	struct ioword **p;
   1095 
   1096 	for (p = heres; p < herep; p++)
   1097 		if (!((*p)->ioflag & IOHERESTR))
   1098 			readhere(*p);
   1099 	herep = heres;
   1100 }
   1101 
   1102 /*
   1103  * read "<<word" text into temp file
   1104  */
   1105 
   1106 static void
   1107 readhere(struct ioword *iop)
   1108 {
   1109 	int c;
   1110 	const char *eof, *eofp;
   1111 	XString xs;
   1112 	char *xp;
   1113 	size_t xpos;
   1114 
   1115 	eof = evalstr(iop->delim, 0);
   1116 
   1117 	if (!(iop->ioflag & IOEVAL))
   1118 		ignore_backslash_newline++;
   1119 
   1120 	Xinit(xs, xp, 256, ATEMP);
   1121 
   1122  heredoc_read_line:
   1123 	/* beginning of line */
   1124 	eofp = eof;
   1125 	xpos = Xsavepos(xs, xp);
   1126 	if (iop->ioflag & IOSKIP) {
   1127 		/* skip over leading tabs */
   1128 		while ((c = getsc()) == '\t')
   1129 			;	/* nothing */
   1130 		goto heredoc_parse_char;
   1131 	}
   1132  heredoc_read_char:
   1133 	c = getsc();
   1134  heredoc_parse_char:
   1135 	/* compare with here document marker */
   1136 	if (!*eofp) {
   1137 		/* end of here document marker, what to do? */
   1138 		switch (c) {
   1139 		case ORD(/*(*/ ')'):
   1140 			if (!subshell_nesting_type)
   1141 				/*-
   1142 				 * not allowed outside $(...) or (...)
   1143 				 * => mismatch
   1144 				 */
   1145 				break;
   1146 			/* allow $(...) or (...) to close here */
   1147 			ungetsc(/*(*/ ')');
   1148 			/* FALLTHROUGH */
   1149 		case 0:
   1150 			/*
   1151 			 * Allow EOF here to commands without trailing
   1152 			 * newlines (mksh -c '...') will work as well.
   1153 			 */
   1154 		case ORD('\n'):
   1155 			/* Newline terminates here document marker */
   1156 			goto heredoc_found_terminator;
   1157 		}
   1158 	} else if (c == *eofp++)
   1159 		/* store; then read and compare next character */
   1160 		goto heredoc_store_and_loop;
   1161 	/* nope, mismatch; read until end of line */
   1162 	while (c != '\n') {
   1163 		if (!c)
   1164 			/* oops, reached EOF */
   1165 			yyerror(Tf_heredoc, eof);
   1166 		/* store character */
   1167 		Xcheck(xs, xp);
   1168 		Xput(xs, xp, c);
   1169 		/* read next character */
   1170 		c = getsc();
   1171 	}
   1172 	/* we read a newline as last character */
   1173  heredoc_store_and_loop:
   1174 	/* store character */
   1175 	Xcheck(xs, xp);
   1176 	Xput(xs, xp, c);
   1177 	if (c == '\n')
   1178 		goto heredoc_read_line;
   1179 	goto heredoc_read_char;
   1180 
   1181  heredoc_found_terminator:
   1182 	/* jump back to saved beginning of line */
   1183 	xp = Xrestpos(xs, xp, xpos);
   1184 	/* terminate, close and store */
   1185 	Xput(xs, xp, '\0');
   1186 	iop->heredoc = Xclose(xs, xp);
   1187 
   1188 	if (!(iop->ioflag & IOEVAL))
   1189 		ignore_backslash_newline--;
   1190 }
   1191 
   1192 void
   1193 yyerror(const char *fmt, ...)
   1194 {
   1195 	va_list va;
   1196 
   1197 	/* pop aliases and re-reads */
   1198 	while (source->type == SALIAS || source->type == SREREAD)
   1199 		source = source->next;
   1200 	/* zap pending input */
   1201 	source->str = null;
   1202 
   1203 	error_prefix(true);
   1204 	va_start(va, fmt);
   1205 	shf_vfprintf(shl_out, fmt, va);
   1206 	shf_putc('\n', shl_out);
   1207 	va_end(va);
   1208 	errorfz();
   1209 }
   1210 
   1211 /*
   1212  * input for yylex with alias expansion
   1213  */
   1214 
   1215 Source *
   1216 pushs(int type, Area *areap)
   1217 {
   1218 	Source *s;
   1219 
   1220 	s = alloc(sizeof(Source), areap);
   1221 	memset(s, 0, sizeof(Source));
   1222 	s->type = type;
   1223 	s->str = null;
   1224 	s->areap = areap;
   1225 	if (type == SFILE || type == SSTDIN)
   1226 		XinitN(s->xs, 256, s->areap);
   1227 	return (s);
   1228 }
   1229 
        /*
         * Return the next input character, or 0 at end of input.
         *
         * Characters are taken from the current source. Each time a NUL
         * is read (the source's buffer is used up) the source is
         * refilled, replaced or popped according to its type, and the
         * read is retried.
         */
   1230 static int
   1231 getsc_uu(void)
   1232 {
   1233 	Source *s = source;
   1234 	int c;
   1235 
   1236 	while ((c = ord(*s->str++)) == 0) {
   1237 		/* return 0 for EOF by default */
   1238 		s->str = NULL;
   1239 		switch (s->type) {
   1240 		case SEOF:
        			/* stay at EOF: point back at the empty string */
   1241 			s->str = null;
   1242 			return (0);
   1243 
   1244 		case SSTDIN:
   1245 		case SFILE:
        			/* fetch the next line; leaves s->str NULL at EOF */
   1246 			getsc_line(s);
   1247 			break;
   1248 
        		/* nothing to refill from: s->str stays NULL, EOF below */
   1249 		case SWSTR:
   1250 			break;
   1251 
   1252 		case SSTRING:
   1253 		case SSTRINGCMDLINE:
   1254 			break;
   1255 
   1256 		case SWORDS:
        			/* hand out the next word; a separator comes after it */
   1257 			s->start = s->str = *s->u.strv++;
   1258 			s->type = SWORDSEP;
   1259 			break;
   1260 
   1261 		case SWORDSEP:
        			/* after the last word a newline, then EOF; else T1space */
   1262 			if (*s->u.strv == NULL) {
   1263 				s->start = s->str = "\n";
   1264 				s->type = SEOF;
   1265 			} else {
   1266 				s->start = s->str = T1space;
   1267 				s->type = SWORDS;
   1268 			}
   1269 			break;
   1270 
   1271 		case SALIAS:
   1272 			if (s->flags & SF_ALIASEND) {
   1273 				/* pass on an unused SF_ALIAS flag */
   1274 				source = s->next;
   1275 				source->flags |= s->flags & SF_ALIAS;
   1276 				s = source;
   1277 			} else if (*s->u.tblp->val.s &&
   1278 			    ctype((c = strnul(s->u.tblp->val.s)[-1]), C_SPACE)) {
   1279 				/* pop source stack */
   1280 				source = s = s->next;
   1281 				/*
   1282 				 * Note that this alias ended with a
   1283 				 * space, enabling alias expansion on
   1284 				 * the following word.
   1285 				 */
   1286 				s->flags |= SF_ALIAS;
   1287 			} else {
   1288 				/*
   1289 				 * At this point, we need to keep the current
   1290 				 * alias in the source list so recursive
   1291 				 * aliases can be detected and we also need to
   1292 				 * return the next character. Do this by
   1293 				 * temporarily popping the alias to get the
   1294 				 * next character and then put it back in the
   1295 				 * source list with the SF_ALIASEND flag set.
   1296 				 */
   1297 				/* pop source stack */
   1298 				source = s->next;
   1299 				source->flags |= s->flags & SF_ALIAS;
   1300 				c = getsc_uu();
   1301 				if (c) {
        					/* re-push the alias, holding only that character */
   1302 					s->flags |= SF_ALIASEND;
   1303 					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
   1304 					s->start = s->str = s->ugbuf;
   1305 					s->next = source;
   1306 					source = s;
   1307 				} else {
   1308 					s = source;
   1309 					/* avoid reading EOF twice */
   1310 					s->str = NULL;
   1311 					break;
   1312 				}
   1313 			}
        			/* retry the read on whatever source is current now */
   1314 			continue;
   1315 
   1316 		case SREREAD:
        			/* pushed-back input is used up: release it and pop */
   1317 			if (s->start != s->ugbuf)
   1318 				/* yuck */
   1319 				afree(s->u.freeme, ATEMP);
   1320 			source = s = s->next;
   1321 			continue;
   1322 		}
        		/* no new data was provided: turn the source into an EOF source */
   1323 		if (s->str == NULL) {
   1324 			s->type = SEOF;
   1325 			s->start = s->str = null;
   1326 			return ('\0');
   1327 		}
        		/* echo the freshly obtained input if requested */
   1328 		if (s->flags & SF_ECHO) {
   1329 			shf_puts(s->str, shl_out);
   1330 			shf_flush(shl_out);
   1331 		}
   1332 	}
   1333 	return (c);
   1334 }
   1335 
        /*
         * Read the next line for source s into its buffer s->xs and point
         * s->start and s->str at it; s->str is set to NULL when nothing
         * could be read (EOF).
         *
         * For interactive stdin input this also arms and disarms the
         * ksh_tmout alarm (tty only), prints the prompt or runs the
         * command line editor, feeds the history, and finally switches
         * the prompt to PS2.
         */
   1336 static void
   1337 getsc_line(Source *s)
   1338 {
   1339 	char *xp = Xstring(s->xs, xp), *cp;
   1340 	bool interactive = Flag(FTALKING) && s->type == SSTDIN;
   1341 	bool have_tty = tobool(interactive && (s->flags & SF_TTY));
   1342 
   1343 	/* Done here to ensure nothing odd happens when a timeout occurs */
   1344 	XcheckN(s->xs, xp, LINE);
   1345 	*xp = '\0';
   1346 	s->start = s->str = xp;
   1347 
        	/* arm the timeout while waiting for terminal input */
   1348 	if (have_tty && ksh_tmout) {
   1349 		ksh_tmout_state = TMOUT_READING;
   1350 		alarm(ksh_tmout);
   1351 	}
   1352 	if (interactive) {
   1353 		if (cur_prompt == PS1)
   1354 			histsave(&s->line, NULL, HIST_FLUSH, true);
   1355 		change_winsz();
   1356 	}
   1357 #ifndef MKSH_NO_CMDLINE_EDITING
        	/* an editing mode is active on a tty: let the line editor read */
   1358 	if (have_tty && (
   1359 #if !MKSH_S_NOVI
   1360 	    Flag(FVI) ||
   1361 #endif
   1362 	    Flag(FEMACS) || Flag(FGMACS))) {
   1363 		int nread;
   1364 
   1365 		nread = x_read(xp);
   1366 		if (nread < 0)
   1367 			/* read error */
   1368 			nread = 0;
   1369 		xp[nread] = '\0';
   1370 		xp += nread;
   1371 	} else
   1372 #endif
   1373 	  {
        		/* plain read; only non-interactive input counts lines here */
   1374 		if (interactive)
   1375 			pprompt(prompt, 0);
   1376 		else
   1377 			s->line++;
   1378 
        		/* keep reading, growing the buffer, until a newline or no data */
   1379 		while (/* CONSTCOND */ 1) {
   1380 			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
   1381 
        			/* interrupted read: clear the error, run traps, retry */
   1382 			if (!p && shf_error(s->u.shf) &&
   1383 			    shf_errno(s->u.shf) == EINTR) {
   1384 				shf_clearerr(s->u.shf);
   1385 				if (trap)
   1386 					runtraps(0);
   1387 				continue;
   1388 			}
        			/* stop if nothing was read or the line is complete */
   1389 			if (!p || (xp = p, xp[-1] == '\n'))
   1390 				break;
   1391 			/* double buffer size */
   1392 			/* move past NUL so doubling works... */
   1393 			xp++;
   1394 			XcheckN(s->xs, xp, Xlength(s->xs, xp));
   1395 			/* ...and move back again */
   1396 			xp--;
   1397 		}
   1398 		/*
   1399 		 * flush any unwanted input so other programs/builtins
   1400 		 * can read it. Not very optimal, but less error prone
   1401 		 * than flushing else where, dealing with redirections,
   1402 		 * etc.
   1403 		 * TODO: reduce size of shf buffer (~128?) if SSTDIN
   1404 		 */
   1405 		if (s->type == SSTDIN)
   1406 			shf_flush(s->u.shf);
   1407 	}
   1408 	/*
   1409 	 * XXX: temporary kludge to restore source after a
   1410 	 * trap may have been executed.
   1411 	 */
   1412 	source = s;
        	/* input has arrived (or ended): disarm the timeout */
   1413 	if (have_tty && ksh_tmout) {
   1414 		ksh_tmout_state = TMOUT_EXECUTING;
   1415 		alarm(0);
   1416 	}
        	/* the buffer may have been moved while growing: re-point at it */
   1417 	cp = Xstring(s->xs, xp);
   1418 	rndpush(cp);
   1419 	s->start = s->str = cp;
   1420 	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
   1421 	/* Note: if input is all nulls, this is not eof */
   1422 	if (Xlength(s->xs, xp) == 0) {
   1423 		/* EOF */
   1424 		if (s->type == SFILE)
   1425 			shf_fdclose(s->u.shf);
   1426 		s->str = NULL;
   1427 	} else if (interactive && *s->str) {
        		/* continuation lines are appended; IFS-led lines are not queued */
   1428 		if (cur_prompt != PS1)
   1429 			histsave(&s->line, s->str, HIST_APPEND, true);
   1430 		else if (!ctype(*s->str, C_IFS | C_IFSWS))
   1431 			histsave(&s->line, s->str, HIST_QUEUE, true);
   1432 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
   1433 		else
   1434 			goto check_for_sole_return;
   1435 	} else if (interactive && cur_prompt == PS1) {
   1436  check_for_sole_return:
        		/* a line of only IFS whitespace at PS1: flush and sync history */
   1437 		cp = Xstring(s->xs, xp);
   1438 		while (ctype(*cp, C_IFSWS))
   1439 			++cp;
   1440 		if (!*cp) {
   1441 			histsave(&s->line, NULL, HIST_FLUSH, true);
   1442 			histsync();
   1443 		}
   1444 #endif
   1445 	}
        	/* further lines of the same command are prompted for with PS2 */
   1446 	if (interactive)
   1447 		set_prompt(PS2, NULL);
   1448 }
   1449 
   1450 void
   1451 set_prompt(int to, Source *s)
   1452 {
   1453 	cur_prompt = (uint8_t)to;
   1454 
   1455 	switch (to) {
   1456 	/* command */
   1457 	case PS1:
   1458 		/*
   1459 		 * Substitute ! and !! here, before substitutions are done
   1460 		 * so ! in expanded variables are not expanded.
   1461 		 * NOTE: this is not what AT&T ksh does (it does it after
   1462 		 * substitutions, POSIX doesn't say which is to be done.
   1463 		 */
   1464 		{
   1465 			struct shf *shf;
   1466 			char * volatile ps1;
   1467 			Area *saved_atemp;
   1468 			int saved_lineno;
   1469 
   1470 			ps1 = str_val(global("PS1"));
   1471 			shf = shf_sopen(NULL, strlen(ps1) * 2,
   1472 			    SHF_WR | SHF_DYNAMIC, NULL);
   1473 			while (*ps1)
   1474 				if (*ps1 != '!' || *++ps1 == '!')
   1475 					shf_putchar(*ps1++, shf);
   1476 				else
   1477 					shf_fprintf(shf, Tf_lu, s ?
   1478 					    (unsigned long)s->line + 1 : 0UL);
   1479 			ps1 = shf_sclose(shf);
   1480 			saved_lineno = current_lineno;
   1481 			if (s)
   1482 				current_lineno = s->line + 1;
   1483 			saved_atemp = ATEMP;
   1484 			newenv(E_ERRH);
   1485 			if (kshsetjmp(e->jbuf)) {
   1486 				prompt = safe_prompt;
   1487 				/*
   1488 				 * Don't print an error - assume it has already
   1489 				 * been printed. Reason is we may have forked
   1490 				 * to run a command and the child may be
   1491 				 * unwinding its stack through this code as it
   1492 				 * exits.
   1493 				 */
   1494 			} else {
   1495 				char *cp = substitute(ps1, 0);
   1496 				strdupx(prompt, cp, saved_atemp);
   1497 			}
   1498 			current_lineno = saved_lineno;
   1499 			quitenv(NULL);
   1500 		}
   1501 		break;
   1502 	/* command continuation */
   1503 	case PS2:
   1504 		prompt = str_val(global("PS2"));
   1505 		break;
   1506 	}
   1507 }
   1508 
   1509 int
   1510 pprompt(const char *cp, int ntruncate)
   1511 {
   1512 	char delimiter = 0;
   1513 	bool doprint = (ntruncate != -1);
   1514 	bool indelimit = false;
   1515 	int columns = 0, lines = 0;
   1516 
   1517 	/*
   1518 	 * Undocumented AT&T ksh feature:
   1519 	 * If the second char in the prompt string is \r then the first
   1520 	 * char is taken to be a non-printing delimiter and any chars
   1521 	 * between two instances of the delimiter are not considered to
   1522 	 * be part of the prompt length
   1523 	 */
   1524 	if (*cp && cp[1] == '\r') {
   1525 		delimiter = *cp;
   1526 		cp += 2;
   1527 	}
   1528 	for (; *cp; cp++) {
   1529 		if (indelimit && *cp != delimiter)
   1530 			;
   1531 		else if (ctype(*cp, C_CR | C_LF)) {
   1532 			lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
   1533 			columns = 0;
   1534 		} else if (*cp == '\t') {
   1535 			columns = (columns | 7) + 1;
   1536 		} else if (*cp == '\b') {
   1537 			if (columns > 0)
   1538 				columns--;
   1539 		} else if (*cp == delimiter)
   1540 			indelimit = !indelimit;
   1541 		else if (UTFMODE && (rtt2asc(*cp) > 0x7F)) {
   1542 			const char *cp2;
   1543 			columns += utf_widthadj(cp, &cp2);
   1544 			if (doprint && (indelimit ||
   1545 			    (ntruncate < (x_cols * lines + columns))))
   1546 				shf_write(cp, cp2 - cp, shl_out);
   1547 			cp = cp2 - /* loop increment */ 1;
   1548 			continue;
   1549 		} else
   1550 			columns++;
   1551 		if (doprint && (*cp != delimiter) &&
   1552 		    (indelimit || (ntruncate < (x_cols * lines + columns))))
   1553 			shf_putc(*cp, shl_out);
   1554 	}
   1555 	if (doprint)
   1556 		shf_flush(shl_out);
   1557 	return (x_cols * lines + columns);
   1558 }
   1559 
   1560 /*
   1561  * Read the variable part of a ${...} expression (i.e. up to but not
   1562  * including the :[-+?=#%] or close-brace).
        *
        * The characters read are appended at wp inside *wsp and followed
        * by a NUL; the character that ended the variable part is pushed
        * back onto the input. Returns the write position after the NUL.
   1563  */
   1564 static char *
   1565 get_brace_var(XString *wsp, char *wp)
   1566 {
   1567 	char c;
   1568 	enum parse_state {
   1569 		PS_INITIAL, PS_SAW_PERCENT, PS_SAW_HASH, PS_SAW_BANG,
   1570 		PS_IDENT, PS_NUMBER, PS_VAR1
   1571 	} state = PS_INITIAL;
   1572 
   1573 	while (/* CONSTCOND */ 1) {
   1574 		c = getsc();
   1575 		/* State machine to figure out where the variable part ends. */
   1576 		switch (state) {
   1577 		case PS_SAW_HASH:
        			/* after a leading '#': peek at what follows a C_VAR1 char */
   1578 			if (ctype(c, C_VAR1)) {
   1579 				char c2;
   1580 
   1581 				c2 = getsc();
   1582 				ungetsc(c2);
   1583 				if (ord(c2) != ORD(/*{*/ '}')) {
        					/*
        					 * not directly followed by the close-brace:
        					 * the variable part ends before c.
        					 * NOTE(review): c is pushed back here and
        					 * once more at the out label; confirm that
        					 * the double push-back is intended.
        					 */
   1584 					ungetsc(c);
   1585 					goto out;
   1586 				}
   1587 			}
   1588 			goto ps_common;
   1589 		case PS_SAW_BANG:
        			/* after a leading '!': these characters end the variable part */
   1590 			switch (ord(c)) {
   1591 			case ORD('@'):
   1592 			case ORD('#'):
   1593 			case ORD('-'):
   1594 			case ORD('?'):
   1595 				goto out;
   1596 			}
   1597 			goto ps_common;
   1598 		case PS_INITIAL:
        			/* a leading %, # or ! is stored and selects a prefix state */
   1599 			switch (ord(c)) {
   1600 			case ORD('%'):
   1601 				state = PS_SAW_PERCENT;
   1602 				goto next;
   1603 			case ORD('#'):
   1604 				state = PS_SAW_HASH;
   1605 				goto next;
   1606 			case ORD('!'):
   1607 				state = PS_SAW_BANG;
   1608 				goto next;
   1609 			}
   1610 			/* FALLTHROUGH */
   1611 		case PS_SAW_PERCENT:
   1612  ps_common:
        			/* first character of the name decides how the rest is read */
   1613 			if (ctype(c, C_ALPHX))
   1614 				state = PS_IDENT;
   1615 			else if (ctype(c, C_DIGIT))
   1616 				state = PS_NUMBER;
   1617 			else if (ctype(c, C_VAR1))
   1618 				state = PS_VAR1;
   1619 			else
   1620 				goto out;
   1621 			break;
   1622 		case PS_IDENT:
   1623 			if (!ctype(c, C_ALNUX)) {
        				/* identifier ended; an array subscript may follow */
   1624 				if (ord(c) == ORD('[')) {
   1625 					char *tmp, *p;
   1626 
   1627 					if (!arraysub(&tmp))
   1628 						yyerror("missing ]");
        					/* store the '[' and the subscript text */
   1629 					*wp++ = c;
   1630 					p = tmp;
   1631 					while (*p) {
   1632 						Xcheck(*wsp, wp);
   1633 						*wp++ = *p++;
   1634 					}
   1635 					afree(tmp, ATEMP);
   1636 					/* the ] */
   1637 					c = getsc();
   1638 				}
   1639 				goto out;
   1640 			}
   1641  next:
   1642 			break;
   1643 		case PS_NUMBER:
   1644 			if (!ctype(c, C_DIGIT))
   1645 				goto out;
   1646 			break;
   1647 		case PS_VAR1:
        			/* a C_VAR1 name is a single character long */
   1648 			goto out;
   1649 		}
        		/* c belongs to the variable part: store it */
   1650 		Xcheck(*wsp, wp);
   1651 		*wp++ = c;
   1652 	}
   1653  out:
   1654 	/* end of variable part */
   1655 	*wp++ = '\0';
   1656 	ungetsc(c);
   1657 	return (wp);
   1658 }
   1659 
   1660 /*
   1661  * Save an array subscript - returns true if matching bracket found, false
   1662  * if eof or newline was found.
   1663  * (Returned string double null terminated)
   1664  */
   1665 static bool
   1666 arraysub(char **strp)
   1667 {
   1668 	XString ws;
   1669 	char *wp, c;
   1670 	/* we are just past the initial [ */
   1671 	unsigned int depth = 1;
   1672 
   1673 	Xinit(ws, wp, 32, ATEMP);
   1674 
   1675 	do {
   1676 		c = getsc();
   1677 		Xcheck(ws, wp);
   1678 		*wp++ = c;
   1679 		if (ord(c) == ORD('['))
   1680 			depth++;
   1681 		else if (ord(c) == ORD(']'))
   1682 			depth--;
   1683 	} while (depth > 0 && c && c != '\n');
   1684 
   1685 	*wp++ = '\0';
   1686 	*strp = Xclose(ws, wp);
   1687 
   1688 	return (tobool(depth == 0));
   1689 }
   1690 
   1691 /* Unget a char: handles case when we are already at the start of the buffer */
   1692 static void
   1693 ungetsc(int c)
   1694 {
   1695 	struct sretrace_info *rp = retrace_info;
   1696 
   1697 	if (backslash_skip)
   1698 		backslash_skip--;
   1699 	/* Don't unget EOF... */
   1700 	if (source->str == null && c == '\0')
   1701 		return;
   1702 	while (rp) {
   1703 		if (Xlength(rp->xs, rp->xp))
   1704 			rp->xp--;
   1705 		rp = rp->next;
   1706 	}
   1707 	ungetsc_i(c);
   1708 }
   1709 static void
   1710 ungetsc_i(int c)
   1711 {
   1712 	if (source->str > source->start)
   1713 		source->str--;
   1714 	else {
   1715 		Source *s;
   1716 
   1717 		s = pushs(SREREAD, source->areap);
   1718 		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
   1719 		s->start = s->str = s->ugbuf;
   1720 		s->next = source;
   1721 		source = s;
   1722 	}
   1723 }
   1724 
   1725 
   1726 /* Called to get a char that isn't a \newline sequence. */
   1727 static int
   1728 getsc_bn(void)
   1729 {
   1730 	int c, c2;
   1731 
   1732 	if (ignore_backslash_newline)
   1733 		return (o_getsc_u());
   1734 
   1735 	if (backslash_skip == 1) {
   1736 		backslash_skip = 2;
   1737 		return (o_getsc_u());
   1738 	}
   1739 
   1740 	backslash_skip = 0;
   1741 
   1742 	while (/* CONSTCOND */ 1) {
   1743 		c = o_getsc_u();
   1744 		if (c == '\\') {
   1745 			if ((c2 = o_getsc_u()) == '\n')
   1746 				/* ignore the \newline; get the next char... */
   1747 				continue;
   1748 			ungetsc_i(c2);
   1749 			backslash_skip = 1;
   1750 		}
   1751 		return (c);
   1752 	}
   1753 }
   1754 
   1755 void
   1756 yyskiputf8bom(void)
   1757 {
   1758 	int c;
   1759 
   1760 	if (rtt2asc((c = o_getsc_u())) != 0xEF) {
   1761 		ungetsc_i(c);
   1762 		return;
   1763 	}
   1764 	if (rtt2asc((c = o_getsc_u())) != 0xBB) {
   1765 		ungetsc_i(c);
   1766 		ungetsc_i(asc2rtt(0xEF));
   1767 		return;
   1768 	}
   1769 	if (rtt2asc((c = o_getsc_u())) != 0xBF) {
   1770 		ungetsc_i(c);
   1771 		ungetsc_i(asc2rtt(0xBB));
   1772 		ungetsc_i(asc2rtt(0xEF));
   1773 		return;
   1774 	}
   1775 	UTFMODE |= 8;
   1776 }
   1777 
   1778 static Lex_state *
   1779 push_state_i(State_info *si, Lex_state *old_end)
   1780 {
   1781 	Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);
   1782 
   1783 	news[0].ls_base = old_end;
   1784 	si->base = &news[0];
   1785 	si->end = &news[STATE_BSIZE];
   1786 	return (&news[1]);
   1787 }
   1788 
   1789 static Lex_state *
   1790 pop_state_i(State_info *si, Lex_state *old_end)
   1791 {
   1792 	Lex_state *old_base = si->base;
   1793 
   1794 	si->base = old_end->ls_base - STATE_BSIZE;
   1795 	si->end = old_end->ls_base;
   1796 
   1797 	afree(old_base, ATEMP);
   1798 
   1799 	return (si->base + STATE_BSIZE - 1);
   1800 }
   1801