Home | History | Annotate | Download | only in src
      1 /*	$OpenBSD: lex.c,v 1.44 2008/07/03 17:52:08 otto Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
      5  *	Thorsten Glaser <tg (at) mirbsd.org>
      6  *
      7  * Provided that these terms and disclaimer and all copyright notices
      8  * are retained or reproduced in an accompanying document, permission
      9  * is granted to deal in this work without restriction, including un-
     10  * limited rights to use, publicly perform, distribute, sell, modify,
     11  * merge, give away, or sublicence.
     12  *
     13  * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
     14  * the utmost extent permitted by applicable law, neither express nor
     15  * implied; without malicious intent or gross negligence. In no event
     16  * may a licensor, author or contributor be held liable for indirect,
     17  * direct, other damage, loss, or other issues arising in any way out
     18  * of dealing in the work, even if advised of the possibility of such
     19  * damage or existence of a defect, except proven that it results out
     20  * of said person's immediate fault when using the work as intended.
     21  */
     22 
     23 #include "sh.h"
     24 
     25 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.118 2010/07/25 11:35:41 tg Exp $");
     26 
     27 /*
     28  * states while lexing word
     29  */
     30 #define SBASE		0	/* outside any lexical constructs */
     31 #define SWORD		1	/* implicit quoting for substitute() */
     32 #define SLETPAREN	2	/* inside (( )), implicit quoting */
     33 #define SSQUOTE		3	/* inside '' */
     34 #define SDQUOTE		4	/* inside "" */
     35 #define SEQUOTE		5	/* inside $'' */
     36 #define SBRACE		6	/* inside ${} */
     37 #define SQBRACE		7	/* inside "${}" */
     38 #define SCSPAREN	8	/* inside $() */
     39 #define SBQUOTE		9	/* inside `` */
     40 #define SASPAREN	10	/* inside $(( )) */
     41 #define SHEREDELIM	11	/* parsing <<,<<- delimiter */
     42 #define SHEREDQUOTE	12	/* parsing " in <<,<<- delimiter */
     43 #define SPATTERN	13	/* parsing *(...|...) pattern (*+?@!) */
     44 #define STBRACE		14	/* parsing ${...[#%]...} */
     45 #define SLETARRAY	15	/* inside =( ), just copy */
     46 #define SADELIM		16	/* like SBASE, looking for delimiter */
     47 #define SHERESTRING	17	/* parsing <<< string */
     48 
     49 /* Structure to keep track of the lexing state and the various pieces of info
     50  * needed for each particular state. */
     51 typedef struct lex_state Lex_state;
     52 struct lex_state {
     53 	int ls_state;
     54 	union {
     55 		/* $(...) */
     56 		struct scsparen_info {
     57 			int nparen;	/* count open parenthesis */
     58 			int csstate;	/* XXX remove */
     59 #define ls_scsparen ls_info.u_scsparen
     60 		} u_scsparen;
     61 
     62 		/* $((...)) */
     63 		struct sasparen_info {
     64 			int nparen;	/* count open parenthesis */
     65 			int start;	/* marks start of $(( in output str */
     66 #define ls_sasparen ls_info.u_sasparen
     67 		} u_sasparen;
     68 
     69 		/* ((...)) */
     70 		struct sletparen_info {
     71 			int nparen;	/* count open parenthesis */
     72 #define ls_sletparen ls_info.u_sletparen
     73 		} u_sletparen;
     74 
     75 		/* `...` */
     76 		struct sbquote_info {
     77 			int indquotes;	/* true if in double quotes: "`...`" */
     78 #define ls_sbquote ls_info.u_sbquote
     79 		} u_sbquote;
     80 
     81 #ifndef MKSH_SMALL
     82 		/* =(...) */
     83 		struct sletarray_info {
     84 			int nparen;	/* count open parentheses */
     85 #define ls_sletarray ls_info.u_sletarray
     86 		} u_sletarray;
     87 #endif
     88 
     89 		/* ADELIM */
     90 		struct sadelim_info {
     91 			unsigned char nparen;	/* count open parentheses */
     92 #define SADELIM_BASH	0
     93 #define SADELIM_MAKE	1
     94 			unsigned char style;
     95 			unsigned char delimiter;
     96 			unsigned char num;
     97 			unsigned char flags;	/* ofs. into sadelim_flags[] */
     98 #define ls_sadelim ls_info.u_sadelim
     99 		} u_sadelim;
    100 
    101 		/* $'...' */
    102 		struct sequote_info {
    103 			bool got_NUL;	/* ignore rest of string */
    104 #define ls_sequote ls_info.u_sequote
    105 		} u_sequote;
    106 
    107 		Lex_state *base;	/* used to point to next state block */
    108 	} ls_info;
    109 };
    110 
    111 typedef struct {
    112 	Lex_state *base;
    113 	Lex_state *end;
    114 } State_info;
    115 
    116 static void readhere(struct ioword *);
    117 static int getsc__(void);
    118 static void getsc_line(Source *);
    119 static int getsc_bn(void);
    120 static int s_get(void);
    121 static void s_put(int);
    122 static char *get_brace_var(XString *, char *);
    123 static int arraysub(char **);
    124 static const char *ungetsc(int);
    125 static void gethere(bool);
    126 static Lex_state *push_state_(State_info *, Lex_state *);
    127 static Lex_state *pop_state_(State_info *, Lex_state *);
    128 
    129 static int dopprompt(const char *, int, bool);
    130 
    131 static int backslash_skip;
    132 static int ignore_backslash_newline;
    133 
    134 /* optimised getsc_bn() */
    135 #define _getsc()	(*source->str != '\0' && *source->str != '\\' \
    136 			 && !backslash_skip && !(source->flags & SF_FIRST) \
    137 			 ? *source->str++ : getsc_bn())
    138 /* optimised getsc__() */
    139 #define	_getsc_()	((*source->str != '\0') && !(source->flags & SF_FIRST) \
    140 			 ? *source->str++ : getsc__())
    141 
    142 #ifdef MKSH_SMALL
    143 static int getsc(void);
    144 static int getsc_(void);
    145 
    146 static int
    147 getsc(void)
    148 {
    149 	return (_getsc());
    150 }
    151 
    152 static int
    153 getsc_(void)
    154 {
    155 	return (_getsc_());
    156 }
    157 #else
    158 /* !MKSH_SMALL: use them inline */
    159 #define getsc()		_getsc()
    160 #define getsc_()	_getsc_()
    161 #endif
    162 
    163 #define STATE_BSIZE	32
    164 
    165 #define PUSH_STATE(s)	do {					\
    166 	if (++statep == state_info.end)				\
    167 		statep = push_state_(&state_info, statep);	\
    168 	state = statep->ls_state = (s);				\
    169 } while (0)
    170 
    171 #define POP_STATE()	do {					\
    172 	if (--statep == state_info.base)			\
    173 		statep = pop_state_(&state_info, statep);	\
    174 	state = statep->ls_state;				\
    175 } while (0)
    176 
    177 /**
    178  * Lexical analyser
    179  *
    180  * tokens are not regular expressions, they are LL(1).
    181  * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
    182  * hence the state stack.
    183  */
    184 
    185 int
    186 yylex(int cf)
    187 {
    188 	Lex_state states[STATE_BSIZE], *statep, *s2, *base;
    189 	State_info state_info;
    190 	int c, c2, state;
    191 	XString ws;		/* expandable output word */
    192 	char *wp;		/* output word pointer */
    193 	char *sp, *dp;
    194 
    195  Again:
    196 	states[0].ls_state = -1;
    197 	states[0].ls_info.base = NULL;
    198 	statep = &states[1];
    199 	state_info.base = states;
    200 	state_info.end = &state_info.base[STATE_BSIZE];
    201 
    202 	Xinit(ws, wp, 64, ATEMP);
    203 
    204 	backslash_skip = 0;
    205 	ignore_backslash_newline = 0;
    206 
    207 	if (cf&ONEWORD)
    208 		state = SWORD;
    209 	else if (cf&LETEXPR) {
    210 		/* enclose arguments in (double) quotes */
    211 		*wp++ = OQUOTE;
    212 		state = SLETPAREN;
    213 		statep->ls_sletparen.nparen = 0;
    214 #ifndef MKSH_SMALL
    215 	} else if (cf&LETARRAY) {
    216 		state = SLETARRAY;
    217 		statep->ls_sletarray.nparen = 0;
    218 #endif
    219 	} else {		/* normal lexing */
    220 		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
    221 		while ((c = getsc()) == ' ' || c == '\t')
    222 			;
    223 		if (c == '#') {
    224 			ignore_backslash_newline++;
    225 			while ((c = getsc()) != '\0' && c != '\n')
    226 				;
    227 			ignore_backslash_newline--;
    228 		}
    229 		ungetsc(c);
    230 	}
    231 	if (source->flags & SF_ALIAS) {	/* trailing ' ' in alias definition */
    232 		source->flags &= ~SF_ALIAS;
    233 		cf |= ALIAS;
    234 	}
    235 
    236 	/* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */
    237 	statep->ls_state = state;
    238 
    239 	/* check for here string */
    240 	if (state == SHEREDELIM) {
    241 		c = getsc();
    242 		if (c == '<') {
    243 			state = SHERESTRING;
    244 			while ((c = getsc()) == ' ' || c == '\t')
    245 				;
    246 			ungetsc(c);
    247 			c = '<';
    248 			goto accept_nonword;
    249 		}
    250 		ungetsc(c);
    251 	}
    252 
    253 	/* collect non-special or quoted characters to form word */
    254 	while (!((c = getsc()) == 0 ||
    255 	    ((state == SBASE || state == SHEREDELIM || state == SHERESTRING) &&
    256 	    ctype(c, C_LEX1)))) {
    257  accept_nonword:
    258 		Xcheck(ws, wp);
    259 		switch (state) {
    260 		case SADELIM:
    261 			if (c == '(')
    262 				statep->ls_sadelim.nparen++;
    263 			else if (c == ')')
    264 				statep->ls_sadelim.nparen--;
    265 			else if (statep->ls_sadelim.nparen == 0 &&
    266 			    (c == /*{*/ '}' || c == statep->ls_sadelim.delimiter)) {
    267 				*wp++ = ADELIM;
    268 				*wp++ = c;
    269 				if (c == /*{*/ '}' || --statep->ls_sadelim.num == 0)
    270 					POP_STATE();
    271 				if (c == /*{*/ '}')
    272 					POP_STATE();
    273 				break;
    274 			}
    275 			/* FALLTHROUGH */
    276 		case SBASE:
    277 			if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
    278 				*wp = EOS;	/* temporary */
    279 				if (is_wdvarname(Xstring(ws, wp), false)) {
    280 					char *p, *tmp;
    281 
    282 					if (arraysub(&tmp)) {
    283 						*wp++ = CHAR;
    284 						*wp++ = c;
    285 						for (p = tmp; *p; ) {
    286 							Xcheck(ws, wp);
    287 							*wp++ = CHAR;
    288 							*wp++ = *p++;
    289 						}
    290 						afree(tmp, ATEMP);
    291 						break;
    292 					} else {
    293 						Source *s;
    294 
    295 						s = pushs(SREREAD,
    296 						    source->areap);
    297 						s->start = s->str =
    298 						    s->u.freeme = tmp;
    299 						s->next = source;
    300 						source = s;
    301 					}
    302 				}
    303 				*wp++ = CHAR;
    304 				*wp++ = c;
    305 				break;
    306 			}
    307 			/* FALLTHROUGH */
    308  Sbase1:		/* includes *(...|...) pattern (*+?@!) */
    309 			if (c == '*' || c == '@' || c == '+' || c == '?' ||
    310 			    c == '!') {
    311 				c2 = getsc();
    312 				if (c2 == '(' /*)*/ ) {
    313 					*wp++ = OPAT;
    314 					*wp++ = c;
    315 					PUSH_STATE(SPATTERN);
    316 					break;
    317 				}
    318 				ungetsc(c2);
    319 			}
    320 			/* FALLTHROUGH */
    321  Sbase2:		/* doesn't include *(...|...) pattern (*+?@!) */
    322 			switch (c) {
    323 			case '\\':
    324  getsc_qchar:
    325 				if ((c = getsc())) {
    326 					/* trailing \ is lost */
    327 					*wp++ = QCHAR;
    328 					*wp++ = c;
    329 				}
    330 				break;
    331 			case '\'':
    332  open_ssquote:
    333 				*wp++ = OQUOTE;
    334 				ignore_backslash_newline++;
    335 				PUSH_STATE(SSQUOTE);
    336 				break;
    337 			case '"':
    338  open_sdquote:
    339 				*wp++ = OQUOTE;
    340 				PUSH_STATE(SDQUOTE);
    341 				break;
    342 			default:
    343 				goto Subst;
    344 			}
    345 			break;
    346 
    347  Subst:
    348 			switch (c) {
    349 			case '\\':
    350 				c = getsc();
    351 				switch (c) {
    352 				case '"':
    353 					if ((cf & HEREDOC))
    354 						goto heredocquote;
    355 					/* FALLTHROUGH */
    356 				case '\\':
    357 				case '$': case '`':
    358  store_qchar:
    359 					*wp++ = QCHAR;
    360 					*wp++ = c;
    361 					break;
    362 				default:
    363  heredocquote:
    364 					Xcheck(ws, wp);
    365 					if (c) {
    366 						/* trailing \ is lost */
    367 						*wp++ = CHAR;
    368 						*wp++ = '\\';
    369 						*wp++ = CHAR;
    370 						*wp++ = c;
    371 					}
    372 					break;
    373 				}
    374 				break;
    375 			case '$':
    376  subst_dollar:
    377 				c = getsc();
    378 				if (c == '(') /*)*/ {
    379 					c = getsc();
    380 					if (c == '(') /*)*/ {
    381 						PUSH_STATE(SASPAREN);
    382 						statep->ls_sasparen.nparen = 2;
    383 						statep->ls_sasparen.start =
    384 						    Xsavepos(ws, wp);
    385 						*wp++ = EXPRSUB;
    386 					} else {
    387 						ungetsc(c);
    388 						PUSH_STATE(SCSPAREN);
    389 						statep->ls_scsparen.nparen = 1;
    390 						statep->ls_scsparen.csstate = 0;
    391 						*wp++ = COMSUB;
    392 					}
    393 				} else if (c == '{') /*}*/ {
    394 					*wp++ = OSUBST;
    395 					*wp++ = '{'; /*}*/
    396 					wp = get_brace_var(&ws, wp);
    397 					c = getsc();
    398 					/* allow :# and :% (ksh88 compat) */
    399 					if (c == ':') {
    400 						*wp++ = CHAR;
    401 						*wp++ = c;
    402 						c = getsc();
    403 						if (c == ':') {
    404 							*wp++ = CHAR;
    405 							*wp++ = '0';
    406 							*wp++ = ADELIM;
    407 							*wp++ = ':';
    408 							PUSH_STATE(SBRACE);
    409 							PUSH_STATE(SADELIM);
    410 							statep->ls_sadelim.style = SADELIM_BASH;
    411 							statep->ls_sadelim.delimiter = ':';
    412 							statep->ls_sadelim.num = 1;
    413 							statep->ls_sadelim.nparen = 0;
    414 							break;
    415 						} else if (ksh_isdigit(c) ||
    416 						    c == '('/*)*/ || c == ' ' ||
    417 						    c == '$' /* XXX what else? */) {
    418 							/* substring subst. */
    419 							if (c != ' ') {
    420 								*wp++ = CHAR;
    421 								*wp++ = ' ';
    422 							}
    423 							ungetsc(c);
    424 							PUSH_STATE(SBRACE);
    425 							PUSH_STATE(SADELIM);
    426 							statep->ls_sadelim.style = SADELIM_BASH;
    427 							statep->ls_sadelim.delimiter = ':';
    428 							statep->ls_sadelim.num = 2;
    429 							statep->ls_sadelim.nparen = 0;
    430 							break;
    431 						}
    432 					} else if (c == '/') {
    433 						*wp++ = CHAR;
    434 						*wp++ = c;
    435 						if ((c = getsc()) == '/') {
    436 							*wp++ = ADELIM;
    437 							*wp++ = c;
    438 						} else
    439 							ungetsc(c);
    440 						PUSH_STATE(SBRACE);
    441 						PUSH_STATE(SADELIM);
    442 						statep->ls_sadelim.style = SADELIM_BASH;
    443 						statep->ls_sadelim.delimiter = '/';
    444 						statep->ls_sadelim.num = 1;
    445 						statep->ls_sadelim.nparen = 0;
    446 						break;
    447 					}
    448 					/* If this is a trim operation,
    449 					 * treat (,|,) specially in STBRACE.
    450 					 */
    451 					if (ctype(c, C_SUBOP2)) {
    452 						ungetsc(c);
    453 						PUSH_STATE(STBRACE);
    454 					} else {
    455 						ungetsc(c);
    456 						if (state == SDQUOTE)
    457 							PUSH_STATE(SQBRACE);
    458 						else
    459 							PUSH_STATE(SBRACE);
    460 					}
    461 				} else if (ksh_isalphx(c)) {
    462 					*wp++ = OSUBST;
    463 					*wp++ = 'X';
    464 					do {
    465 						Xcheck(ws, wp);
    466 						*wp++ = c;
    467 						c = getsc();
    468 					} while (ksh_isalnux(c));
    469 					*wp++ = '\0';
    470 					*wp++ = CSUBST;
    471 					*wp++ = 'X';
    472 					ungetsc(c);
    473 				} else if (ctype(c, C_VAR1 | C_DIGIT)) {
    474 					Xcheck(ws, wp);
    475 					*wp++ = OSUBST;
    476 					*wp++ = 'X';
    477 					*wp++ = c;
    478 					*wp++ = '\0';
    479 					*wp++ = CSUBST;
    480 					*wp++ = 'X';
    481 				} else if (c == '\'' && (state == SBASE)) {
    482 					/* XXX which other states are valid? */
    483 					*wp++ = OQUOTE;
    484 					ignore_backslash_newline++;
    485 					PUSH_STATE(SEQUOTE);
    486 					statep->ls_sequote.got_NUL = false;
    487 					break;
    488 				} else {
    489 					*wp++ = CHAR;
    490 					*wp++ = '$';
    491 					ungetsc(c);
    492 				}
    493 				break;
    494 			case '`':
    495  subst_gravis:
    496 				PUSH_STATE(SBQUOTE);
    497 				*wp++ = COMSUB;
    498 				/* Need to know if we are inside double quotes
    499 				 * since sh/AT&T-ksh translate the \" to " in
    500 				 * "`...\"...`".
    501 				 * This is not done in POSIX mode (section
    502 				 * 3.2.3, Double Quotes: "The backquote shall
    503 				 * retain its special meaning introducing the
    504 				 * other form of command substitution (see
    505 				 * 3.6.3). The portion of the quoted string
    506 				 * from the initial backquote and the
    507 				 * characters up to the next backquote that
    508 				 * is not preceded by a backslash (having
    509 				 * escape characters removed) defines that
    510 				 * command whose output replaces `...` when
    511 				 * the word is expanded."
    512 				 * Section 3.6.3, Command Substitution:
    513 				 * "Within the backquoted style of command
    514 				 * substitution, backslash shall retain its
    515 				 * literal meaning, except when followed by
    516 				 * $ ` \.").
    517 				 */
    518 				statep->ls_sbquote.indquotes = 0;
    519 				s2 = statep;
    520 				base = state_info.base;
    521 				while (1) {
    522 					for (; s2 != base; s2--) {
    523 						if (s2->ls_state == SDQUOTE) {
    524 							statep->ls_sbquote.indquotes = 1;
    525 							break;
    526 						}
    527 					}
    528 					if (s2 != base)
    529 						break;
    530 					if (!(s2 = s2->ls_info.base))
    531 						break;
    532 					base = s2-- - STATE_BSIZE;
    533 				}
    534 				break;
    535 			case QCHAR:
    536 				if (cf & LQCHAR) {
    537 					*wp++ = QCHAR;
    538 					*wp++ = getsc();
    539 					break;
    540 				}
    541 				/* FALLTHROUGH */
    542 			default:
    543  store_char:
    544 				*wp++ = CHAR;
    545 				*wp++ = c;
    546 			}
    547 			break;
    548 
    549 		case SEQUOTE:
    550 			if (c == '\'') {
    551 				POP_STATE();
    552 				*wp++ = CQUOTE;
    553 				ignore_backslash_newline--;
    554 			} else if (c == '\\') {
    555 				if ((c2 = unbksl(true, s_get, s_put)) == -1)
    556 					c2 = s_get();
    557 				if (c2 == 0)
    558 					statep->ls_sequote.got_NUL = true;
    559 				if (!statep->ls_sequote.got_NUL) {
    560 					char ts[4];
    561 
    562 					if ((unsigned int)c2 < 0x100) {
    563 						*wp++ = QCHAR;
    564 						*wp++ = c2;
    565 					} else {
    566 						c = utf_wctomb(ts, c2 - 0x100);
    567 						ts[c] = 0;
    568 						for (c = 0; ts[c]; ++c) {
    569 							*wp++ = QCHAR;
    570 							*wp++ = ts[c];
    571 						}
    572 					}
    573 				}
    574 			} else if (!statep->ls_sequote.got_NUL) {
    575 				*wp++ = QCHAR;
    576 				*wp++ = c;
    577 			}
    578 			break;
    579 
    580 		case SSQUOTE:
    581 			if (c == '\'') {
    582 				POP_STATE();
    583 				*wp++ = CQUOTE;
    584 				ignore_backslash_newline--;
    585 			} else {
    586 				*wp++ = QCHAR;
    587 				*wp++ = c;
    588 			}
    589 			break;
    590 
    591 		case SDQUOTE:
    592 			if (c == '"') {
    593 				POP_STATE();
    594 				*wp++ = CQUOTE;
    595 			} else
    596 				goto Subst;
    597 			break;
    598 
    599 		case SCSPAREN:	/* $( ... ) */
    600 			/* todo: deal with $(...) quoting properly
    601 			 * kludge to partly fake quoting inside $(...): doesn't
    602 			 * really work because nested $(...) or ${...} inside
    603 			 * double quotes aren't dealt with.
    604 			 */
    605 			switch (statep->ls_scsparen.csstate) {
    606 			case 0:	/* normal */
    607 				switch (c) {
    608 				case '(':
    609 					statep->ls_scsparen.nparen++;
    610 					break;
    611 				case ')':
    612 					statep->ls_scsparen.nparen--;
    613 					break;
    614 				case '\\':
    615 					statep->ls_scsparen.csstate = 1;
    616 					break;
    617 				case '"':
    618 					statep->ls_scsparen.csstate = 2;
    619 					break;
    620 				case '\'':
    621 					statep->ls_scsparen.csstate = 4;
    622 					ignore_backslash_newline++;
    623 					break;
    624 				}
    625 				break;
    626 
    627 			case 1:	/* backslash in normal mode */
    628 			case 3:	/* backslash in double quotes */
    629 				--statep->ls_scsparen.csstate;
    630 				break;
    631 
    632 			case 2:	/* double quotes */
    633 				if (c == '"')
    634 					statep->ls_scsparen.csstate = 0;
    635 				else if (c == '\\')
    636 					statep->ls_scsparen.csstate = 3;
    637 				break;
    638 
    639 			case 4:	/* single quotes */
    640 				if (c == '\'') {
    641 					statep->ls_scsparen.csstate = 0;
    642 					ignore_backslash_newline--;
    643 				}
    644 				break;
    645 			}
    646 			if (statep->ls_scsparen.nparen == 0) {
    647 				POP_STATE();
    648 				*wp++ = 0;	/* end of COMSUB */
    649 			} else
    650 				*wp++ = c;
    651 			break;
    652 
    653 		case SASPAREN:	/* $(( ... )) */
    654 			/* XXX should nest using existing state machine
    655 			 * (embed "...", $(...), etc.) */
    656 			if (c == '(')
    657 				statep->ls_sasparen.nparen++;
    658 			else if (c == ')') {
    659 				statep->ls_sasparen.nparen--;
    660 				if (statep->ls_sasparen.nparen == 1) {
    661 					/*(*/
    662 					if ((c2 = getsc()) == ')') {
    663 						POP_STATE();
    664 						/* end of EXPRSUB */
    665 						*wp++ = 0;
    666 						break;
    667 					} else {
    668 						char *s;
    669 
    670 						ungetsc(c2);
    671 						/* mismatched parenthesis -
    672 						 * assume we were really
    673 						 * parsing a $(...) expression
    674 						 */
    675 						s = Xrestpos(ws, wp,
    676 						    statep->ls_sasparen.start);
    677 						memmove(s + 1, s, wp - s);
    678 						*s++ = COMSUB;
    679 						*s = '('; /*)*/
    680 						wp++;
    681 						statep->ls_scsparen.nparen = 1;
    682 						statep->ls_scsparen.csstate = 0;
    683 						state = statep->ls_state =
    684 						    SCSPAREN;
    685 					}
    686 				}
    687 			}
    688 			*wp++ = c;
    689 			break;
    690 
    691 		case SQBRACE:
    692 			if (c == '\\') {
    693 				/*
    694 				 * perform POSIX "quote removal" if the back-
    695 				 * slash is "special", i.e. same cases as the
    696 				 * {case '\\':} in Subst: plus closing brace;
    697 				 * in mksh code "quote removal" on '\c' means
    698 				 * write QCHAR+c, otherwise CHAR+\+CHAR+c are
    699 				 * emitted (in heredocquote:)
    700 				 */
    701 				if ((c = getsc()) == '"' || c == '\\' ||
    702 				    c == '$' || c == '`' || c == /*{*/'}')
    703 					goto store_qchar;
    704 				goto heredocquote;
    705 			}
    706 			goto common_SQBRACE;
    707 
    708 		case SBRACE:
    709 			if (c == '\'')
    710 				goto open_ssquote;
    711 			else if (c == '\\')
    712 				goto getsc_qchar;
    713  common_SQBRACE:
    714 			if (c == '"')
    715 				goto open_sdquote;
    716 			else if (c == '$')
    717 				goto subst_dollar;
    718 			else if (c == '`')
    719 				goto subst_gravis;
    720 			else if (c != /*{*/ '}')
    721 				goto store_char;
    722 			POP_STATE();
    723 			*wp++ = CSUBST;
    724 			*wp++ = /*{*/ '}';
    725 			break;
    726 
    727 		case STBRACE:
    728 			/* Same as SBASE, except (,|,) treated specially */
    729 			if (c == /*{*/ '}') {
    730 				POP_STATE();
    731 				*wp++ = CSUBST;
    732 				*wp++ = /*{*/ '}';
    733 			} else if (c == '|') {
    734 				*wp++ = SPAT;
    735 			} else if (c == '(') {
    736 				*wp++ = OPAT;
    737 				*wp++ = ' ';	/* simile for @ */
    738 				PUSH_STATE(SPATTERN);
    739 			} else
    740 				goto Sbase1;
    741 			break;
    742 
    743 		case SBQUOTE:
    744 			if (c == '`') {
    745 				*wp++ = 0;
    746 				POP_STATE();
    747 			} else if (c == '\\') {
    748 				switch (c = getsc()) {
    749 				case '\\':
    750 				case '$': case '`':
    751 					*wp++ = c;
    752 					break;
    753 				case '"':
    754 					if (statep->ls_sbquote.indquotes) {
    755 						*wp++ = c;
    756 						break;
    757 					}
    758 					/* FALLTHROUGH */
    759 				default:
    760 					if (c) {
    761 						/* trailing \ is lost */
    762 						*wp++ = '\\';
    763 						*wp++ = c;
    764 					}
    765 					break;
    766 				}
    767 			} else
    768 				*wp++ = c;
    769 			break;
    770 
    771 		case SWORD:	/* ONEWORD */
    772 			goto Subst;
    773 
    774 		case SLETPAREN:	/* LETEXPR: (( ... )) */
    775 			/*(*/
    776 			if (c == ')') {
    777 				if (statep->ls_sletparen.nparen > 0)
    778 					--statep->ls_sletparen.nparen;
    779 				else if ((c2 = getsc()) == /*(*/ ')') {
    780 					c = 0;
    781 					*wp++ = CQUOTE;
    782 					goto Done;
    783 				} else {
    784 					Source *s;
    785 
    786 					ungetsc(c2);
    787 					/* mismatched parenthesis -
    788 					 * assume we were really
    789 					 * parsing a $(...) expression
    790 					 */
    791 					*wp = EOS;
    792 					sp = Xstring(ws, wp);
    793 					dp = wdstrip(sp, true, false);
    794 					s = pushs(SREREAD, source->areap);
    795 					s->start = s->str = s->u.freeme = dp;
    796 					s->next = source;
    797 					source = s;
    798 					return ('('/*)*/);
    799 				}
    800 			} else if (c == '(')
    801 				/* parenthesis inside quotes and backslashes
    802 				 * are lost, but AT&T ksh doesn't count them
    803 				 * either
    804 				 */
    805 				++statep->ls_sletparen.nparen;
    806 			goto Sbase2;
    807 
    808 #ifndef MKSH_SMALL
    809 		case SLETARRAY:	/* LETARRAY: =( ... ) */
    810 			if (c == '('/*)*/)
    811 				++statep->ls_sletarray.nparen;
    812 			else if (c == /*(*/')')
    813 				if (statep->ls_sletarray.nparen-- == 0) {
    814 					c = 0;
    815 					goto Done;
    816 				}
    817 			*wp++ = CHAR;
    818 			*wp++ = c;
    819 			break;
    820 #endif
    821 
    822 		case SHERESTRING:	/* <<< delimiter */
    823 			if (c == '\\') {
    824 				c = getsc();
    825 				if (c) {
    826 					/* trailing \ is lost */
    827 					*wp++ = QCHAR;
    828 					*wp++ = c;
    829 				}
    830 				/* invoke quoting mode */
    831 				Xstring(ws, wp)[0] = QCHAR;
    832 			} else if (c == '$') {
    833 				if ((c2 = getsc()) == '\'') {
    834 					PUSH_STATE(SEQUOTE);
    835 					statep->ls_sequote.got_NUL = false;
    836 					goto sherestring_quoted;
    837 				}
    838 				ungetsc(c2);
    839 				goto sherestring_regular;
    840 			} else if (c == '\'') {
    841 				PUSH_STATE(SSQUOTE);
    842  sherestring_quoted:
    843 				*wp++ = OQUOTE;
    844 				ignore_backslash_newline++;
    845 				/* invoke quoting mode */
    846 				Xstring(ws, wp)[0] = QCHAR;
    847 			} else if (c == '"') {
    848 				state = statep->ls_state = SHEREDQUOTE;
    849 				*wp++ = OQUOTE;
    850 				/* just don't IFS split; no quoting mode */
    851 			} else {
    852  sherestring_regular:
    853 				*wp++ = CHAR;
    854 				*wp++ = c;
    855 			}
    856 			break;
    857 
    858 		case SHEREDELIM:	/* <<,<<- delimiter */
    859 			/* XXX chuck this state (and the next) - use
    860 			 * the existing states ($ and \`...` should be
    861 			 * stripped of their specialness after the
    862 			 * fact).
    863 			 */
    864 			/* here delimiters need a special case since
    865 			 * $ and `...` are not to be treated specially
    866 			 */
    867 			if (c == '\\') {
    868 				c = getsc();
    869 				if (c) {
    870 					/* trailing \ is lost */
    871 					*wp++ = QCHAR;
    872 					*wp++ = c;
    873 				}
    874 			} else if (c == '$') {
    875 				if ((c2 = getsc()) == '\'') {
    876 					PUSH_STATE(SEQUOTE);
    877 					statep->ls_sequote.got_NUL = false;
    878 					goto sheredelim_quoted;
    879 				}
    880 				ungetsc(c2);
    881 				goto sheredelim_regular;
    882 			} else if (c == '\'') {
    883 				PUSH_STATE(SSQUOTE);
    884  sheredelim_quoted:
    885 				*wp++ = OQUOTE;
    886 				ignore_backslash_newline++;
    887 			} else if (c == '"') {
    888 				state = statep->ls_state = SHEREDQUOTE;
    889 				*wp++ = OQUOTE;
    890 			} else {
    891  sheredelim_regular:
    892 				*wp++ = CHAR;
    893 				*wp++ = c;
    894 			}
    895 			break;
    896 
    897 		case SHEREDQUOTE:	/* " in <<,<<- delimiter */
    898 			if (c == '"') {
    899 				*wp++ = CQUOTE;
    900 				state = statep->ls_state =
    901 				    /* dp[1] == '<' means here string */
    902 				    Xstring(ws, wp)[1] == '<' ?
    903 				    SHERESTRING : SHEREDELIM;
    904 			} else {
    905 				if (c == '\\') {
    906 					switch (c = getsc()) {
    907 					case '\\': case '"':
    908 					case '$': case '`':
    909 						break;
    910 					default:
    911 						if (c) {
    912 							/* trailing \ lost */
    913 							*wp++ = CHAR;
    914 							*wp++ = '\\';
    915 						}
    916 						break;
    917 					}
    918 				}
    919 				*wp++ = CHAR;
    920 				*wp++ = c;
    921 			}
    922 			break;
    923 
    924 		case SPATTERN:	/* in *(...|...) pattern (*+?@!) */
    925 			if ( /*(*/ c == ')') {
    926 				*wp++ = CPAT;
    927 				POP_STATE();
    928 			} else if (c == '|') {
    929 				*wp++ = SPAT;
    930 			} else if (c == '(') {
    931 				*wp++ = OPAT;
    932 				*wp++ = ' ';	/* simile for @ */
    933 				PUSH_STATE(SPATTERN);
    934 			} else
    935 				goto Sbase1;
    936 			break;
    937 		}
    938 	}
    939  Done:
    940 	Xcheck(ws, wp);
    941 	if (statep != &states[1])
    942 		/* XXX figure out what is missing */
    943 		yyerror("no closing quote\n");
    944 
    945 #ifndef MKSH_SMALL
    946 	if (state == SLETARRAY && statep->ls_sletarray.nparen != -1)
    947 		yyerror("%s: ')' missing\n", T_synerr);
    948 #endif
    949 
    950 	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
    951 	if (state == SHEREDELIM || state == SHERESTRING)
    952 		state = SBASE;
    953 
    954 	dp = Xstring(ws, wp);
    955 	if ((c == '<' || c == '>' || c == '&') && state == SBASE) {
    956 		struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
    957 
    958 		if (Xlength(ws, wp) == 0)
    959 			iop->unit = c == '<' ? 0 : 1;
    960 		else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) {
    961 			if (dp[c2] != CHAR)
    962 				goto no_iop;
    963 			if (!ksh_isdigit(dp[c2 + 1]))
    964 				goto no_iop;
    965 			iop->unit = (iop->unit * 10) + dp[c2 + 1] - '0';
    966 		}
    967 
    968 		if (iop->unit >= FDBASE)
    969 			goto no_iop;
    970 
    971 		if (c == '&') {
    972 			if ((c2 = getsc()) != '>') {
    973 				ungetsc(c2);
    974 				goto no_iop;
    975 			}
    976 			c = c2;
    977 			iop->flag = IOBASH;
    978 		} else
    979 			iop->flag = 0;
    980 
    981 		c2 = getsc();
    982 		/* <<, >>, <> are ok, >< is not */
    983 		if (c == c2 || (c == '<' && c2 == '>')) {
    984 			iop->flag |= c == c2 ?
    985 			    (c == '>' ? IOCAT : IOHERE) : IORDWR;
    986 			if (iop->flag == IOHERE) {
    987 				if ((c2 = getsc()) == '-')
    988 					iop->flag |= IOSKIP;
    989 				else
    990 					ungetsc(c2);
    991 			}
    992 		} else if (c2 == '&')
    993 			iop->flag |= IODUP | (c == '<' ? IORDUP : 0);
    994 		else {
    995 			iop->flag |= c == '>' ? IOWRITE : IOREAD;
    996 			if (c == '>' && c2 == '|')
    997 				iop->flag |= IOCLOB;
    998 			else
    999 				ungetsc(c2);
   1000 		}
   1001 
   1002 		iop->name = NULL;
   1003 		iop->delim = NULL;
   1004 		iop->heredoc = NULL;
   1005 		Xfree(ws, wp);	/* free word */
   1006 		yylval.iop = iop;
   1007 		return (REDIR);
   1008  no_iop:
   1009 		;
   1010 	}
   1011 
   1012 	if (wp == dp && state == SBASE) {
   1013 		Xfree(ws, wp);	/* free word */
   1014 		/* no word, process LEX1 character */
   1015 		if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) {
   1016 			if ((c2 = getsc()) == c)
   1017 				c = (c == ';') ? BREAK :
   1018 				    (c == '|') ? LOGOR :
   1019 				    (c == '&') ? LOGAND :
   1020 				    /* c == '(' ) */ MDPAREN;
   1021 			else if (c == '|' && c2 == '&')
   1022 				c = COPROC;
   1023 			else
   1024 				ungetsc(c2);
   1025 		} else if (c == '\n') {
   1026 			gethere(false);
   1027 			if (cf & CONTIN)
   1028 				goto Again;
   1029 		} else if (c == '\0')
   1030 			/* need here strings at EOF */
   1031 			gethere(true);
   1032 		return (c);
   1033 	}
   1034 
   1035 	*wp++ = EOS;		/* terminate word */
   1036 	yylval.cp = Xclose(ws, wp);
   1037 	if (state == SWORD || state == SLETPAREN
   1038 	    /* XXX ONEWORD? */
   1039 #ifndef MKSH_SMALL
   1040 	    || state == SLETARRAY
   1041 #endif
   1042 	    )
   1043 		return (LWORD);
   1044 
   1045 	/* unget terminator */
   1046 	ungetsc(c);
   1047 
   1048 	/*
   1049 	 * note: the alias-vs-function code below depends on several
   1050 	 * interna: starting from here, source->str is not modified;
   1051 	 * the way getsc() and ungetsc() operate; etc.
   1052 	 */
   1053 
   1054 	/* copy word to unprefixed string ident */
   1055 	sp = yylval.cp;
   1056 	dp = ident;
   1057 	if ((cf & HEREDELIM) && (sp[1] == '<'))
   1058 		while (dp < ident+IDENT) {
   1059 			if ((c = *sp++) == CHAR)
   1060 				*dp++ = *sp++;
   1061 			else if ((c != OQUOTE) && (c != CQUOTE))
   1062 				break;
   1063 		}
   1064 	else
   1065 		while (dp < ident+IDENT && (c = *sp++) == CHAR)
   1066 			*dp++ = *sp++;
   1067 	/* Make sure the ident array stays '\0' padded */
   1068 	memset(dp, 0, (ident+IDENT) - dp + 1);
   1069 	if (c != EOS)
   1070 		*ident = '\0';	/* word is not unquoted */
   1071 
   1072 	if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
   1073 		struct tbl *p;
   1074 		uint32_t h = hash(ident);
   1075 
   1076 		/* { */
   1077 		if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
   1078 		    (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}')) {
   1079 			afree(yylval.cp, ATEMP);
   1080 			return (p->val.i);
   1081 		}
   1082 		if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
   1083 		    (p->flag & ISSET)) {
   1084 			/*
   1085 			 * this still points to the same character as the
   1086 			 * ungetsc'd terminator from above
   1087 			 */
   1088 			const char *cp = source->str;
   1089 
   1090 			/* prefer POSIX but not Korn functions over aliases */
   1091 			while (*cp == ' ' || *cp == '\t')
   1092 				/*
   1093 				 * this is like getsc() without skipping
   1094 				 * over Source boundaries (including not
   1095 				 * parsing ungetsc'd characters that got
   1096 				 * pushed into an SREREAD) which is what
   1097 				 * we want here anyway: find out whether
   1098 				 * the alias name is followed by a POSIX
   1099 				 * function definition (only the opening
   1100 				 * parenthesis is checked though)
   1101 				 */
   1102 				++cp;
   1103 			/* prefer functions over aliases */
   1104 			if (*cp == '(' /*)*/)
   1105 				/*
   1106 				 * delete alias upon encountering function
   1107 				 * definition
   1108 				 */
   1109 				ktdelete(p);
   1110 			else {
   1111 				Source *s = source;
   1112 
   1113 				while (s && (s->flags & SF_HASALIAS))
   1114 					if (s->u.tblp == p)
   1115 						return (LWORD);
   1116 					else
   1117 						s = s->next;
   1118 				/* push alias expansion */
   1119 				s = pushs(SALIAS, source->areap);
   1120 				s->start = s->str = p->val.s;
   1121 				s->u.tblp = p;
   1122 				s->flags |= SF_HASALIAS;
   1123 				s->next = source;
   1124 				if (source->type == SEOF) {
   1125 					/* prevent infinite recursion at EOS */
   1126 					source->u.tblp = p;
   1127 					source->flags |= SF_HASALIAS;
   1128 				}
   1129 				source = s;
   1130 				afree(yylval.cp, ATEMP);
   1131 				goto Again;
   1132 			}
   1133 		}
   1134 	}
   1135 
   1136 	return (LWORD);
   1137 }
   1138 
   1139 static void
   1140 gethere(bool iseof)
   1141 {
   1142 	struct ioword **p;
   1143 
   1144 	for (p = heres; p < herep; p++)
   1145 		if (iseof && (*p)->delim[1] != '<')
   1146 			/* only here strings at EOF */
   1147 			return;
   1148 		else
   1149 			readhere(*p);
   1150 	herep = heres;
   1151 }
   1152 
   1153 /*
   1154  * read "<<word" text into temp file
   1155  */
   1156 
   1157 static void
   1158 readhere(struct ioword *iop)
   1159 {
   1160 	int c;
   1161 	char *volatile eof;
   1162 	char *eofp;
   1163 	int skiptabs;
   1164 	XString xs;
   1165 	char *xp;
   1166 	int xpos;
   1167 
   1168 	if (iop->delim[1] == '<') {
   1169 		/* process the here string */
   1170 		xp = iop->heredoc = evalstr(iop->delim, DOBLANK);
   1171 		c = strlen(xp) - 1;
   1172 		memmove(xp, xp + 1, c);
   1173 		xp[c] = '\n';
   1174 		return;
   1175 	}
   1176 
   1177 	eof = evalstr(iop->delim, 0);
   1178 
   1179 	if (!(iop->flag & IOEVAL))
   1180 		ignore_backslash_newline++;
   1181 
   1182 	Xinit(xs, xp, 256, ATEMP);
   1183 
   1184 	for (;;) {
   1185 		eofp = eof;
   1186 		skiptabs = iop->flag & IOSKIP;
   1187 		xpos = Xsavepos(xs, xp);
   1188 		while ((c = getsc()) != 0) {
   1189 			if (skiptabs) {
   1190 				if (c == '\t')
   1191 					continue;
   1192 				skiptabs = 0;
   1193 			}
   1194 			if (c != *eofp)
   1195 				break;
   1196 			Xcheck(xs, xp);
   1197 			Xput(xs, xp, c);
   1198 			eofp++;
   1199 		}
   1200 		/* Allow EOF here so commands with out trailing newlines
   1201 		 * will work (eg, ksh -c '...', $(...), etc).
   1202 		 */
   1203 		if (*eofp == '\0' && (c == 0 || c == '\n')) {
   1204 			xp = Xrestpos(xs, xp, xpos);
   1205 			break;
   1206 		}
   1207 		ungetsc(c);
   1208 		while ((c = getsc()) != '\n') {
   1209 			if (c == 0)
   1210 				yyerror("here document '%s' unclosed\n", eof);
   1211 			Xcheck(xs, xp);
   1212 			Xput(xs, xp, c);
   1213 		}
   1214 		Xcheck(xs, xp);
   1215 		Xput(xs, xp, c);
   1216 	}
   1217 	Xput(xs, xp, '\0');
   1218 	iop->heredoc = Xclose(xs, xp);
   1219 
   1220 	if (!(iop->flag & IOEVAL))
   1221 		ignore_backslash_newline--;
   1222 }
   1223 
   1224 void
   1225 yyerror(const char *fmt, ...)
   1226 {
   1227 	va_list va;
   1228 
   1229 	/* pop aliases and re-reads */
   1230 	while (source->type == SALIAS || source->type == SREREAD)
   1231 		source = source->next;
   1232 	source->str = null;	/* zap pending input */
   1233 
   1234 	error_prefix(true);
   1235 	va_start(va, fmt);
   1236 	shf_vfprintf(shl_out, fmt, va);
   1237 	va_end(va);
   1238 	errorfz();
   1239 }
   1240 
   1241 /*
   1242  * input for yylex with alias expansion
   1243  */
   1244 
   1245 Source *
   1246 pushs(int type, Area *areap)
   1247 {
   1248 	Source *s;
   1249 
   1250 	s = alloc(sizeof(Source), areap);
   1251 	memset(s, 0, sizeof(Source));
   1252 	s->type = type;
   1253 	s->str = null;
   1254 	s->areap = areap;
   1255 	if (type == SFILE || type == SSTDIN)
   1256 		XinitN(s->xs, 256, s->areap);
   1257 	return (s);
   1258 }
   1259 
   1260 static int
   1261 getsc__(void)
   1262 {
   1263 	Source *s = source;
   1264 	int c;
   1265 
   1266  getsc_again:
   1267 	while ((c = *s->str++) == 0) {
   1268 		s->str = NULL;		/* return 0 for EOF by default */
   1269 		switch (s->type) {
   1270 		case SEOF:
   1271 			s->str = null;
   1272 			return (0);
   1273 
   1274 		case SSTDIN:
   1275 		case SFILE:
   1276 			getsc_line(s);
   1277 			break;
   1278 
   1279 		case SWSTR:
   1280 			break;
   1281 
   1282 		case SSTRING:
   1283 			break;
   1284 
   1285 		case SWORDS:
   1286 			s->start = s->str = *s->u.strv++;
   1287 			s->type = SWORDSEP;
   1288 			break;
   1289 
   1290 		case SWORDSEP:
   1291 			if (*s->u.strv == NULL) {
   1292 				s->start = s->str = "\n";
   1293 				s->type = SEOF;
   1294 			} else {
   1295 				s->start = s->str = " ";
   1296 				s->type = SWORDS;
   1297 			}
   1298 			break;
   1299 
   1300 		case SALIAS:
   1301 			if (s->flags & SF_ALIASEND) {
   1302 				/* pass on an unused SF_ALIAS flag */
   1303 				source = s->next;
   1304 				source->flags |= s->flags & SF_ALIAS;
   1305 				s = source;
   1306 			} else if (*s->u.tblp->val.s &&
   1307 			    (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) {
   1308 				source = s = s->next;	/* pop source stack */
   1309 				/* Note that this alias ended with a space,
   1310 				 * enabling alias expansion on the following
   1311 				 * word.
   1312 				 */
   1313 				s->flags |= SF_ALIAS;
   1314 			} else {
   1315 				/* At this point, we need to keep the current
   1316 				 * alias in the source list so recursive
   1317 				 * aliases can be detected and we also need
   1318 				 * to return the next character. Do this
   1319 				 * by temporarily popping the alias to get
   1320 				 * the next character and then put it back
   1321 				 * in the source list with the SF_ALIASEND
   1322 				 * flag set.
   1323 				 */
   1324 				source = s->next;	/* pop source stack */
   1325 				source->flags |= s->flags & SF_ALIAS;
   1326 				c = getsc__();
   1327 				if (c) {
   1328 					s->flags |= SF_ALIASEND;
   1329 					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
   1330 					s->start = s->str = s->ugbuf;
   1331 					s->next = source;
   1332 					source = s;
   1333 				} else {
   1334 					s = source;
   1335 					/* avoid reading eof twice */
   1336 					s->str = NULL;
   1337 					break;
   1338 				}
   1339 			}
   1340 			continue;
   1341 
   1342 		case SREREAD:
   1343 			if (s->start != s->ugbuf)	/* yuck */
   1344 				afree(s->u.freeme, ATEMP);
   1345 			source = s = s->next;
   1346 			continue;
   1347 		}
   1348 		if (s->str == NULL) {
   1349 			s->type = SEOF;
   1350 			s->start = s->str = null;
   1351 			return ('\0');
   1352 		}
   1353 		if (s->flags & SF_ECHO) {
   1354 			shf_puts(s->str, shl_out);
   1355 			shf_flush(shl_out);
   1356 		}
   1357 	}
   1358 	/* check for UTF-8 byte order mark */
   1359 	if (s->flags & SF_FIRST) {
   1360 		s->flags &= ~SF_FIRST;
   1361 		if (((unsigned char)c == 0xEF) &&
   1362 		    (((const unsigned char *)(s->str))[0] == 0xBB) &&
   1363 		    (((const unsigned char *)(s->str))[1] == 0xBF)) {
   1364 			s->str += 2;
   1365 			UTFMODE = 1;
   1366 			goto getsc_again;
   1367 		}
   1368 	}
   1369 	return (c);
   1370 }
   1371 
   1372 static void
   1373 getsc_line(Source *s)
   1374 {
   1375 	char *xp = Xstring(s->xs, xp), *cp;
   1376 	bool interactive = Flag(FTALKING) && s->type == SSTDIN;
   1377 	int have_tty = interactive && (s->flags & SF_TTY);
   1378 
   1379 	/* Done here to ensure nothing odd happens when a timeout occurs */
   1380 	XcheckN(s->xs, xp, LINE);
   1381 	*xp = '\0';
   1382 	s->start = s->str = xp;
   1383 
   1384 	if (have_tty && ksh_tmout) {
   1385 		ksh_tmout_state = TMOUT_READING;
   1386 		alarm(ksh_tmout);
   1387 	}
   1388 	if (interactive)
   1389 		change_winsz();
   1390 	if (have_tty && (
   1391 #if !MKSH_S_NOVI
   1392 	    Flag(FVI) ||
   1393 #endif
   1394 	    Flag(FEMACS) || Flag(FGMACS))) {
   1395 		int nread;
   1396 
   1397 		nread = x_read(xp, LINE);
   1398 		if (nread < 0)	/* read error */
   1399 			nread = 0;
   1400 		xp[nread] = '\0';
   1401 		xp += nread;
   1402 	} else {
   1403 		if (interactive)
   1404 			pprompt(prompt, 0);
   1405 		else
   1406 			s->line++;
   1407 
   1408 		while (1) {
   1409 			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
   1410 
   1411 			if (!p && shf_error(s->u.shf) &&
   1412 			    shf_errno(s->u.shf) == EINTR) {
   1413 				shf_clearerr(s->u.shf);
   1414 				if (trap)
   1415 					runtraps(0);
   1416 				continue;
   1417 			}
   1418 			if (!p || (xp = p, xp[-1] == '\n'))
   1419 				break;
   1420 			/* double buffer size */
   1421 			xp++;	/* move past NUL so doubling works... */
   1422 			XcheckN(s->xs, xp, Xlength(s->xs, xp));
   1423 			xp--;	/* ...and move back again */
   1424 		}
   1425 		/* flush any unwanted input so other programs/builtins
   1426 		 * can read it. Not very optimal, but less error prone
   1427 		 * than flushing else where, dealing with redirections,
   1428 		 * etc.
   1429 		 * todo: reduce size of shf buffer (~128?) if SSTDIN
   1430 		 */
   1431 		if (s->type == SSTDIN)
   1432 			shf_flush(s->u.shf);
   1433 	}
   1434 	/* XXX: temporary kludge to restore source after a
   1435 	 * trap may have been executed.
   1436 	 */
   1437 	source = s;
   1438 	if (have_tty && ksh_tmout) {
   1439 		ksh_tmout_state = TMOUT_EXECUTING;
   1440 		alarm(0);
   1441 	}
   1442 	cp = Xstring(s->xs, xp);
   1443 #ifndef MKSH_SMALL
   1444 	if (interactive && *cp == '!' && cur_prompt == PS1) {
   1445 		int linelen;
   1446 
   1447 		linelen = Xlength(s->xs, xp);
   1448 		XcheckN(s->xs, xp, fc_e_n + /* NUL */ 1);
   1449 		/* reload after potential realloc */
   1450 		cp = Xstring(s->xs, xp);
   1451 		/* change initial '!' into space */
   1452 		*cp = ' ';
   1453 		/* NUL terminate the current string */
   1454 		*xp = '\0';
   1455 		/* move the actual string forward */
   1456 		memmove(cp + fc_e_n, cp, linelen + /* NUL */ 1);
   1457 		xp += fc_e_n;
   1458 		/* prepend it with "fc -e -" */
   1459 		memcpy(cp, fc_e_, fc_e_n);
   1460 	}
   1461 #endif
   1462 	s->start = s->str = cp;
   1463 	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
   1464 	/* Note: if input is all nulls, this is not eof */
   1465 	if (Xlength(s->xs, xp) == 0) {
   1466 		/* EOF */
   1467 		if (s->type == SFILE)
   1468 			shf_fdclose(s->u.shf);
   1469 		s->str = NULL;
   1470 	} else if (interactive && *s->str &&
   1471 	    (cur_prompt != PS1 || !ctype(*s->str, C_IFS | C_IFSWS))) {
   1472 		histsave(&s->line, s->str, true, true);
   1473 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
   1474 	} else if (interactive && cur_prompt == PS1) {
   1475 		cp = Xstring(s->xs, xp);
   1476 		while (*cp && ctype(*cp, C_IFSWS))
   1477 			++cp;
   1478 		if (!*cp)
   1479 			histsync();
   1480 #endif
   1481 	}
   1482 	if (interactive)
   1483 		set_prompt(PS2, NULL);
   1484 }
   1485 
   1486 void
   1487 set_prompt(int to, Source *s)
   1488 {
   1489 	cur_prompt = to;
   1490 
   1491 	switch (to) {
   1492 	case PS1:	/* command */
   1493 		/* Substitute ! and !! here, before substitutions are done
   1494 		 * so ! in expanded variables are not expanded.
   1495 		 * NOTE: this is not what AT&T ksh does (it does it after
   1496 		 * substitutions, POSIX doesn't say which is to be done.
   1497 		 */
   1498 		{
   1499 			struct shf *shf;
   1500 			char * volatile ps1;
   1501 			Area *saved_atemp;
   1502 
   1503 			ps1 = str_val(global("PS1"));
   1504 			shf = shf_sopen(NULL, strlen(ps1) * 2,
   1505 			    SHF_WR | SHF_DYNAMIC, NULL);
   1506 			while (*ps1)
   1507 				if (*ps1 != '!' || *++ps1 == '!')
   1508 					shf_putchar(*ps1++, shf);
   1509 				else
   1510 					shf_fprintf(shf, "%d",
   1511 						s ? s->line + 1 : 0);
   1512 			ps1 = shf_sclose(shf);
   1513 			saved_atemp = ATEMP;
   1514 			newenv(E_ERRH);
   1515 			if (sigsetjmp(e->jbuf, 0)) {
   1516 				prompt = safe_prompt;
   1517 				/* Don't print an error - assume it has already
   1518 				 * been printed. Reason is we may have forked
   1519 				 * to run a command and the child may be
   1520 				 * unwinding its stack through this code as it
   1521 				 * exits.
   1522 				 */
   1523 			} else {
   1524 				char *cp = substitute(ps1, 0);
   1525 				strdupx(prompt, cp, saved_atemp);
   1526 			}
   1527 			quitenv(NULL);
   1528 		}
   1529 		break;
   1530 	case PS2:	/* command continuation */
   1531 		prompt = str_val(global("PS2"));
   1532 		break;
   1533 	}
   1534 }
   1535 
   1536 static int
   1537 dopprompt(const char *cp, int ntruncate, bool doprint)
   1538 {
   1539 	int columns = 0, lines = 0, indelimit = 0;
   1540 	char delimiter = 0;
   1541 
   1542 	/* Undocumented AT&T ksh feature:
   1543 	 * If the second char in the prompt string is \r then the first char
   1544 	 * is taken to be a non-printing delimiter and any chars between two
   1545 	 * instances of the delimiter are not considered to be part of the
   1546 	 * prompt length
   1547 	 */
   1548 	if (*cp && cp[1] == '\r') {
   1549 		delimiter = *cp;
   1550 		cp += 2;
   1551 	}
   1552 	for (; *cp; cp++) {
   1553 		if (indelimit && *cp != delimiter)
   1554 			;
   1555 		else if (*cp == '\n' || *cp == '\r') {
   1556 			lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
   1557 			columns = 0;
   1558 		} else if (*cp == '\t') {
   1559 			columns = (columns | 7) + 1;
   1560 		} else if (*cp == '\b') {
   1561 			if (columns > 0)
   1562 				columns--;
   1563 		} else if (*cp == delimiter)
   1564 			indelimit = !indelimit;
   1565 		else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
   1566 			const char *cp2;
   1567 			columns += utf_widthadj(cp, &cp2);
   1568 			if (doprint && (indelimit ||
   1569 			    (ntruncate < (x_cols * lines + columns))))
   1570 				shf_write(cp, cp2 - cp, shl_out);
   1571 			cp = cp2 - /* loop increment */ 1;
   1572 			continue;
   1573 		} else
   1574 			columns++;
   1575 		if (doprint && (*cp != delimiter) &&
   1576 		    (indelimit || (ntruncate < (x_cols * lines + columns))))
   1577 			shf_putc(*cp, shl_out);
   1578 	}
   1579 	if (doprint)
   1580 		shf_flush(shl_out);
   1581 	return (x_cols * lines + columns);
   1582 }
   1583 
   1584 
   1585 void
   1586 pprompt(const char *cp, int ntruncate)
   1587 {
   1588 	dopprompt(cp, ntruncate, true);
   1589 }
   1590 
   1591 int
   1592 promptlen(const char *cp)
   1593 {
   1594 	return (dopprompt(cp, 0, false));
   1595 }
   1596 
   1597 /* Read the variable part of a ${...} expression (ie, up to but not including
   1598  * the :[-+?=#%] or close-brace.
   1599  */
   1600 static char *
   1601 get_brace_var(XString *wsp, char *wp)
   1602 {
   1603 	enum parse_state {
   1604 		PS_INITIAL, PS_SAW_HASH, PS_IDENT,
   1605 		PS_NUMBER, PS_VAR1
   1606 	} state;
   1607 	char c;
   1608 
   1609 	state = PS_INITIAL;
   1610 	while (1) {
   1611 		c = getsc();
   1612 		/* State machine to figure out where the variable part ends. */
   1613 		switch (state) {
   1614 		case PS_INITIAL:
   1615 			if (c == '#' || c == '!' || c == '%') {
   1616 				state = PS_SAW_HASH;
   1617 				break;
   1618 			}
   1619 			/* FALLTHROUGH */
   1620 		case PS_SAW_HASH:
   1621 			if (ksh_isalphx(c))
   1622 				state = PS_IDENT;
   1623 			else if (ksh_isdigit(c))
   1624 				state = PS_NUMBER;
   1625 			else if (ctype(c, C_VAR1))
   1626 				state = PS_VAR1;
   1627 			else
   1628 				goto out;
   1629 			break;
   1630 		case PS_IDENT:
   1631 			if (!ksh_isalnux(c)) {
   1632 				if (c == '[') {
   1633 					char *tmp, *p;
   1634 
   1635 					if (!arraysub(&tmp))
   1636 						yyerror("missing ]\n");
   1637 					*wp++ = c;
   1638 					for (p = tmp; *p; ) {
   1639 						Xcheck(*wsp, wp);
   1640 						*wp++ = *p++;
   1641 					}
   1642 					afree(tmp, ATEMP);
   1643 					c = getsc();	/* the ] */
   1644 				}
   1645 				goto out;
   1646 			}
   1647 			break;
   1648 		case PS_NUMBER:
   1649 			if (!ksh_isdigit(c))
   1650 				goto out;
   1651 			break;
   1652 		case PS_VAR1:
   1653 			goto out;
   1654 		}
   1655 		Xcheck(*wsp, wp);
   1656 		*wp++ = c;
   1657 	}
   1658  out:
   1659 	*wp++ = '\0';	/* end of variable part */
   1660 	ungetsc(c);
   1661 	return (wp);
   1662 }
   1663 
   1664 /*
   1665  * Save an array subscript - returns true if matching bracket found, false
   1666  * if eof or newline was found.
   1667  * (Returned string double null terminated)
   1668  */
   1669 static int
   1670 arraysub(char **strp)
   1671 {
   1672 	XString ws;
   1673 	char	*wp;
   1674 	char	c;
   1675 	int	depth = 1;	/* we are just past the initial [ */
   1676 
   1677 	Xinit(ws, wp, 32, ATEMP);
   1678 
   1679 	do {
   1680 		c = getsc();
   1681 		Xcheck(ws, wp);
   1682 		*wp++ = c;
   1683 		if (c == '[')
   1684 			depth++;
   1685 		else if (c == ']')
   1686 			depth--;
   1687 	} while (depth > 0 && c && c != '\n');
   1688 
   1689 	*wp++ = '\0';
   1690 	*strp = Xclose(ws, wp);
   1691 
   1692 	return (depth == 0 ? 1 : 0);
   1693 }
   1694 
   1695 /* Unget a char: handles case when we are already at the start of the buffer */
   1696 static const char *
   1697 ungetsc(int c)
   1698 {
   1699 	if (backslash_skip)
   1700 		backslash_skip--;
   1701 	/* Don't unget eof... */
   1702 	if (source->str == null && c == '\0')
   1703 		return (source->str);
   1704 	if (source->str > source->start)
   1705 		source->str--;
   1706 	else {
   1707 		Source *s;
   1708 
   1709 		s = pushs(SREREAD, source->areap);
   1710 		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
   1711 		s->start = s->str = s->ugbuf;
   1712 		s->next = source;
   1713 		source = s;
   1714 	}
   1715 	return (source->str);
   1716 }
   1717 
   1718 
   1719 /* Called to get a char that isn't a \newline sequence. */
   1720 static int
   1721 getsc_bn(void)
   1722 {
   1723 	int c, c2;
   1724 
   1725 	if (ignore_backslash_newline)
   1726 		return (getsc_());
   1727 
   1728 	if (backslash_skip == 1) {
   1729 		backslash_skip = 2;
   1730 		return (getsc_());
   1731 	}
   1732 
   1733 	backslash_skip = 0;
   1734 
   1735 	while (1) {
   1736 		c = getsc_();
   1737 		if (c == '\\') {
   1738 			if ((c2 = getsc_()) == '\n')
   1739 				/* ignore the \newline; get the next char... */
   1740 				continue;
   1741 			ungetsc(c2);
   1742 			backslash_skip = 1;
   1743 		}
   1744 		return (c);
   1745 	}
   1746 }
   1747 
   1748 static Lex_state *
   1749 push_state_(State_info *si, Lex_state *old_end)
   1750 {
   1751 	Lex_state *news = alloc(STATE_BSIZE * sizeof(Lex_state), ATEMP);
   1752 
   1753 	news[0].ls_info.base = old_end;
   1754 	si->base = &news[0];
   1755 	si->end = &news[STATE_BSIZE];
   1756 	return (&news[1]);
   1757 }
   1758 
   1759 static Lex_state *
   1760 pop_state_(State_info *si, Lex_state *old_end)
   1761 {
   1762 	Lex_state *old_base = si->base;
   1763 
   1764 	si->base = old_end->ls_info.base - STATE_BSIZE;
   1765 	si->end = old_end->ls_info.base;
   1766 
   1767 	afree(old_base, ATEMP);
   1768 
   1769 	return (si->base + STATE_BSIZE - 1);
   1770 }
   1771 
   1772 static int
   1773 s_get(void)
   1774 {
   1775 	return (getsc());
   1776 }
   1777 
   1778 static void
   1779 s_put(int c)
   1780 {
   1781 	ungetsc(c);
   1782 }
   1783