1 /* $OpenBSD: lex.c,v 1.51 2015/09/10 22:48:58 nicm Exp $ */ 2 3 /*- 4 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 5 * 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 6 * mirabilos <m (at) mirbsd.org> 7 * 8 * Provided that these terms and disclaimer and all copyright notices 9 * are retained or reproduced in an accompanying document, permission 10 * is granted to deal in this work without restriction, including un- 11 * limited rights to use, publicly perform, distribute, sell, modify, 12 * merge, give away, or sublicence. 13 * 14 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to 15 * the utmost extent permitted by applicable law, neither express nor 16 * implied; without malicious intent or gross negligence. In no event 17 * may a licensor, author or contributor be held liable for indirect, 18 * direct, other damage, loss, or other issues arising in any way out 19 * of dealing in the work, even if advised of the possibility of such 20 * damage or existence of a defect, except proven that it results out 21 * of said person's immediate fault when using the work as intended. 22 */ 23 24 #include "sh.h" 25 26 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.250 2018/10/20 18:34:14 tg Exp $"); 27 28 /* 29 * states while lexing word 30 */ 31 #define SBASE 0 /* outside any lexical constructs */ 32 #define SWORD 1 /* implicit quoting for substitute() */ 33 #define SLETPAREN 2 /* inside (( )), implicit quoting */ 34 #define SSQUOTE 3 /* inside '' */ 35 #define SDQUOTE 4 /* inside "" */ 36 #define SEQUOTE 5 /* inside $'' */ 37 #define SBRACE 6 /* inside ${} */ 38 #define SQBRACE 7 /* inside "${}" */ 39 #define SBQUOTE 8 /* inside `` */ 40 #define SASPAREN 9 /* inside $(( )) */ 41 #define SHEREDELIM 10 /* parsing << or <<- delimiter */ 42 #define SHEREDQUOTE 11 /* parsing " in << or <<- delimiter */ 43 #define SPATTERN 12 /* parsing *(...|...) pattern (*+?@!) 
*/ 44 #define SADELIM 13 /* like SBASE, looking for delimiter */ 45 #define STBRACEKORN 14 /* parsing ${...[#%]...} !FSH */ 46 #define STBRACEBOURNE 15 /* parsing ${...[#%]...} FSH */ 47 #define SINVALID 255 /* invalid state */ 48 49 struct sretrace_info { 50 struct sretrace_info *next; 51 XString xs; 52 char *xp; 53 }; 54 55 /* 56 * Structure to keep track of the lexing state and the various pieces of info 57 * needed for each particular state. 58 */ 59 typedef struct lex_state { 60 union { 61 /* point to the next state block */ 62 struct lex_state *base; 63 /* marks start of state output in output string */ 64 size_t start; 65 /* SBQUOTE: true if in double quotes: "`...`" */ 66 /* SEQUOTE: got NUL, ignore rest of string */ 67 bool abool; 68 /* SADELIM information */ 69 struct { 70 /* character to search for */ 71 unsigned char delimiter; 72 /* max. number of delimiters */ 73 unsigned char num; 74 } adelim; 75 } u; 76 /* count open parentheses */ 77 short nparen; 78 /* type of this state */ 79 uint8_t type; 80 /* extra flags */ 81 uint8_t ls_flags; 82 } Lex_state; 83 #define ls_base u.base 84 #define ls_start u.start 85 #define ls_bool u.abool 86 #define ls_adelim u.adelim 87 88 /* ls_flags */ 89 #define LS_HEREDOC BIT(0) 90 91 typedef struct { 92 Lex_state *base; 93 Lex_state *end; 94 } State_info; 95 96 static void readhere(struct ioword *); 97 static void ungetsc(int); 98 static void ungetsc_i(int); 99 static int getsc_uu(void); 100 static void getsc_line(Source *); 101 static int getsc_bn(void); 102 static int getsc_i(void); 103 static char *get_brace_var(XString *, char *); 104 static bool arraysub(char **); 105 static void gethere(void); 106 static Lex_state *push_state_i(State_info *, Lex_state *); 107 static Lex_state *pop_state_i(State_info *, Lex_state *); 108 109 static int backslash_skip; 110 static int ignore_backslash_newline; 111 112 /* optimised getsc_bn() */ 113 #define o_getsc() (*source->str != '\0' && *source->str != '\\' && \ 114 
!backslash_skip ? *source->str++ : getsc_bn()) 115 /* optimised getsc_uu() */ 116 #define o_getsc_u() ((*source->str != '\0') ? *source->str++ : getsc_uu()) 117 118 /* retrace helper */ 119 #define o_getsc_r(carg) \ 120 int cev = (carg); \ 121 struct sretrace_info *rp = retrace_info; \ 122 \ 123 while (rp) { \ 124 Xcheck(rp->xs, rp->xp); \ 125 *rp->xp++ = cev; \ 126 rp = rp->next; \ 127 } \ 128 \ 129 return (cev); 130 131 /* callback */ 132 static int 133 getsc_i(void) 134 { 135 o_getsc_r((unsigned int)(unsigned char)o_getsc()); 136 } 137 138 #if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST) 139 #define getsc() getsc_i() 140 #else 141 static int getsc_r(int); 142 143 static int 144 getsc_r(int c) 145 { 146 o_getsc_r(c); 147 } 148 149 #define getsc() getsc_r((unsigned int)(unsigned char)o_getsc()) 150 #endif 151 152 #define STATE_BSIZE 8 153 154 #define PUSH_STATE(s) do { \ 155 uint8_t state_flags = statep->ls_flags; \ 156 if (++statep == state_info.end) \ 157 statep = push_state_i(&state_info, statep); \ 158 state = statep->type = (s); \ 159 statep->ls_flags = state_flags; \ 160 } while (/* CONSTCOND */ 0) 161 162 #define POP_STATE() do { \ 163 if (--statep == state_info.base) \ 164 statep = pop_state_i(&state_info, statep); \ 165 state = statep->type; \ 166 } while (/* CONSTCOND */ 0) 167 168 #define PUSH_SRETRACE(s) do { \ 169 struct sretrace_info *ri; \ 170 \ 171 PUSH_STATE(s); \ 172 statep->ls_start = Xsavepos(ws, wp); \ 173 ri = alloc(sizeof(struct sretrace_info), ATEMP); \ 174 Xinit(ri->xs, ri->xp, 64, ATEMP); \ 175 ri->next = retrace_info; \ 176 retrace_info = ri; \ 177 } while (/* CONSTCOND */ 0) 178 179 #define POP_SRETRACE() do { \ 180 wp = Xrestpos(ws, wp, statep->ls_start); \ 181 *retrace_info->xp = '\0'; \ 182 sp = Xstring(retrace_info->xs, retrace_info->xp); \ 183 dp = (void *)retrace_info; \ 184 retrace_info = retrace_info->next; \ 185 afree(dp, ATEMP); \ 186 POP_STATE(); \ 187 } while (/* CONSTCOND */ 0) 188 189 /** 190 * Lexical analyser 191 
* 192 * tokens are not regular expressions, they are LL(1). 193 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))". 194 * hence the state stack. Note "$(...)" are now parsed recursively. 195 */ 196 197 int 198 yylex(int cf) 199 { 200 Lex_state states[STATE_BSIZE], *statep, *s2, *base; 201 State_info state_info; 202 int c, c2, state; 203 size_t cz; 204 XString ws; /* expandable output word */ 205 char *wp; /* output word pointer */ 206 char *sp, *dp; 207 208 Again: 209 states[0].type = SINVALID; 210 states[0].ls_base = NULL; 211 statep = &states[1]; 212 state_info.base = states; 213 state_info.end = &state_info.base[STATE_BSIZE]; 214 215 Xinit(ws, wp, 64, ATEMP); 216 217 backslash_skip = 0; 218 ignore_backslash_newline = 0; 219 220 if (cf & ONEWORD) 221 state = SWORD; 222 else if (cf & LETEXPR) { 223 /* enclose arguments in (double) quotes */ 224 *wp++ = OQUOTE; 225 state = SLETPAREN; 226 statep->nparen = 0; 227 } else { 228 /* normal lexing */ 229 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE; 230 do { 231 c = getsc(); 232 } while (ctype(c, C_BLANK)); 233 if (c == '#') { 234 ignore_backslash_newline++; 235 do { 236 c = getsc(); 237 } while (!ctype(c, C_NUL | C_LF)); 238 ignore_backslash_newline--; 239 } 240 ungetsc(c); 241 } 242 if (source->flags & SF_ALIAS) { 243 /* trailing ' ' in alias definition */ 244 source->flags &= ~SF_ALIAS; 245 /* POSIX: trailing space only counts if parsing simple cmd */ 246 if (!Flag(FPOSIX) || (cf & CMDWORD)) 247 cf |= ALIAS; 248 } 249 250 /* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */ 251 statep->type = state; 252 statep->ls_flags = (cf & HEREDOC) ? 
LS_HEREDOC : 0; 253 254 /* collect non-special or quoted characters to form word */ 255 while (!((c = getsc()) == 0 || 256 ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) { 257 if (state == SBASE && 258 subshell_nesting_type == ORD(/*{*/ '}') && 259 (unsigned int)c == ORD(/*{*/ '}')) 260 /* possibly end ${ :;} */ 261 break; 262 Xcheck(ws, wp); 263 switch (state) { 264 case SADELIM: 265 if ((unsigned int)c == ORD('(')) 266 statep->nparen++; 267 else if ((unsigned int)c == ORD(')')) 268 statep->nparen--; 269 else if (statep->nparen == 0 && 270 ((unsigned int)c == ORD(/*{*/ '}') || 271 c == (int)statep->ls_adelim.delimiter)) { 272 *wp++ = ADELIM; 273 *wp++ = c; 274 if ((unsigned int)c == ORD(/*{*/ '}') || 275 --statep->ls_adelim.num == 0) 276 POP_STATE(); 277 if ((unsigned int)c == ORD(/*{*/ '}')) 278 POP_STATE(); 279 break; 280 } 281 /* FALLTHROUGH */ 282 case SBASE: 283 if ((unsigned int)c == ORD('[') && (cf & CMDASN)) { 284 /* temporary */ 285 *wp = EOS; 286 if (is_wdvarname(Xstring(ws, wp), false)) { 287 char *p, *tmp; 288 289 if (arraysub(&tmp)) { 290 *wp++ = CHAR; 291 *wp++ = c; 292 for (p = tmp; *p; ) { 293 Xcheck(ws, wp); 294 *wp++ = CHAR; 295 *wp++ = *p++; 296 } 297 afree(tmp, ATEMP); 298 break; 299 } 300 } 301 *wp++ = CHAR; 302 *wp++ = c; 303 break; 304 } 305 /* FALLTHROUGH */ 306 Sbase1: /* includes *(...|...) pattern (*+?@!) */ 307 if (ctype(c, C_PATMO)) { 308 c2 = getsc(); 309 if ((unsigned int)c2 == ORD('(' /*)*/)) { 310 *wp++ = OPAT; 311 *wp++ = c; 312 PUSH_STATE(SPATTERN); 313 break; 314 } 315 ungetsc(c2); 316 } 317 /* FALLTHROUGH */ 318 Sbase2: /* doesn't include *(...|...) pattern (*+?@!) 
*/ 319 switch (c) { 320 case ORD('\\'): 321 getsc_qchar: 322 if ((c = getsc())) { 323 /* trailing \ is lost */ 324 *wp++ = QCHAR; 325 *wp++ = c; 326 } 327 break; 328 case ORD('\''): 329 open_ssquote_unless_heredoc: 330 if ((statep->ls_flags & LS_HEREDOC)) 331 goto store_char; 332 *wp++ = OQUOTE; 333 ignore_backslash_newline++; 334 PUSH_STATE(SSQUOTE); 335 break; 336 case ORD('"'): 337 open_sdquote: 338 *wp++ = OQUOTE; 339 PUSH_STATE(SDQUOTE); 340 break; 341 case ORD('$'): 342 /* 343 * processing of dollar sign belongs into 344 * Subst, except for those which can open 345 * a string: $'' and $"" 346 */ 347 subst_dollar_ex: 348 c = getsc(); 349 switch (c) { 350 case ORD('"'): 351 goto open_sdquote; 352 case ORD('\''): 353 goto open_sequote; 354 default: 355 goto SubstS; 356 } 357 default: 358 goto Subst; 359 } 360 break; 361 362 Subst: 363 switch (c) { 364 case ORD('\\'): 365 c = getsc(); 366 switch (c) { 367 case ORD('"'): 368 if ((statep->ls_flags & LS_HEREDOC)) 369 goto heredocquote; 370 /* FALLTHROUGH */ 371 case ORD('\\'): 372 case ORD('$'): 373 case ORD('`'): 374 store_qchar: 375 *wp++ = QCHAR; 376 *wp++ = c; 377 break; 378 default: 379 heredocquote: 380 Xcheck(ws, wp); 381 if (c) { 382 /* trailing \ is lost */ 383 *wp++ = CHAR; 384 *wp++ = '\\'; 385 *wp++ = CHAR; 386 *wp++ = c; 387 } 388 break; 389 } 390 break; 391 case ORD('$'): 392 c = getsc(); 393 SubstS: 394 if ((unsigned int)c == ORD('(' /*)*/)) { 395 c = getsc(); 396 if ((unsigned int)c == ORD('(' /*)*/)) { 397 *wp++ = EXPRSUB; 398 PUSH_SRETRACE(SASPAREN); 399 /* unneeded? 
*/ 400 /*statep->ls_flags &= ~LS_HEREDOC;*/ 401 statep->nparen = 2; 402 *retrace_info->xp++ = '('; 403 } else { 404 ungetsc(c); 405 subst_command: 406 c = COMSUB; 407 subst_command2: 408 sp = yyrecursive(c); 409 cz = strlen(sp) + 1; 410 XcheckN(ws, wp, cz); 411 *wp++ = c; 412 memcpy(wp, sp, cz); 413 wp += cz; 414 } 415 } else if ((unsigned int)c == ORD('{' /*}*/)) { 416 if ((unsigned int)(c = getsc()) == ORD('|')) { 417 /* 418 * non-subenvironment 419 * value substitution 420 */ 421 c = VALSUB; 422 goto subst_command2; 423 } else if (ctype(c, C_IFSWS)) { 424 /* 425 * non-subenvironment 426 * "command" substitution 427 */ 428 c = FUNSUB; 429 goto subst_command2; 430 } 431 ungetsc(c); 432 *wp++ = OSUBST; 433 *wp++ = '{' /*}*/; 434 wp = get_brace_var(&ws, wp); 435 c = getsc(); 436 /* allow :# and :% (ksh88 compat) */ 437 if ((unsigned int)c == ORD(':')) { 438 *wp++ = CHAR; 439 *wp++ = c; 440 c = getsc(); 441 if ((unsigned int)c == ORD(':')) { 442 *wp++ = CHAR; 443 *wp++ = '0'; 444 *wp++ = ADELIM; 445 *wp++ = ':'; 446 PUSH_STATE(SBRACE); 447 /* perhaps unneeded? */ 448 statep->ls_flags &= ~LS_HEREDOC; 449 PUSH_STATE(SADELIM); 450 statep->ls_adelim.delimiter = ':'; 451 statep->ls_adelim.num = 1; 452 statep->nparen = 0; 453 break; 454 } else if (ctype(c, C_DIGIT | C_DOLAR | C_SPC) || 455 /*XXX what else? */ 456 c == '(' /*)*/) { 457 /* substring subst. */ 458 if (c != ' ') { 459 *wp++ = CHAR; 460 *wp++ = ' '; 461 } 462 ungetsc(c); 463 PUSH_STATE(SBRACE); 464 /* perhaps unneeded? */ 465 statep->ls_flags &= ~LS_HEREDOC; 466 PUSH_STATE(SADELIM); 467 statep->ls_adelim.delimiter = ':'; 468 statep->ls_adelim.num = 2; 469 statep->nparen = 0; 470 break; 471 } 472 } else if (c == '/') { 473 c2 = ADELIM; 474 parse_adelim_slash: 475 *wp++ = CHAR; 476 *wp++ = c; 477 if ((unsigned int)(c = getsc()) == ORD('/')) { 478 *wp++ = c2; 479 *wp++ = c; 480 } else 481 ungetsc(c); 482 PUSH_STATE(SBRACE); 483 /* perhaps unneeded? 
*/ 484 statep->ls_flags &= ~LS_HEREDOC; 485 PUSH_STATE(SADELIM); 486 statep->ls_adelim.delimiter = '/'; 487 statep->ls_adelim.num = 1; 488 statep->nparen = 0; 489 break; 490 } else if (c == '@') { 491 c2 = getsc(); 492 ungetsc(c2); 493 if ((unsigned int)c2 == ORD('/')) { 494 c2 = CHAR; 495 goto parse_adelim_slash; 496 } 497 } 498 /* 499 * If this is a trim operation, 500 * treat (,|,) specially in STBRACE. 501 */ 502 if (ctype(c, C_SUB2)) { 503 ungetsc(c); 504 if (Flag(FSH)) 505 PUSH_STATE(STBRACEBOURNE); 506 else 507 PUSH_STATE(STBRACEKORN); 508 /* single-quotes-in-heredoc-trim */ 509 statep->ls_flags &= ~LS_HEREDOC; 510 } else { 511 ungetsc(c); 512 if (state == SDQUOTE || 513 state == SQBRACE) 514 PUSH_STATE(SQBRACE); 515 else 516 PUSH_STATE(SBRACE); 517 /* here no LS_HEREDOC removal */ 518 /* single-quotes-in-heredoc-braces */ 519 } 520 } else if (ctype(c, C_ALPHX)) { 521 *wp++ = OSUBST; 522 *wp++ = 'X'; 523 do { 524 Xcheck(ws, wp); 525 *wp++ = c; 526 c = getsc(); 527 } while (ctype(c, C_ALNUX)); 528 *wp++ = '\0'; 529 *wp++ = CSUBST; 530 *wp++ = 'X'; 531 ungetsc(c); 532 } else if (ctype(c, C_VAR1 | C_DIGIT)) { 533 Xcheck(ws, wp); 534 *wp++ = OSUBST; 535 *wp++ = 'X'; 536 *wp++ = c; 537 *wp++ = '\0'; 538 *wp++ = CSUBST; 539 *wp++ = 'X'; 540 } else { 541 *wp++ = CHAR; 542 *wp++ = '$'; 543 ungetsc(c); 544 } 545 break; 546 case ORD('`'): 547 subst_gravis: 548 PUSH_STATE(SBQUOTE); 549 *wp++ = COMASUB; 550 /* 551 * We need to know whether we are within double 552 * quotes in order to translate \" to " within 553 * "`\"`" because, unlike for COMSUBs, the 554 * outer double quoteing changes the backslash 555 * meaning for the inside. 
For more details: 556 * http://austingroupbugs.net/view.php?id=1015 557 */ 558 statep->ls_bool = false; 559 s2 = statep; 560 base = state_info.base; 561 while (/* CONSTCOND */ 1) { 562 for (; s2 != base; s2--) { 563 if (s2->type == SDQUOTE) { 564 statep->ls_bool = true; 565 break; 566 } 567 } 568 if (s2 != base) 569 break; 570 if (!(s2 = s2->ls_base)) 571 break; 572 base = s2-- - STATE_BSIZE; 573 } 574 break; 575 case QCHAR: 576 if (cf & LQCHAR) { 577 *wp++ = QCHAR; 578 *wp++ = getsc(); 579 break; 580 } 581 /* FALLTHROUGH */ 582 default: 583 store_char: 584 *wp++ = CHAR; 585 *wp++ = c; 586 } 587 break; 588 589 case SEQUOTE: 590 if ((unsigned int)c == ORD('\'')) { 591 POP_STATE(); 592 *wp++ = CQUOTE; 593 ignore_backslash_newline--; 594 } else if ((unsigned int)c == ORD('\\')) { 595 if ((c2 = unbksl(true, getsc_i, ungetsc)) == -1) 596 c2 = getsc(); 597 if (c2 == 0) 598 statep->ls_bool = true; 599 if (!statep->ls_bool) { 600 char ts[4]; 601 602 if ((unsigned int)c2 < 0x100) { 603 *wp++ = QCHAR; 604 *wp++ = c2; 605 } else { 606 cz = utf_wctomb(ts, c2 - 0x100); 607 ts[cz] = 0; 608 cz = 0; 609 do { 610 *wp++ = QCHAR; 611 *wp++ = ts[cz]; 612 } while (ts[++cz]); 613 } 614 } 615 } else if (!statep->ls_bool) { 616 *wp++ = QCHAR; 617 *wp++ = c; 618 } 619 break; 620 621 case SSQUOTE: 622 if ((unsigned int)c == ORD('\'')) { 623 POP_STATE(); 624 if ((statep->ls_flags & LS_HEREDOC) || 625 state == SQBRACE) 626 goto store_char; 627 *wp++ = CQUOTE; 628 ignore_backslash_newline--; 629 } else { 630 *wp++ = QCHAR; 631 *wp++ = c; 632 } 633 break; 634 635 case SDQUOTE: 636 if ((unsigned int)c == ORD('"')) { 637 POP_STATE(); 638 *wp++ = CQUOTE; 639 } else 640 goto Subst; 641 break; 642 643 /* $(( ... 
)) */ 644 case SASPAREN: 645 if ((unsigned int)c == ORD('(')) 646 statep->nparen++; 647 else if ((unsigned int)c == ORD(')')) { 648 statep->nparen--; 649 if (statep->nparen == 1) { 650 /* end of EXPRSUB */ 651 POP_SRETRACE(); 652 653 if ((unsigned int)(c2 = getsc()) == ORD(/*(*/ ')')) { 654 cz = strlen(sp) - 2; 655 XcheckN(ws, wp, cz); 656 memcpy(wp, sp + 1, cz); 657 wp += cz; 658 afree(sp, ATEMP); 659 *wp++ = '\0'; 660 break; 661 } else { 662 Source *s; 663 664 ungetsc(c2); 665 /* 666 * mismatched parenthesis - 667 * assume we were really 668 * parsing a $(...) expression 669 */ 670 --wp; 671 s = pushs(SREREAD, 672 source->areap); 673 s->start = s->str = 674 s->u.freeme = sp; 675 s->next = source; 676 source = s; 677 goto subst_command; 678 } 679 } 680 } 681 /* reuse existing state machine */ 682 goto Sbase2; 683 684 case SQBRACE: 685 if ((unsigned int)c == ORD('\\')) { 686 /* 687 * perform POSIX "quote removal" if the back- 688 * slash is "special", i.e. same cases as the 689 * {case '\\':} in Subst: plus closing brace; 690 * in mksh code "quote removal" on '\c' means 691 * write QCHAR+c, otherwise CHAR+\+CHAR+c are 692 * emitted (in heredocquote:) 693 */ 694 if ((unsigned int)(c = getsc()) == ORD('"') || 695 (unsigned int)c == ORD('\\') || 696 ctype(c, C_DOLAR | C_GRAVE) || 697 (unsigned int)c == ORD(/*{*/ '}')) 698 goto store_qchar; 699 goto heredocquote; 700 } 701 goto common_SQBRACE; 702 703 case SBRACE: 704 if ((unsigned int)c == ORD('\'')) 705 goto open_ssquote_unless_heredoc; 706 else if ((unsigned int)c == ORD('\\')) 707 goto getsc_qchar; 708 common_SQBRACE: 709 if ((unsigned int)c == ORD('"')) 710 goto open_sdquote; 711 else if ((unsigned int)c == ORD('$')) 712 goto subst_dollar_ex; 713 else if ((unsigned int)c == ORD('`')) 714 goto subst_gravis; 715 else if ((unsigned int)c != ORD(/*{*/ '}')) 716 goto store_char; 717 POP_STATE(); 718 *wp++ = CSUBST; 719 *wp++ = /*{*/ '}'; 720 break; 721 722 /* Same as SBASE, except (,|,) treated specially */ 723 case 
STBRACEKORN: 724 if ((unsigned int)c == ORD('|')) 725 *wp++ = SPAT; 726 else if ((unsigned int)c == ORD('(')) { 727 *wp++ = OPAT; 728 /* simile for @ */ 729 *wp++ = ' '; 730 PUSH_STATE(SPATTERN); 731 } else /* FALLTHROUGH */ 732 case STBRACEBOURNE: 733 if ((unsigned int)c == ORD(/*{*/ '}')) { 734 POP_STATE(); 735 *wp++ = CSUBST; 736 *wp++ = /*{*/ '}'; 737 } else 738 goto Sbase1; 739 break; 740 741 case SBQUOTE: 742 if ((unsigned int)c == ORD('`')) { 743 *wp++ = 0; 744 POP_STATE(); 745 } else if ((unsigned int)c == ORD('\\')) { 746 switch (c = getsc()) { 747 case 0: 748 /* trailing \ is lost */ 749 break; 750 case ORD('$'): 751 case ORD('`'): 752 case ORD('\\'): 753 *wp++ = c; 754 break; 755 case ORD('"'): 756 if (statep->ls_bool) { 757 *wp++ = c; 758 break; 759 } 760 /* FALLTHROUGH */ 761 default: 762 *wp++ = '\\'; 763 *wp++ = c; 764 break; 765 } 766 } else 767 *wp++ = c; 768 break; 769 770 /* ONEWORD */ 771 case SWORD: 772 goto Subst; 773 774 /* LETEXPR: (( ... )) */ 775 case SLETPAREN: 776 if ((unsigned int)c == ORD(/*(*/ ')')) { 777 if (statep->nparen > 0) 778 --statep->nparen; 779 else if ((unsigned int)(c2 = getsc()) == ORD(/*(*/ ')')) { 780 c = 0; 781 *wp++ = CQUOTE; 782 goto Done; 783 } else { 784 Source *s; 785 786 ungetsc(c2); 787 ungetsc(c); 788 /* 789 * mismatched parenthesis - 790 * assume we were really 791 * parsing a (...) 
expression 792 */ 793 *wp = EOS; 794 sp = Xstring(ws, wp); 795 dp = wdstrip(sp + 1, WDS_TPUTS); 796 s = pushs(SREREAD, source->areap); 797 s->start = s->str = s->u.freeme = dp; 798 s->next = source; 799 source = s; 800 ungetsc('(' /*)*/); 801 return (ORD('(' /*)*/)); 802 } 803 } else if ((unsigned int)c == ORD('(')) 804 /* 805 * parentheses inside quotes and 806 * backslashes are lost, but AT&T ksh 807 * doesn't count them either 808 */ 809 ++statep->nparen; 810 goto Sbase2; 811 812 /* << or <<- delimiter */ 813 case SHEREDELIM: 814 /* 815 * here delimiters need a special case since 816 * $ and `...` are not to be treated specially 817 */ 818 switch (c) { 819 case ORD('\\'): 820 if ((c = getsc())) { 821 /* trailing \ is lost */ 822 *wp++ = QCHAR; 823 *wp++ = c; 824 } 825 break; 826 case ORD('\''): 827 goto open_ssquote_unless_heredoc; 828 case ORD('$'): 829 if ((unsigned int)(c2 = getsc()) == ORD('\'')) { 830 open_sequote: 831 *wp++ = OQUOTE; 832 ignore_backslash_newline++; 833 PUSH_STATE(SEQUOTE); 834 statep->ls_bool = false; 835 break; 836 } else if ((unsigned int)c2 == ORD('"')) { 837 /* FALLTHROUGH */ 838 case ORD('"'): 839 PUSH_SRETRACE(SHEREDQUOTE); 840 break; 841 } 842 ungetsc(c2); 843 /* FALLTHROUGH */ 844 default: 845 *wp++ = CHAR; 846 *wp++ = c; 847 } 848 break; 849 850 /* " in << or <<- delimiter */ 851 case SHEREDQUOTE: 852 if ((unsigned int)c != ORD('"')) 853 goto Subst; 854 POP_SRETRACE(); 855 dp = strnul(sp) - 1; 856 /* remove the trailing double quote */ 857 *dp = '\0'; 858 /* store the quoted string */ 859 *wp++ = OQUOTE; 860 XcheckN(ws, wp, (dp - sp) * 2); 861 dp = sp; 862 while ((c = *dp++)) { 863 if (c == '\\') { 864 switch ((c = *dp++)) { 865 case ORD('\\'): 866 case ORD('"'): 867 case ORD('$'): 868 case ORD('`'): 869 break; 870 default: 871 *wp++ = CHAR; 872 *wp++ = '\\'; 873 break; 874 } 875 } 876 *wp++ = CHAR; 877 *wp++ = c; 878 } 879 afree(sp, ATEMP); 880 *wp++ = CQUOTE; 881 state = statep->type = SHEREDELIM; 882 break; 883 884 /* in 
*(...|...) pattern (*+?@!) */ 885 case SPATTERN: 886 if ((unsigned int)c == ORD(/*(*/ ')')) { 887 *wp++ = CPAT; 888 POP_STATE(); 889 } else if ((unsigned int)c == ORD('|')) { 890 *wp++ = SPAT; 891 } else if ((unsigned int)c == ORD('(')) { 892 *wp++ = OPAT; 893 /* simile for @ */ 894 *wp++ = ' '; 895 PUSH_STATE(SPATTERN); 896 } else 897 goto Sbase1; 898 break; 899 } 900 } 901 Done: 902 Xcheck(ws, wp); 903 if (statep != &states[1]) 904 /* XXX figure out what is missing */ 905 yyerror("no closing quote"); 906 907 /* This done to avoid tests for SHEREDELIM wherever SBASE tested */ 908 if (state == SHEREDELIM) 909 state = SBASE; 910 911 dp = Xstring(ws, wp); 912 if (state == SBASE && ( 913 (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) || 914 ctype(c, C_ANGLE)) && ((c2 = Xlength(ws, wp)) == 0 || 915 (c2 == 2 && dp[0] == CHAR && ctype(dp[1], C_DIGIT)))) { 916 struct ioword *iop = alloc(sizeof(struct ioword), ATEMP); 917 918 iop->unit = c2 == 2 ? ksh_numdig(dp[1]) : c == '<' ? 0 : 1; 919 920 if (c == '&') { 921 if ((unsigned int)(c2 = getsc()) != ORD('>')) { 922 ungetsc(c2); 923 goto no_iop; 924 } 925 c = c2; 926 iop->ioflag = IOBASH; 927 } else 928 iop->ioflag = 0; 929 930 c2 = getsc(); 931 /* <<, >>, <> are ok, >< is not */ 932 if (c == c2 || ((unsigned int)c == ORD('<') && 933 (unsigned int)c2 == ORD('>'))) { 934 iop->ioflag |= c == c2 ? 935 ((unsigned int)c == ORD('>') ? IOCAT : IOHERE) : IORDWR; 936 if (iop->ioflag == IOHERE) { 937 if ((unsigned int)(c2 = getsc()) == ORD('-')) 938 iop->ioflag |= IOSKIP; 939 else if ((unsigned int)c2 == ORD('<')) 940 iop->ioflag |= IOHERESTR; 941 else 942 ungetsc(c2); 943 } 944 } else if ((unsigned int)c2 == ORD('&')) 945 iop->ioflag |= IODUP | ((unsigned int)c == ORD('<') ? IORDUP : 0); 946 else { 947 iop->ioflag |= (unsigned int)c == ORD('>') ? 
IOWRITE : IOREAD; 948 if ((unsigned int)c == ORD('>') && (unsigned int)c2 == ORD('|')) 949 iop->ioflag |= IOCLOB; 950 else 951 ungetsc(c2); 952 } 953 954 iop->ioname = NULL; 955 iop->delim = NULL; 956 iop->heredoc = NULL; 957 /* free word */ 958 Xfree(ws, wp); 959 yylval.iop = iop; 960 return (REDIR); 961 no_iop: 962 afree(iop, ATEMP); 963 } 964 965 if (wp == dp && state == SBASE) { 966 /* free word */ 967 Xfree(ws, wp); 968 /* no word, process LEX1 character */ 969 if (((unsigned int)c == ORD('|')) || 970 ((unsigned int)c == ORD('&')) || 971 ((unsigned int)c == ORD(';')) || 972 ((unsigned int)c == ORD('(' /*)*/))) { 973 if ((c2 = getsc()) == c) 974 c = ((unsigned int)c == ORD(';')) ? BREAK : 975 ((unsigned int)c == ORD('|')) ? LOGOR : 976 ((unsigned int)c == ORD('&')) ? LOGAND : 977 /* (unsigned int)c == ORD('(' )) */ MDPAREN; 978 else if ((unsigned int)c == ORD('|') && (unsigned int)c2 == ORD('&')) 979 c = COPROC; 980 else if ((unsigned int)c == ORD(';') && (unsigned int)c2 == ORD('|')) 981 c = BRKEV; 982 else if ((unsigned int)c == ORD(';') && (unsigned int)c2 == ORD('&')) 983 c = BRKFT; 984 else 985 ungetsc(c2); 986 #ifndef MKSH_SMALL 987 if (c == BREAK) { 988 if ((unsigned int)(c2 = getsc()) == ORD('&')) 989 c = BRKEV; 990 else 991 ungetsc(c2); 992 } 993 #endif 994 } else if ((unsigned int)c == ORD('\n')) { 995 if (cf & HEREDELIM) 996 ungetsc(c); 997 else { 998 gethere(); 999 if (cf & CONTIN) 1000 goto Again; 1001 } 1002 } else if (c == '\0' && !(cf & HEREDELIM)) { 1003 struct ioword **p = heres; 1004 1005 while (p < herep) 1006 if ((*p)->ioflag & IOHERESTR) 1007 ++p; 1008 else 1009 /* ksh -c 'cat <<EOF' can cause this */ 1010 yyerror(Tf_heredoc, 1011 evalstr((*p)->delim, 0)); 1012 } 1013 return (c); 1014 } 1015 1016 /* terminate word */ 1017 *wp++ = EOS; 1018 yylval.cp = Xclose(ws, wp); 1019 if (state == SWORD || state == SLETPAREN 1020 /* XXX ONEWORD? 
*/) 1021 return (LWORD); 1022 1023 /* unget terminator */ 1024 ungetsc(c); 1025 1026 /* 1027 * note: the alias-vs-function code below depends on several 1028 * interna: starting from here, source->str is not modified; 1029 * the way getsc() and ungetsc() operate; etc. 1030 */ 1031 1032 /* copy word to unprefixed string ident */ 1033 sp = yylval.cp; 1034 dp = ident; 1035 while ((dp - ident) < IDENT && (c = *sp++) == CHAR) 1036 *dp++ = *sp++; 1037 if (c != EOS) 1038 /* word is not unquoted, or space ran out */ 1039 dp = ident; 1040 /* make sure the ident array stays NUL padded */ 1041 memset(dp, 0, (ident + IDENT) - dp + 1); 1042 1043 if (*ident != '\0' && (cf & (KEYWORD | ALIAS))) { 1044 struct tbl *p; 1045 uint32_t h = hash(ident); 1046 1047 if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) && 1048 (!(cf & ESACONLY) || p->val.i == ESAC || 1049 (unsigned int)p->val.i == ORD(/*{*/ '}'))) { 1050 afree(yylval.cp, ATEMP); 1051 return (p->val.i); 1052 } 1053 if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) && 1054 (p->flag & ISSET)) { 1055 /* 1056 * this still points to the same character as the 1057 * ungetsc'd terminator from above 1058 */ 1059 const char *cp = source->str; 1060 1061 /* prefer POSIX but not Korn functions over aliases */ 1062 while (ctype(*cp, C_BLANK)) 1063 /* 1064 * this is like getsc() without skipping 1065 * over Source boundaries (including not 1066 * parsing ungetsc'd characters that got 1067 * pushed into an SREREAD) which is what 1068 * we want here anyway: find out whether 1069 * the alias name is followed by a POSIX 1070 * function definition 1071 */ 1072 ++cp; 1073 /* prefer functions over aliases */ 1074 if (cp[0] != '(' || cp[1] != ')') { 1075 Source *s = source; 1076 1077 while (s && (s->flags & SF_HASALIAS)) 1078 if (s->u.tblp == p) 1079 return (LWORD); 1080 else 1081 s = s->next; 1082 /* push alias expansion */ 1083 s = pushs(SALIAS, source->areap); 1084 s->start = s->str = p->val.s; 1085 s->u.tblp = p; 1086 s->flags |= 
SF_HASALIAS; 1087 s->line = source->line; 1088 s->next = source; 1089 if (source->type == SEOF) { 1090 /* prevent infinite recursion at EOS */ 1091 source->u.tblp = p; 1092 source->flags |= SF_HASALIAS; 1093 } 1094 source = s; 1095 afree(yylval.cp, ATEMP); 1096 goto Again; 1097 } 1098 } 1099 } else if (*ident == '\0') { 1100 /* retain typeset et al. even when quoted */ 1101 struct tbl *tt = get_builtin((dp = wdstrip(yylval.cp, 0))); 1102 uint32_t flag = tt ? tt->flag : 0; 1103 1104 if (flag & (DECL_UTIL | DECL_FWDR)) 1105 strlcpy(ident, dp, sizeof(ident)); 1106 afree(dp, ATEMP); 1107 } 1108 1109 return (LWORD); 1110 } 1111 1112 static void 1113 gethere(void) 1114 { 1115 struct ioword **p; 1116 1117 for (p = heres; p < herep; p++) 1118 if (!((*p)->ioflag & IOHERESTR)) 1119 readhere(*p); 1120 herep = heres; 1121 } 1122 1123 /* 1124 * read "<<word" text into temp file 1125 */ 1126 1127 static void 1128 readhere(struct ioword *iop) 1129 { 1130 int c; 1131 const char *eof, *eofp; 1132 XString xs; 1133 char *xp; 1134 size_t xpos; 1135 1136 eof = evalstr(iop->delim, 0); 1137 1138 if (!(iop->ioflag & IOEVAL)) 1139 ignore_backslash_newline++; 1140 1141 Xinit(xs, xp, 256, ATEMP); 1142 1143 heredoc_read_line: 1144 /* beginning of line */ 1145 eofp = eof; 1146 xpos = Xsavepos(xs, xp); 1147 if (iop->ioflag & IOSKIP) { 1148 /* skip over leading tabs */ 1149 while ((c = getsc()) == '\t') 1150 ; /* nothing */ 1151 goto heredoc_parse_char; 1152 } 1153 heredoc_read_char: 1154 c = getsc(); 1155 heredoc_parse_char: 1156 /* compare with here document marker */ 1157 if (!*eofp) { 1158 /* end of here document marker, what to do? */ 1159 switch (c) { 1160 case ORD(/*(*/ ')'): 1161 if (!subshell_nesting_type) 1162 /*- 1163 * not allowed outside $(...) or (...) 1164 * => mismatch 1165 */ 1166 break; 1167 /* allow $(...) or (...) 
to close here */
			ungetsc(/*(*/ ')');
			/* FALLTHROUGH */
		case 0:
			/*
			 * Allow EOF here so commands without trailing
			 * newlines (mksh -c '...') will work as well.
			 */
		case ORD('\n'):
			/* Newline terminates here document marker */
			goto heredoc_found_terminator;
		}
	} else if ((unsigned int)c == ord(*eofp++))
		/* store; then read and compare next character */
		goto heredoc_store_and_loop;
	/* nope, mismatch; read until end of line */
	while (c != '\n') {
		if (!c)
			/* oops, reached EOF */
			yyerror(Tf_heredoc, eof);
		/* store character */
		Xcheck(xs, xp);
		Xput(xs, xp, c);
		/* read next character */
		c = getsc();
	}
	/* we read a newline as last character */
 heredoc_store_and_loop:
	/* store character */
	Xcheck(xs, xp);
	Xput(xs, xp, c);
	if (c == '\n')
		goto heredoc_read_line;
	goto heredoc_read_char;

 heredoc_found_terminator:
	/* jump back to saved beginning of line */
	xp = Xrestpos(xs, xp, xpos);
	/* terminate, close and store */
	Xput(xs, xp, '\0');
	iop->heredoc = Xclose(xs, xp);

	if (!(iop->ioflag & IOEVAL))
		ignore_backslash_newline--;
}

/*
 * Report an error from the lexer/parser.
 *
 * Drops any SALIAS and SREREAD entries from the top of the source
 * stack, discards the unread input of the source that remains, then
 * prints the error prefix, the printf-style message (fmt plus its
 * variable arguments) and a newline to shl_out, and calls errorfz().
 * NOTE(review): errorfz() presumably unwinds instead of returning
 * here; confirm against its definition.
 */
void
yyerror(const char *fmt, ...)
{
	va_list va;

	/* pop aliases and re-reads */
	while (source->type == SALIAS || source->type == SREREAD)
		source = source->next;
	/* zap pending input */
	source->str = null;

	error_prefix(true);
	va_start(va, fmt);
	shf_vfprintf(shl_out, fmt, va);
	shf_putc('\n', shl_out);
	va_end(va);
	errorfz();
}

/*
 * input for yylex with alias expansion
 */

/*
 * Allocate a new Source of the given type in areap and return it.
 * The structure is zeroed, its read pointer is set to "null" and its
 * area is recorded; for SFILE and SSTDIN sources the line buffer xs
 * is additionally initialised with 256 bytes. The new Source is not
 * linked into the source stack here; that is left to the caller.
 */
Source *
pushs(int type, Area *areap)
{
	Source *s;

	s = alloc(sizeof(Source), areap);
	memset(s, 0, sizeof(Source));
	s->type = type;
	s->str = null;
	s->areap = areap;
	if (type == SFILE || type == SSTDIN)
		XinitN(s->xs, 256, s->areap);
	return (s);
}

/*
 * Fetch the next character from the current source; backslash-newline
 * sequences are not looked at here (see getsc_bn for that).
 *
 * As long as the current source yields NUL, act on its type: read
 * another line (SSTDIN, SFILE), step through a word list (SWORDS,
 * SWORDSEP), or pop finished alias (SALIAS) and re-read (SREREAD)
 * sources off the source stack. Returns 0 when there is nothing left
 * to read; the source is then marked SEOF.
 */
static int
getsc_uu(void)
{
	Source *s = source;
	int c;

	while ((c = ord(*s->str++)) == 0) {
		/* return 0 for EOF by default */
		s->str = NULL;
		switch (s->type) {
		case SEOF:
			s->str = null;
			return (0);

		case SSTDIN:
		case SFILE:
			/* refill; leaves s->str NULL on EOF */
			getsc_line(s);
			break;

		case SWSTR:
			break;

		case SSTRING:
		case SSTRINGCMDLINE:
			break;

		case SWORDS:
			/* next word; a separator follows it */
			s->start = s->str = *s->u.strv++;
			s->type = SWORDSEP;
			break;

		case SWORDSEP:
			if (*s->u.strv == NULL) {
				/* no more words: finish with a newline */
				s->start = s->str = "\n";
				s->type = SEOF;
			} else {
				s->start = s->str = T1space;
				s->type = SWORDS;
			}
			break;

		case SALIAS:
			if (s->flags & SF_ALIASEND) {
				/* pass on an unused SF_ALIAS flag */
				source = s->next;
				source->flags |= s->flags & SF_ALIAS;
				s = source;
			} else if (*s->u.tblp->val.s &&
			    ctype((c = strnul(s->u.tblp->val.s)[-1]), C_SPACE)) {
				/* pop source stack */
				source = s = s->next;
				/*
				 * Note that this alias ended with a
				 * space, enabling alias expansion on
				 * the following word.
				 */
				s->flags |= SF_ALIAS;
			} else {
				/*
				 * At this point, we need to keep the current
				 * alias in the source list so recursive
				 * aliases can be detected and we also need to
				 * return the next character. Do this by
				 * temporarily popping the alias to get the
				 * next character and then put it back in the
				 * source list with the SF_ALIASEND flag set.
				 */
				/* pop source stack */
				source = s->next;
				source->flags |= s->flags & SF_ALIAS;
				c = getsc_uu();
				if (c) {
					s->flags |= SF_ALIASEND;
					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
					s->start = s->str = s->ugbuf;
					s->next = source;
					source = s;
				} else {
					s = source;
					/* avoid reading EOF twice */
					s->str = NULL;
					break;
				}
			}
			continue;

		case SREREAD:
			if (s->start != s->ugbuf)
				/* yuck */
				afree(s->u.freeme, ATEMP);
			source = s = s->next;
			continue;
		}
		/* still nothing to read: turn the source into EOF */
		if (s->str == NULL) {
			s->type = SEOF;
			s->start = s->str = null;
			return ('\0');
		}
		/* echo the newly obtained input if requested */
		if (s->flags & SF_ECHO) {
			shf_puts(s->str, shl_out);
			shf_flush(shl_out);
		}
	}
	return (c);
}

/*
 * Read the next line of input for source s (called from getsc_uu for
 * SSTDIN and SFILE sources) into s->xs and point s->start and s->str
 * at it; s->str is set to NULL if nothing could be read (EOF).
 *
 * For interactive input (FTALKING set and an SSTDIN source) this also
 * arms and disarms the input timeout alarm, prints the prompt or runs
 * the command line editor, saves the line to the history and finally
 * switches the prompt to PS2.
 */
static void
getsc_line(Source *s)
{
	char *xp = Xstring(s->xs, xp), *cp;
	bool interactive = Flag(FTALKING) && s->type == SSTDIN;
	bool have_tty = interactive && (s->flags & SF_TTY) && tty_hasstate;

	/* Done here to ensure nothing odd happens when a timeout occurs */
	XcheckN(s->xs, xp, LINE);
	*xp = '\0';
	s->start = s->str = xp;

	if (have_tty && ksh_tmout) {
		ksh_tmout_state = TMOUT_READING;
		alarm(ksh_tmout);
	}
	if (interactive) {
		if (cur_prompt == PS1)
			histsave(&s->line, NULL, HIST_FLUSH, true);
		change_winsz();
	}
#ifndef MKSH_NO_CMDLINE_EDITING
	if (have_tty && (
#if !MKSH_S_NOVI
	    Flag(FVI) ||
#endif
	    Flag(FEMACS) || Flag(FGMACS))) {
		/* an editing mode is active: let the line editor read */
		int nread;

		nread = x_read(xp);
		if (nread < 0)
			/* read error */
			nread = 0;
		xp[nread] = '\0';
		xp += nread;
	} else
#endif
	  {
		if (interactive)
			pprompt(prompt, 0);
		else
			s->line++;

		while (/* CONSTCOND */ 1) {
			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);

			/* interrupted read: run pending traps, then retry */
			if (!p && shf_error(s->u.shf) &&
			    shf_errno(s->u.shf) == EINTR) {
				shf_clearerr(s->u.shf);
				if (trap)
					runtraps(0);
				continue;
			}
			/* stop on EOF/error or once a full line was read */
			if (!p || (xp = p, xp[-1] == '\n'))
				break;
			/* double buffer size */
			/* move past NUL so doubling works... */
			xp++;
			XcheckN(s->xs, xp, Xlength(s->xs, xp));
			/* ...and move back again */
			xp--;
		}
		/*
		 * flush any unwanted input so other programs/builtins
		 * can read it. Not very optimal, but less error prone
		 * than flushing else where, dealing with redirections,
		 * etc.
		 * TODO: reduce size of shf buffer (~128?) if SSTDIN
		 */
		if (s->type == SSTDIN)
			shf_flush(s->u.shf);
	}
	/*
	 * XXX: temporary kludge to restore source after a
	 * trap may have been executed.
	 */
	source = s;
	if (have_tty && ksh_tmout) {
		ksh_tmout_state = TMOUT_EXECUTING;
		alarm(0);
	}
	cp = Xstring(s->xs, xp);
	rndpush(cp);
	s->start = s->str = cp;
	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
	/* Note: if input is all nulls, this is not eof */
	if (Xlength(s->xs, xp) == 0) {
		/* EOF */
		if (s->type == SFILE)
			shf_fdclose(s->u.shf);
		s->str = NULL;
	} else if (interactive && *s->str) {
		/* continuation lines are appended to the current entry */
		if (cur_prompt != PS1)
			histsave(&s->line, s->str, HIST_APPEND, true);
		else if (!ctype(*s->str, C_IFS | C_IFSWS))
			histsave(&s->line, s->str, HIST_QUEUE, true);
#if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
		else
			goto check_for_sole_return;
	} else if (interactive && cur_prompt == PS1) {
 check_for_sole_return:
		/* line consisting only of IFS whitespace: flush and sync */
		cp = Xstring(s->xs, xp);
		while (ctype(*cp, C_IFSWS))
			++cp;
		if (!*cp) {
			histsave(&s->line, NULL, HIST_FLUSH, true);
			histsync();
		}
#endif
	}
	if (interactive)
		set_prompt(PS2, NULL);
}

/*
 * Select the prompt to be used next and store it in the global
 * "prompt"; cur_prompt records which one is active.
 *
 * to: PS1 (command) or PS2 (command continuation); other values only
 *     update cur_prompt.
 * s:  may be NULL; if given, s->line + 1 is what "!" in PS1 expands
 *     to and what current_lineno is set to while PS1 is substituted.
 */
void
set_prompt(int to, Source *s)
{
	cur_prompt = (uint8_t)to;

	switch (to) {
	/* command */
	case PS1:
		/*
		 * Substitute ! and !! here, before substitutions are done
		 * so ! in expanded variables are not expanded.
		 * NOTE: this is not what AT&T ksh does (it does it after
		 * substitutions), POSIX doesn't say which is to be done.
		 */
		{
			struct shf *shf;
			char * volatile ps1;
			Area *saved_atemp;
			int saved_lineno;

			ps1 = str_val(global("PS1"));
			shf = shf_sopen(NULL, strlen(ps1) * 2,
			    SHF_WR | SHF_DYNAMIC, NULL);
			/* copy PS1: "!!" becomes "!", "!" the line number */
			while (*ps1)
				if (*ps1 != '!' || *++ps1 == '!')
					shf_putchar(*ps1++, shf);
				else
					shf_fprintf(shf, Tf_lu, s ?
					    (unsigned long)s->line + 1 : 0UL);
			ps1 = shf_sclose(shf);
			saved_lineno = current_lineno;
			if (s)
				current_lineno = s->line + 1;
			/* the result must be allocated in the outer ATEMP */
			saved_atemp = ATEMP;
			newenv(E_ERRH);
			if (kshsetjmp(e->jbuf)) {
				prompt = safe_prompt;
				/*
				 * Don't print an error - assume it has already
				 * been printed. Reason is we may have forked
				 * to run a command and the child may be
				 * unwinding its stack through this code as it
				 * exits.
				 */
			} else {
				char *cp = substitute(ps1, 0);
				strdupx(prompt, cp, saved_atemp);
			}
			current_lineno = saved_lineno;
			quitenv(NULL);
		}
		break;
	/* command continuation */
	case PS2:
		prompt = str_val(global("PS2"));
		break;
	}
}

/*
 * Print the prompt string cp to shl_out and return its width,
 * computed as x_cols * lines + columns.
 *
 * A character is only written once the running width exceeds
 * ntruncate, or while inside a pair of delimiter characters (see
 * below); an ntruncate of -1 suppresses all output so that only the
 * width is computed.
 */
int
pprompt(const char *cp, int ntruncate)
{
	char delimiter = 0;
	bool doprint = (ntruncate != -1);
	bool indelimit = false;
	int columns = 0, lines = 0;

	/*
	 * Undocumented AT&T ksh feature:
	 * If the second char in the prompt string is \r then the first
	 * char is taken to be a non-printing delimiter and any chars
	 * between two instances of the delimiter are not considered to
	 * be part of the prompt length
	 */
	if (*cp && cp[1] == '\r') {
		delimiter = *cp;
		cp += 2;
	}
	for (; *cp; cp++) {
		if (indelimit && *cp != delimiter)
			/* between delimiters: not counted */
			;
		else if (ctype(*cp, C_CR | C_LF)) {
			lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
			columns = 0;
		} else if (*cp == '\t') {
			/* advance to the next multiple of eight */
			columns = (columns | 7) + 1;
		} else if (*cp == '\b') {
			if (columns > 0)
				columns--;
		} else if (*cp == delimiter)
			indelimit = !indelimit;
		else if (UTFMODE && (rtt2asc(*cp) > 0x7F)) {
			/* multibyte character: count and print as a unit */
			const char *cp2;
			columns += utf_widthadj(cp, &cp2);
			if (doprint && (indelimit ||
			    (ntruncate < (x_cols * lines + columns))))
				shf_write(cp, cp2 - cp, shl_out);
			cp = cp2 - /* loop increment */ 1;
			continue;
		} else
			columns++;
		/* the delimiter character itself is never printed */
		if (doprint && (*cp != delimiter) &&
		    (indelimit || (ntruncate < (x_cols * lines + columns))))
			shf_putc(*cp, shl_out);
	}
	if (doprint)
		shf_flush(shl_out);
	return (x_cols * lines + columns);
}

/*
 * Read the variable part of a ${...} expression (i.e. up to but not
 * including the :[-+?=#%] or close-brace).
 *
 * wsp is the XString being written and wp the current write position
 * in it. The characters read are appended at wp followed by a NUL;
 * the updated write position (just past that NUL) is returned. The
 * character that ended the variable part is pushed back onto the
 * input.
 */
static char *
get_brace_var(XString *wsp, char *wp)
{
	char c;
	enum parse_state {
		PS_INITIAL, PS_SAW_PERCENT, PS_SAW_HASH, PS_SAW_BANG,
		PS_IDENT, PS_NUMBER, PS_VAR1
	} state = PS_INITIAL;

	while (/* CONSTCOND */ 1) {
		c = getsc();
		/* State machine to figure out where the variable part ends. */
		switch (state) {
		case PS_SAW_HASH:
			if (ctype(c, C_VAR1)) {
				char c2;

				/* peek at the character after c */
				c2 = getsc();
				ungetsc(c2);
				if (ord(c2) != ORD(/*{*/ '}')) {
					/*
					 * NOTE(review): c is pushed back
					 * here and once more at "out";
					 * confirm this is intended.
					 */
					ungetsc(c);
					goto out;
				}
			}
			goto ps_common;
		case PS_SAW_BANG:
			switch (ord(c)) {
			case ORD('@'):
			case ORD('#'):
			case ORD('-'):
			case ORD('?'):
				goto out;
			}
			goto ps_common;
		case PS_INITIAL:
			switch (ord(c)) {
			case ORD('%'):
				state = PS_SAW_PERCENT;
				goto next;
			case ORD('#'):
				state = PS_SAW_HASH;
				goto next;
			case ORD('!'):
				state = PS_SAW_BANG;
				goto next;
			}
			/* FALLTHROUGH */
		case PS_SAW_PERCENT:
 ps_common:
			/* classify the first character of the name proper */
			if (ctype(c, C_ALPHX))
				state = PS_IDENT;
			else if (ctype(c, C_DIGIT))
				state = PS_NUMBER;
			else if (ctype(c, C_VAR1))
				state = PS_VAR1;
			else
				goto out;
			break;
		case PS_IDENT:
			if (!ctype(c, C_ALNUX)) {
				if (ord(c) == ORD('[')) {
					char *tmp, *p;

					if (!arraysub(&tmp))
						yyerror("missing ]");
					/* copy the [ and the saved subscript */
					*wp++ = c;
					p = tmp;
					while (*p) {
						Xcheck(*wsp, wp);
						*wp++ = *p++;
					}
					afree(tmp, ATEMP);
					/* the ] */
					c = getsc();
				}
				goto out;
			}
 next:
			break;
		case PS_NUMBER:
			if (!ctype(c, C_DIGIT))
				goto out;
			break;
		case PS_VAR1:
			goto out;
		}
		Xcheck(*wsp, wp);
		*wp++ = c;
	}
 out:
	/* end of variable part */
	*wp++ = '\0';
	ungetsc(c);
	return (wp);
}

/*
 * Save an array subscript - returns true if matching bracket found, false
 * if eof or newline was found.
 * (Returned string double null terminated)
 *
 * The text is collected in ATEMP and handed back through *strp. Every
 * character read is stored before it is examined, so the saved text
 * includes the character that ended the scan. Nested [ ] pairs are
 * tracked via a depth counter.
 */
static bool
arraysub(char **strp)
{
	XString ws;
	char *wp, c;
	/* we are just past the initial [ */
	unsigned int depth = 1;

	Xinit(ws, wp, 32, ATEMP);

	do {
		c = getsc();
		Xcheck(ws, wp);
		*wp++ = c;
		if (ord(c) == ORD('['))
			depth++;
		else if (ord(c) == ORD(']'))
			depth--;
	} while (depth > 0 && c && c != '\n');

	*wp++ = '\0';
	*strp = Xclose(ws, wp);

	return (tobool(depth == 0));
}

/*
 * Unget a char: handles case when we are already at the start of the buffer
 *
 * Besides pushing c back via ungetsc_i, this decrements a non-zero
 * backslash_skip and steps every non-empty retrace buffer on the
 * retrace_info list back by one character. A NUL is not pushed back
 * while the current source's read pointer is "null".
 */
static void
ungetsc(int c)
{
	struct sretrace_info *rp = retrace_info;

	if (backslash_skip)
		backslash_skip--;
	/* Don't unget EOF... */
	if (source->str == null && c == '\0')
		return;
	while (rp) {
		if (Xlength(rp->xs, rp->xp))
			rp->xp--;
		rp = rp->next;
	}
	ungetsc_i(c);
}
/*
 * Low-level unget: step the read pointer of the current source back
 * by one if it is past the start of its buffer; otherwise push a new
 * SREREAD source, holding just c in its ugbuf, onto the source stack.
 */
static void
ungetsc_i(int c)
{
	if (source->str > source->start)
		source->str--;
	else {
		Source *s;

		s = pushs(SREREAD, source->areap);
		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
		s->start = s->str = s->ugbuf;
		s->next = source;
		source = s;
	}
}


/*
 * Called to get a char that isn't a \newline sequence.
 *
 * While ignore_backslash_newline is non-zero, characters are returned
 * unprocessed. backslash_skip tracks a backslash whose following
 * character was already inspected: 1 after such a backslash has been
 * returned, 2 once the character after it has been returned as well.
 */
static int
getsc_bn(void)
{
	int c, c2;

	if (ignore_backslash_newline)
		return (o_getsc_u());

	if (backslash_skip == 1) {
		/* character after a backslash: already checked below */
		backslash_skip = 2;
		return (o_getsc_u());
	}

	backslash_skip = 0;

	while (/* CONSTCOND */ 1) {
		c = o_getsc_u();
		if (c == '\\') {
			if ((c2 = o_getsc_u()) == '\n')
				/* ignore the \newline; get the next char... */
				continue;
			ungetsc_i(c2);
			backslash_skip = 1;
		}
		return (c);
	}
}

/*
 * Skip a UTF-8 byte order mark (EF BB BF) at the current input
 * position. If all three octets are found they are consumed and the
 * bit with value 8 is set in UTFMODE; otherwise everything read so
 * far is pushed back, in reverse order, and UTFMODE is left alone.
 */
void
yyskiputf8bom(void)
{
	int c;

	if (rtt2asc((c = o_getsc_u())) != 0xEF) {
		ungetsc_i(c);
		return;
	}
	if (rtt2asc((c = o_getsc_u())) != 0xBB) {
		ungetsc_i(c);
		ungetsc_i(asc2rtt(0xEF));
		return;
	}
	if (rtt2asc((c = o_getsc_u())) != 0xBF) {
		ungetsc_i(c);
		ungetsc_i(asc2rtt(0xBB));
		ungetsc_i(asc2rtt(0xEF));
		return;
	}
	UTFMODE |= 8;
}

/*
 * Allocate a new block of STATE_BSIZE lexer states in ATEMP and make
 * it the current block of si. Element 0 of the new block only stores
 * old_end in its ls_base member (used by pop_state_i to get back);
 * the returned pointer is element 1, the first usable state.
 */
static Lex_state *
push_state_i(State_info *si, Lex_state *old_end)
{
	Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);

	news[0].ls_base = old_end;
	si->base = &news[0];
	si->end = &news[STATE_BSIZE];
	return (&news[1]);
}

/*
 * Undo push_state_i: make the block whose end pointer is stored in
 * old_end->ls_base the current block of si again, free the block that
 * was current so far and return a pointer to the last state (index
 * STATE_BSIZE - 1) of the restored block.
 */
static Lex_state *
pop_state_i(State_info *si, Lex_state *old_end)
{
	Lex_state *old_base = si->base;

	si->base = old_end->ls_base - STATE_BSIZE;
	si->end = old_end->ls_base;

	afree(old_base, ATEMP);

	return (si->base + STATE_BSIZE - 1);
}