/*	$OpenBSD: lex.c,v 1.51 2015/09/10 22:48:58 nicm Exp $	*/

/*-
 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
 *		 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018
 *	mirabilos <m (at) mirbsd.org>
 *
 * Provided that these terms and disclaimer and all copyright notices
 * are retained or reproduced in an accompanying document, permission
 * is granted to deal in this work without restriction, including un-
 * limited rights to use, publicly perform, distribute, sell, modify,
 * merge, give away, or sublicence.
 *
 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
 * the utmost extent permitted by applicable law, neither express nor
 * implied; without malicious intent or gross negligence. In no event
 * may a licensor, author or contributor be held liable for indirect,
 * direct, other damage, loss, or other issues arising in any way out
 * of dealing in the work, even if advised of the possibility of such
 * damage or existence of a defect, except proven that it results out
 * of said person's immediate fault when using the work as intended.
 */

#include "sh.h"

__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.247 2018/01/14 01:44:01 tg Exp $");

/*
 * states while lexing word
 *
 * These values are stored in Lex_state.type and in the local "state"
 * variable of yylex(); SINVALID marks the sentinel entry at the base
 * of the state stack.
 */
#define SBASE		0	/* outside any lexical constructs */
#define SWORD		1	/* implicit quoting for substitute() */
#define SLETPAREN	2	/* inside (( )), implicit quoting */
#define SSQUOTE		3	/* inside '' */
#define SDQUOTE		4	/* inside "" */
#define SEQUOTE		5	/* inside $'' */
#define SBRACE		6	/* inside ${} */
#define SQBRACE		7	/* inside "${}" */
#define SBQUOTE		8	/* inside `` */
#define SASPAREN	9	/* inside $(( )) */
#define SHEREDELIM	10	/* parsing << or <<- delimiter */
#define SHEREDQUOTE	11	/* parsing " in << or <<- delimiter */
#define SPATTERN	12	/* parsing *(...|...) pattern (*+?@!) */
#define SADELIM		13	/* like SBASE, looking for delimiter */
#define STBRACEKORN	14	/* parsing ${...[#%]...} !FSH */
#define STBRACEBOURNE	15	/* parsing ${...[#%]...} FSH */
#define SINVALID	255	/* invalid state */

/*
 * One entry per active "retrace": every character returned by getsc()
 * is also appended to the xs buffer of each entry in the retrace_info
 * list (see o_getsc_r below), so that the raw input consumed while in
 * a state can be recovered later (see POP_SRETRACE).
 */
struct sretrace_info {
	struct sretrace_info *next;
	XString xs;
	char *xp;
};

/*
 * Structure to keep track of the lexing state and the various pieces of info
 * needed for each particular state.
 */
typedef struct lex_state {
	union {
		/* point to the next state block */
		struct lex_state *base;
		/* marks start of state output in output string */
		size_t start;
		/* SBQUOTE: true if in double quotes: "`...`" */
		/* SEQUOTE: got NUL, ignore rest of string */
		bool abool;
		/* SADELIM information */
		struct {
			/* character to search for */
			unsigned char delimiter;
			/* max. number of delimiters */
			unsigned char num;
		} adelim;
	} u;
	/* count open parentheses */
	short nparen;
	/* type of this state */
	uint8_t type;
} Lex_state;
/* shorthands for the members of the union above */
#define ls_base		u.base
#define ls_start	u.start
#define ls_bool		u.abool
#define ls_adelim	u.adelim

/*
 * Bounds of the current block of the state stack; compared against
 * statep by PUSH_STATE/POP_STATE to detect when another block must
 * be entered or left.
 */
typedef struct {
	Lex_state *base;
	Lex_state *end;
} State_info;

static void readhere(struct ioword *);
static void ungetsc(int);
static void ungetsc_i(int);
static int getsc_uu(void);
static void getsc_line(Source *);
static int getsc_bn(void);
static int getsc_i(void);
static char *get_brace_var(XString *, char *);
static bool arraysub(char **);
static void gethere(void);
static Lex_state *push_state_i(State_info *, Lex_state *);
static Lex_state *pop_state_i(State_info *, Lex_state *);

/* both are reset to 0 at the start of every yylex() pass */
static int backslash_skip;
static int ignore_backslash_newline;

/*
 * optimised getsc_bn(): take the next byte straight from source->str
 * unless it is NUL or a backslash or backslash_skip is set, in which
 * case the full function is called
 */
#define o_getsc()	(*source->str != '\0' && *source->str != '\\' && \
			!backslash_skip ? *source->str++ : getsc_bn())
/* optimised getsc_uu(): only call the function at end of buffer */
#define o_getsc_u()	((*source->str != '\0') ? *source->str++ : getsc_uu())

/*
 * retrace helper: forms the complete body of a function returning int;
 * it declares locals, appends the character carg to every buffer in
 * the retrace_info list and returns the character
 */
#define o_getsc_r(carg)					\
	int cev = (carg);				\
	struct sretrace_info *rp = retrace_info;	\
							\
	while (rp) {					\
		Xcheck(rp->xs, rp->xp);			\
		*rp->xp++ = cev;			\
		rp = rp->next;				\
	}						\
							\
	return (cev);

/*
 * callback: function form of getsc() (reads one character and records
 * it in the retrace buffers); yylex() passes it to unbksl()
 */
static int
getsc_i(void)
{
	o_getsc_r((unsigned int)(unsigned char)o_getsc());
}

#if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST)
/* small build: always go through the function */
#define getsc()		getsc_i()
#else
static int getsc_r(int);

/* record the already-read character c in the retrace buffers, return it */
static int
getsc_r(int c)
{
	o_getsc_r(c);
}

/* fast build: the o_getsc() fast path is expanded at the call site */
#define getsc()		getsc_r((unsigned int)(unsigned char)o_getsc())
#endif

/* number of Lex_state entries per state stack block */
#define STATE_BSIZE	8

/*
 * Enter state s. Uses the yylex() locals statep, state_info and state;
 * push_state_i() is called when the current block is exhausted.
 */
#define PUSH_STATE(s)	do {					\
	if (++statep == state_info.end)				\
		statep = push_state_i(&state_info, statep);	\
	state = statep->type = (s);				\
} while (/* CONSTCOND */ 0)

/*
 * Leave the current state and reload "state" from the entry below;
 * pop_state_i() is called when the base of the current block is hit.
 */
#define POP_STATE()	do {					\
	if (--statep == state_info.base)			\
		statep = pop_state_i(&state_info, statep);	\
	state = statep->type;					\
} while (/* CONSTCOND */ 0)

/*
 * Enter state s and start a retrace: remember the current position in
 * the output word (ws/wp) and push a fresh buffer onto retrace_info.
 */
#define PUSH_SRETRACE(s) do {					\
	struct sretrace_info *ri;				\
								\
	PUSH_STATE(s);						\
	statep->ls_start = Xsavepos(ws, wp);			\
	ri = alloc(sizeof(struct sretrace_info), ATEMP);	\
	Xinit(ri->xs, ri->xp, 64, ATEMP);			\
	ri->next = retrace_info;				\
	retrace_info = ri;					\
} while (/* CONSTCOND */ 0)

/*
 * End a retrace: rewind wp to the remembered position, NUL-terminate
 * the retraced text and leave it in sp (which the users in yylex()
 * afree() or hand on), free the list entry via dp and pop the state.
 */
#define POP_SRETRACE()	do {					\
	wp = Xrestpos(ws, wp, statep->ls_start);		\
	*retrace_info->xp = '\0';				\
	sp = Xstring(retrace_info->xs, retrace_info->xp);	\
	dp = (void *)retrace_info;				\
	retrace_info = retrace_info->next;			\
	afree(dp, ATEMP);					\
	POP_STATE();						\
} while (/* CONSTCOND */ 0)

/**
 * Lexical analyser
 *
 * tokens are not regular expressions, they are LL(1).
 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
 * hence the state stack. Note "$(...)" are now parsed recursively.
 */

/*
 * Read the next token from the current input source.
 *
 * cf is a bit mask of context flags; the ones tested here are ONEWORD,
 * LETEXPR, HEREDELIM, HEREDOC, CMDASN, CMDWORD, KEYWORD, ESACONLY,
 * ALIAS, LQCHAR and CONTIN.
 *
 * Returns one of:
 * - REDIR, with yylval.iop set to a newly allocated struct ioword;
 * - LWORD, with yylval.cp set to the encoded word (CHAR/QCHAR/OQUOTE/...
 *   markers, terminated by EOS) and, for unquoted words, ident holding
 *   the plain text;
 * - the value of a matching keyword table entry (if cf & KEYWORD);
 * - the terminating character itself, or one of the operator tokens
 *   BREAK, LOGOR, LOGAND, MDPAREN, COPROC, BRKEV, BRKFT, if no word
 *   was collected.
 *
 * Aliases are expanded by pushing an SALIAS source and restarting at
 * the Again label.
 */
int
yylex(int cf)
{
	Lex_state states[STATE_BSIZE], *statep, *s2, *base;
	State_info state_info;
	int c, c2, state;
	size_t cz;
	XString ws;		/* expandable output word */
	char *wp;		/* output word pointer */
	char *sp, *dp;

 Again:
	/* states[0] is a sentinel; the initial state lives in states[1] */
	states[0].type = SINVALID;
	states[0].ls_base = NULL;
	statep = &states[1];
	state_info.base = states;
	state_info.end = &state_info.base[STATE_BSIZE];

	Xinit(ws, wp, 64, ATEMP);

	backslash_skip = 0;
	ignore_backslash_newline = 0;

	if (cf & ONEWORD)
		state = SWORD;
	else if (cf & LETEXPR) {
		/* enclose arguments in (double) quotes */
		*wp++ = OQUOTE;
		state = SLETPAREN;
		statep->nparen = 0;
	} else {
		/* normal lexing */
		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
		/* skip leading blanks */
		do {
			c = getsc();
		} while (ctype(c, C_BLANK));
		if (c == '#') {
			/* skip comment up to, not including, newline or NUL */
			ignore_backslash_newline++;
			do {
				c = getsc();
			} while (!ctype(c, C_NUL | C_LF));
			ignore_backslash_newline--;
		}
		ungetsc(c);
	}
	if (source->flags & SF_ALIAS) {
		/* trailing ' ' in alias definition */
		source->flags &= ~SF_ALIAS;
		/* POSIX: trailing space only counts if parsing simple cmd */
		if (!Flag(FPOSIX) || (cf & CMDWORD))
			cf |= ALIAS;
	}

	/* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
	statep->type = state;

	/* collect non-special or quoted characters to form word */
	while (!((c = getsc()) == 0 ||
	    ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
		if (state == SBASE &&
		    subshell_nesting_type == ORD(/*{*/ '}') &&
		    (unsigned int)c == ORD(/*{*/ '}'))
			/* possibly end ${ :;} */
			break;
		Xcheck(ws, wp);
		switch (state) {
		case SADELIM:
			if ((unsigned int)c == ORD('('))
				statep->nparen++;
			else if ((unsigned int)c == ORD(')'))
				statep->nparen--;
			else if (statep->nparen == 0 &&
			    ((unsigned int)c == ORD(/*{*/ '}') ||
			    c == (int)statep->ls_adelim.delimiter)) {
				*wp++ = ADELIM;
				*wp++ = c;
				/*
				 * leave SADELIM on closing brace or once
				 * all expected delimiters were seen; a
				 * closing brace pops a second state too
				 */
				if ((unsigned int)c == ORD(/*{*/ '}') ||
				    --statep->ls_adelim.num == 0)
					POP_STATE();
				if ((unsigned int)c == ORD(/*{*/ '}'))
					POP_STATE();
				break;
			}
			/* FALLTHROUGH */
		case SBASE:
			if ((unsigned int)c == ORD('[') && (cf & CMDASN)) {
				/* temporary */
				*wp = EOS;
				if (is_wdvarname(Xstring(ws, wp), false)) {
					char *p, *tmp;

					if (arraysub(&tmp)) {
						*wp++ = CHAR;
						*wp++ = c;
						for (p = tmp; *p; ) {
							Xcheck(ws, wp);
							*wp++ = CHAR;
							*wp++ = *p++;
						}
						afree(tmp, ATEMP);
						break;
					}
				}
				*wp++ = CHAR;
				*wp++ = c;
				break;
			}
			/* FALLTHROUGH */
 Sbase1:		/* includes *(...|...) pattern (*+?@!) */
			if (ctype(c, C_PATMO)) {
				c2 = getsc();
				if ((unsigned int)c2 == ORD('(' /*)*/)) {
					*wp++ = OPAT;
					*wp++ = c;
					PUSH_STATE(SPATTERN);
					break;
				}
				ungetsc(c2);
			}
			/* FALLTHROUGH */
 Sbase2:		/* doesn't include *(...|...) pattern (*+?@!) */
			switch (c) {
			case ORD('\\'):
 getsc_qchar:
				if ((c = getsc())) {
					/* trailing \ is lost */
					*wp++ = QCHAR;
					*wp++ = c;
				}
				break;
			case ORD('\''):
 open_ssquote_unless_heredoc:
				if ((cf & HEREDOC))
					goto store_char;
				*wp++ = OQUOTE;
				ignore_backslash_newline++;
				PUSH_STATE(SSQUOTE);
				break;
			case ORD('"'):
 open_sdquote:
				*wp++ = OQUOTE;
				PUSH_STATE(SDQUOTE);
				break;
			case ORD('$'):
				/*
				 * processing of dollar sign belongs into
				 * Subst, except for those which can open
				 * a string: $'' and $""
				 */
 subst_dollar_ex:
				c = getsc();
				switch (c) {
				case ORD('"'):
					goto open_sdquote;
				case ORD('\''):
					goto open_sequote;
				default:
					goto SubstS;
				}
			default:
				goto Subst;
			}
			break;

 Subst:
			switch (c) {
			case ORD('\\'):
				c = getsc();
				switch (c) {
				case ORD('"'):
					if ((cf & HEREDOC))
						goto heredocquote;
					/* FALLTHROUGH */
				case ORD('\\'):
				case ORD('$'):
				case ORD('`'):
 store_qchar:
					*wp++ = QCHAR;
					*wp++ = c;
					break;
				default:
 heredocquote:
					Xcheck(ws, wp);
					if (c) {
						/* trailing \ is lost */
						*wp++ = CHAR;
						*wp++ = '\\';
						*wp++ = CHAR;
						*wp++ = c;
					}
					break;
				}
				break;
			case ORD('$'):
				c = getsc();
 SubstS:
				if ((unsigned int)c == ORD('(' /*)*/)) {
					c = getsc();
					if ((unsigned int)c == ORD('(' /*)*/)) {
						/* $(( - retrace in case it is really $( ( */
						*wp++ = EXPRSUB;
						PUSH_SRETRACE(SASPAREN);
						statep->nparen = 2;
						*retrace_info->xp++ = '(';
					} else {
						ungetsc(c);
 subst_command:
						c = COMSUB;
 subst_command2:
						/* copy type byte, then text including its NUL */
						sp = yyrecursive(c);
						cz = strlen(sp) + 1;
						XcheckN(ws, wp, cz);
						*wp++ = c;
						memcpy(wp, sp, cz);
						wp += cz;
					}
				} else if ((unsigned int)c == ORD('{' /*}*/)) {
					if ((unsigned int)(c = getsc()) == ORD('|')) {
						/*
						 * non-subenvironment
						 * value substitution
						 */
						c = VALSUB;
						goto subst_command2;
					} else if (ctype(c, C_IFSWS)) {
						/*
						 * non-subenvironment
						 * "command" substitution
						 */
						c = FUNSUB;
						goto subst_command2;
					}
					ungetsc(c);
					*wp++ = OSUBST;
					*wp++ = '{' /*}*/;
					wp = get_brace_var(&ws, wp);
					c = getsc();
					/* allow :# and :% (ksh88 compat) */
					if ((unsigned int)c == ORD(':')) {
						*wp++ = CHAR;
						*wp++ = c;
						c = getsc();
						if ((unsigned int)c == ORD(':')) {
							*wp++ = CHAR;
							*wp++ = '0';
							*wp++ = ADELIM;
							*wp++ = ':';
							PUSH_STATE(SBRACE);
							PUSH_STATE(SADELIM);
							statep->ls_adelim.delimiter = ':';
							statep->ls_adelim.num = 1;
							statep->nparen = 0;
							break;
						} else if (ctype(c, C_DIGIT | C_DOLAR | C_SPC) ||
						    /*XXX what else? */
						    c == '(' /*)*/) {
							/* substring subst. */
							if (c != ' ') {
								*wp++ = CHAR;
								*wp++ = ' ';
							}
							ungetsc(c);
							PUSH_STATE(SBRACE);
							PUSH_STATE(SADELIM);
							statep->ls_adelim.delimiter = ':';
							statep->ls_adelim.num = 2;
							statep->nparen = 0;
							break;
						}
					} else if (c == '/') {
						c2 = ADELIM;
 parse_adelim_slash:
						*wp++ = CHAR;
						*wp++ = c;
						if ((unsigned int)(c = getsc()) == ORD('/')) {
							*wp++ = c2;
							*wp++ = c;
						} else
							ungetsc(c);
						PUSH_STATE(SBRACE);
						PUSH_STATE(SADELIM);
						statep->ls_adelim.delimiter = '/';
						statep->ls_adelim.num = 1;
						statep->nparen = 0;
						break;
					} else if (c == '@') {
						/* peek: only @/ is handled here */
						c2 = getsc();
						ungetsc(c2);
						if ((unsigned int)c2 == ORD('/')) {
							c2 = CHAR;
							goto parse_adelim_slash;
						}
					}
					/*
					 * If this is a trim operation,
					 * treat (,|,) specially in STBRACE.
					 */
					if (ctype(c, C_SUB2)) {
						ungetsc(c);
						if (Flag(FSH))
							PUSH_STATE(STBRACEBOURNE);
						else
							PUSH_STATE(STBRACEKORN);
					} else {
						ungetsc(c);
						if (state == SDQUOTE ||
						    state == SQBRACE)
							PUSH_STATE(SQBRACE);
						else
							PUSH_STATE(SBRACE);
					}
				} else if (ctype(c, C_ALPHX)) {
					/* $name */
					*wp++ = OSUBST;
					*wp++ = 'X';
					do {
						Xcheck(ws, wp);
						*wp++ = c;
						c = getsc();
					} while (ctype(c, C_ALNUX));
					*wp++ = '\0';
					*wp++ = CSUBST;
					*wp++ = 'X';
					ungetsc(c);
				} else if (ctype(c, C_VAR1 | C_DIGIT)) {
					/* single-character parameter */
					Xcheck(ws, wp);
					*wp++ = OSUBST;
					*wp++ = 'X';
					*wp++ = c;
					*wp++ = '\0';
					*wp++ = CSUBST;
					*wp++ = 'X';
				} else {
					/* literal dollar sign */
					*wp++ = CHAR;
					*wp++ = '$';
					ungetsc(c);
				}
				break;
			case ORD('`'):
 subst_gravis:
				PUSH_STATE(SBQUOTE);
				*wp++ = COMASUB;
				/*
				 * We need to know whether we are within double
				 * quotes in order to translate \" to " within
				 * "`\"`" because, unlike for COMSUBs, the
				 * outer double quoting changes the backslash
				 * meaning for the inside. For more details:
				 * http://austingroupbugs.net/view.php?id=1015
				 */
				statep->ls_bool = false;
				s2 = statep;
				base = state_info.base;
				/* walk down the state stack, block by block */
				while (/* CONSTCOND */ 1) {
					for (; s2 != base; s2--) {
						if (s2->type == SDQUOTE) {
							statep->ls_bool = true;
							break;
						}
					}
					if (s2 != base)
						break;
					if (!(s2 = s2->ls_base))
						break;
					base = s2-- - STATE_BSIZE;
				}
				break;
			case QCHAR:
				if (cf & LQCHAR) {
					*wp++ = QCHAR;
					*wp++ = getsc();
					break;
				}
				/* FALLTHROUGH */
			default:
 store_char:
				*wp++ = CHAR;
				*wp++ = c;
			}
			break;

		case SEQUOTE:
			if ((unsigned int)c == ORD('\'')) {
				POP_STATE();
				*wp++ = CQUOTE;
				ignore_backslash_newline--;
			} else if ((unsigned int)c == ORD('\\')) {
				if ((c2 = unbksl(true, getsc_i, ungetsc)) == -1)
					c2 = getsc();
				/* NUL: drop the remainder of the string */
				if (c2 == 0)
					statep->ls_bool = true;
				if (!statep->ls_bool) {
					char ts[4];

					if ((unsigned int)c2 < 0x100) {
						*wp++ = QCHAR;
						*wp++ = c2;
					} else {
						/* values from 0x100 on: emit UTF-8 of (c2 - 0x100) */
						cz = utf_wctomb(ts, c2 - 0x100);
						ts[cz] = 0;
						cz = 0;
						do {
							*wp++ = QCHAR;
							*wp++ = ts[cz];
						} while (ts[++cz]);
					}
				}
			} else if (!statep->ls_bool) {
				*wp++ = QCHAR;
				*wp++ = c;
			}
			break;

		case SSQUOTE:
			if ((unsigned int)c == ORD('\'')) {
				POP_STATE();
				if ((cf & HEREDOC) || state == SQBRACE)
					goto store_char;
				*wp++ = CQUOTE;
				ignore_backslash_newline--;
			} else {
				*wp++ = QCHAR;
				*wp++ = c;
			}
			break;

		case SDQUOTE:
			if ((unsigned int)c == ORD('"')) {
				POP_STATE();
				*wp++ = CQUOTE;
			} else
				goto Subst;
			break;

		/* $(( ... )) */
		case SASPAREN:
			if ((unsigned int)c == ORD('('))
				statep->nparen++;
			else if ((unsigned int)c == ORD(')')) {
				statep->nparen--;
				if (statep->nparen == 1) {
					/* end of EXPRSUB */
					POP_SRETRACE();

					if ((unsigned int)(c2 = getsc()) == ORD(/*(*/ ')')) {
						/* copy retraced text without first and last char */
						cz = strlen(sp) - 2;
						XcheckN(ws, wp, cz);
						memcpy(wp, sp + 1, cz);
						wp += cz;
						afree(sp, ATEMP);
						*wp++ = '\0';
						break;
					} else {
						Source *s;

						ungetsc(c2);
						/*
						 * mismatched parenthesis -
						 * assume we were really
						 * parsing a $(...) expression
						 */
						--wp;
						/* re-read the retraced text as input */
						s = pushs(SREREAD,
						    source->areap);
						s->start = s->str =
						    s->u.freeme = sp;
						s->next = source;
						source = s;
						goto subst_command;
					}
				}
			}
			/* reuse existing state machine */
			goto Sbase2;

		case SQBRACE:
			if ((unsigned int)c == ORD('\\')) {
				/*
				 * perform POSIX "quote removal" if the back-
				 * slash is "special", i.e. same cases as the
				 * {case '\\':} in Subst: plus closing brace;
				 * in mksh code "quote removal" on '\c' means
				 * write QCHAR+c, otherwise CHAR+\+CHAR+c are
				 * emitted (in heredocquote:)
				 */
				if ((unsigned int)(c = getsc()) == ORD('"') ||
				    (unsigned int)c == ORD('\\') ||
				    ctype(c, C_DOLAR | C_GRAVE) ||
				    (unsigned int)c == ORD(/*{*/ '}'))
					goto store_qchar;
				goto heredocquote;
			}
			goto common_SQBRACE;

		case SBRACE:
			if ((unsigned int)c == ORD('\''))
				goto open_ssquote_unless_heredoc;
			else if ((unsigned int)c == ORD('\\'))
				goto getsc_qchar;
 common_SQBRACE:
			if ((unsigned int)c == ORD('"'))
				goto open_sdquote;
			else if ((unsigned int)c == ORD('$'))
				goto subst_dollar_ex;
			else if ((unsigned int)c == ORD('`'))
				goto subst_gravis;
			else if ((unsigned int)c != ORD(/*{*/ '}'))
				goto store_char;
			/* closing brace ends the substitution */
			POP_STATE();
			*wp++ = CSUBST;
			*wp++ = /*{*/ '}';
			break;

		/* Same as SBASE, except (,|,) treated specially */
		case STBRACEKORN:
			if ((unsigned int)c == ORD('|'))
				*wp++ = SPAT;
			else if ((unsigned int)c == ORD('(')) {
				*wp++ = OPAT;
				/* simile for @ */
				*wp++ = ' ';
				PUSH_STATE(SPATTERN);
			} else /* FALLTHROUGH */
		/* note: this case label sits inside the else branch above */
		case STBRACEBOURNE:
			  if ((unsigned int)c == ORD(/*{*/ '}')) {
				POP_STATE();
				*wp++ = CSUBST;
				*wp++ = /*{*/ '}';
			} else
				goto Sbase1;
			break;

		case SBQUOTE:
			if ((unsigned int)c == ORD('`')) {
				*wp++ = 0;
				POP_STATE();
			} else if ((unsigned int)c == ORD('\\')) {
				switch (c = getsc()) {
				case 0:
					/* trailing \ is lost */
					break;
				case ORD('$'):
				case ORD('`'):
				case ORD('\\'):
					*wp++ = c;
					break;
				case ORD('"'):
					/* ls_bool: inside double quotes, see subst_gravis */
					if (statep->ls_bool) {
						*wp++ = c;
						break;
					}
					/* FALLTHROUGH */
				default:
					*wp++ = '\\';
					*wp++ = c;
					break;
				}
			} else
				*wp++ = c;
			break;

		/* ONEWORD */
		case SWORD:
			goto Subst;

		/* LETEXPR: (( ... )) */
		case SLETPAREN:
			if ((unsigned int)c == ORD(/*(*/ ')')) {
				if (statep->nparen > 0)
					--statep->nparen;
				else if ((unsigned int)(c2 = getsc()) == ORD(/*(*/ ')')) {
					c = 0;
					*wp++ = CQUOTE;
					goto Done;
				} else {
					Source *s;

					ungetsc(c2);
					ungetsc(c);
					/*
					 * mismatched parenthesis -
					 * assume we were really
					 * parsing a (...) expression
					 */
					*wp = EOS;
					sp = Xstring(ws, wp);
					/* skip the OQUOTE stored at the start */
					dp = wdstrip(sp + 1, WDS_TPUTS);
					s = pushs(SREREAD, source->areap);
					s->start = s->str = s->u.freeme = dp;
					s->next = source;
					source = s;
					ungetsc('(' /*)*/);
					return (ORD('(' /*)*/));
				}
			} else if ((unsigned int)c == ORD('('))
				/*
				 * parentheses inside quotes and
				 * backslashes are lost, but AT&T ksh
				 * doesn't count them either
				 */
				++statep->nparen;
			goto Sbase2;

		/* << or <<- delimiter */
		case SHEREDELIM:
			/*
			 * here delimiters need a special case since
			 * $ and `...` are not to be treated specially
			 */
			switch (c) {
			case ORD('\\'):
				if ((c = getsc())) {
					/* trailing \ is lost */
					*wp++ = QCHAR;
					*wp++ = c;
				}
				break;
			case ORD('\''):
				goto open_ssquote_unless_heredoc;
			case ORD('$'):
				if ((unsigned int)(c2 = getsc()) == ORD('\'')) {
 open_sequote:
					*wp++ = OQUOTE;
					ignore_backslash_newline++;
					PUSH_STATE(SEQUOTE);
					statep->ls_bool = false;
					break;
				} else if ((unsigned int)c2 == ORD('"')) {
					/* FALLTHROUGH */
			/* note: this case label sits inside the else-if body */
			case ORD('"'):
					PUSH_SRETRACE(SHEREDQUOTE);
					break;
				}
				ungetsc(c2);
				/* FALLTHROUGH */
			default:
				*wp++ = CHAR;
				*wp++ = c;
			}
			break;

		/* " in << or <<- delimiter */
		case SHEREDQUOTE:
			if ((unsigned int)c != ORD('"'))
				goto Subst;
			POP_SRETRACE();
			dp = strnul(sp) - 1;
			/* remove the trailing double quote */
			*dp = '\0';
			/* store the quoted string */
			*wp++ = OQUOTE;
			XcheckN(ws, wp, (dp - sp) * 2);
			dp = sp;
			while ((c = *dp++)) {
				if (c == '\\') {
					/* keep the backslash unless it escapes \ " $ ` */
					switch ((c = *dp++)) {
					case ORD('\\'):
					case ORD('"'):
					case ORD('$'):
					case ORD('`'):
						break;
					default:
						*wp++ = CHAR;
						*wp++ = '\\';
						break;
					}
				}
				*wp++ = CHAR;
				*wp++ = c;
			}
			afree(sp, ATEMP);
			*wp++ = CQUOTE;
			state = statep->type = SHEREDELIM;
			break;

		/* in *(...|...) pattern (*+?@!) */
		case SPATTERN:
			if ((unsigned int)c == ORD(/*(*/ ')')) {
				*wp++ = CPAT;
				POP_STATE();
			} else if ((unsigned int)c == ORD('|')) {
				*wp++ = SPAT;
			} else if ((unsigned int)c == ORD('(')) {
				*wp++ = OPAT;
				/* simile for @ */
				*wp++ = ' ';
				PUSH_STATE(SPATTERN);
			} else
				goto Sbase1;
			break;
		}
	}
 Done:
	Xcheck(ws, wp);
	/* anything still on the state stack was left unterminated */
	if (statep != &states[1])
		/* XXX figure out what is missing */
		yyerror("no closing quote");

	/* This is done to avoid tests for SHEREDELIM wherever SBASE tested */
	if (state == SHEREDELIM)
		state = SBASE;

	/*
	 * Redirection: the terminator is < or > (or & outside sh/POSIX
	 * mode) and the word so far is either empty or a single digit.
	 */
	dp = Xstring(ws, wp);
	if (state == SBASE && (
	    (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) ||
	    ctype(c, C_ANGLE)) && ((c2 = Xlength(ws, wp)) == 0 ||
	    (c2 == 2 && dp[0] == CHAR && ctype(dp[1], C_DIGIT)))) {
		struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);

		/* explicit digit, else 0 for input and 1 for output */
		iop->unit = c2 == 2 ? ksh_numdig(dp[1]) : c == '<' ? 0 : 1;

		if (c == '&') {
			/* only &> is a redirection */
			if ((unsigned int)(c2 = getsc()) != ORD('>')) {
				ungetsc(c2);
				goto no_iop;
			}
			c = c2;
			iop->ioflag = IOBASH;
		} else
			iop->ioflag = 0;

		c2 = getsc();
		/* <<, >>, <> are ok, >< is not */
		if (c == c2 || ((unsigned int)c == ORD('<') &&
		    (unsigned int)c2 == ORD('>'))) {
			iop->ioflag |= c == c2 ?
			    ((unsigned int)c == ORD('>') ? IOCAT : IOHERE) : IORDWR;
			if (iop->ioflag == IOHERE) {
				/* <<- and <<< variants */
				if ((unsigned int)(c2 = getsc()) == ORD('-'))
					iop->ioflag |= IOSKIP;
				else if ((unsigned int)c2 == ORD('<'))
					iop->ioflag |= IOHERESTR;
				else
					ungetsc(c2);
			}
		} else if ((unsigned int)c2 == ORD('&'))
			iop->ioflag |= IODUP | ((unsigned int)c == ORD('<') ? IORDUP : 0);
		else {
			iop->ioflag |= (unsigned int)c == ORD('>') ? IOWRITE : IOREAD;
			if ((unsigned int)c == ORD('>') && (unsigned int)c2 == ORD('|'))
				iop->ioflag |= IOCLOB;
			else
				ungetsc(c2);
		}

		iop->ioname = NULL;
		iop->delim = NULL;
		iop->heredoc = NULL;
		/* free word */
		Xfree(ws, wp);
		yylval.iop = iop;
		return (REDIR);
 no_iop:
		afree(iop, ATEMP);
	}

	if (wp == dp && state == SBASE) {
		/* free word */
		Xfree(ws, wp);
		/* no word, process LEX1 character */
		if (((unsigned int)c == ORD('|')) ||
		    ((unsigned int)c == ORD('&')) ||
		    ((unsigned int)c == ORD(';')) ||
		    ((unsigned int)c == ORD('(' /*)*/))) {
			/* doubled character or two-character operator? */
			if ((c2 = getsc()) == c)
				c = ((unsigned int)c == ORD(';')) ? BREAK :
				    ((unsigned int)c == ORD('|')) ? LOGOR :
				    ((unsigned int)c == ORD('&')) ? LOGAND :
				    /* (unsigned int)c == ORD('(' )) */ MDPAREN;
			else if ((unsigned int)c == ORD('|') && (unsigned int)c2 == ORD('&'))
				c = COPROC;
			else if ((unsigned int)c == ORD(';') && (unsigned int)c2 == ORD('|'))
				c = BRKEV;
			else if ((unsigned int)c == ORD(';') && (unsigned int)c2 == ORD('&'))
				c = BRKFT;
			else
				ungetsc(c2);
#ifndef MKSH_SMALL
			/* ;;& is accepted as well and yields BRKEV */
			if (c == BREAK) {
				if ((unsigned int)(c2 = getsc()) == ORD('&'))
					c = BRKEV;
				else
					ungetsc(c2);
			}
#endif
		} else if ((unsigned int)c == ORD('\n')) {
			if (cf & HEREDELIM)
				ungetsc(c);
			else {
				/* read the bodies of pending here documents */
				gethere();
				if (cf & CONTIN)
					goto Again;
			}
		} else if (c == '\0' && !(cf & HEREDELIM)) {
			struct ioword **p = heres;

			/* end of input: a pending here document is an error */
			while (p < herep)
				if ((*p)->ioflag & IOHERESTR)
					++p;
				else
					/* ksh -c 'cat <<EOF' can cause this */
					yyerror(Tf_heredoc,
					    evalstr((*p)->delim, 0));
		}
		return (c);
	}

	/* terminate word */
	*wp++ = EOS;
	yylval.cp = Xclose(ws, wp);
	if (state == SWORD || state == SLETPAREN
	    /* XXX ONEWORD? */)
		return (LWORD);

	/* unget terminator */
	ungetsc(c);

	/*
	 * note: the alias-vs-function code below depends on several
	 * interna: starting from here, source->str is not modified;
	 * the way getsc() and ungetsc() operate; etc.
	 */

	/* copy word to unprefixed string ident */
	sp = yylval.cp;
	dp = ident;
	while ((dp - ident) < IDENT && (c = *sp++) == CHAR)
		*dp++ = *sp++;
	if (c != EOS)
		/* word is not unquoted, or space ran out */
		dp = ident;
	/* make sure the ident array stays NUL padded */
	memset(dp, 0, (ident + IDENT) - dp + 1);

	if (*ident != '\0' && (cf & (KEYWORD | ALIAS))) {
		struct tbl *p;
		uint32_t h = hash(ident);

		if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
		    (!(cf & ESACONLY) || p->val.i == ESAC ||
		    (unsigned int)p->val.i == ORD(/*{*/ '}'))) {
			afree(yylval.cp, ATEMP);
			return (p->val.i);
		}
		if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
		    (p->flag & ISSET)) {
			/*
			 * this still points to the same character as the
			 * ungetsc'd terminator from above
			 */
			const char *cp = source->str;

			/* prefer POSIX but not Korn functions over aliases */
			while (ctype(*cp, C_BLANK))
				/*
				 * this is like getsc() without skipping
				 * over Source boundaries (including not
				 * parsing ungetsc'd characters that got
				 * pushed into an SREREAD) which is what
				 * we want here anyway: find out whether
				 * the alias name is followed by a POSIX
				 * function definition
				 */
				++cp;
			/* prefer functions over aliases */
			if (cp[0] != '(' || cp[1] != ')') {
				Source *s = source;

				/* do not expand an alias that is being expanded */
				while (s && (s->flags & SF_HASALIAS))
					if (s->u.tblp == p)
						return (LWORD);
					else
						s = s->next;
				/* push alias expansion */
				s = pushs(SALIAS, source->areap);
				s->start = s->str = p->val.s;
				s->u.tblp = p;
				s->flags |= SF_HASALIAS;
				s->line = source->line;
				s->next = source;
				if (source->type == SEOF) {
					/* prevent infinite recursion at EOS */
					source->u.tblp = p;
					source->flags |= SF_HASALIAS;
				}
				source = s;
				afree(yylval.cp, ATEMP);
				goto Again;
			}
		}
	} else if (*ident == '\0') {
		/* retain typeset et al. even when quoted */
		struct tbl *tt = get_builtin((dp = wdstrip(yylval.cp, 0)));
		uint32_t flag = tt ? tt->flag : 0;

		if (flag & (DECL_UTIL | DECL_FWDR))
			strlcpy(ident, dp, sizeof(ident));
		afree(dp, ATEMP);
	}

	return (LWORD);
}

/*
 * Read the bodies of all pending here documents (entries between heres
 * and herep), skipping here strings (IOHERESTR), then empty the list.
 */
static void
gethere(void)
{
	struct ioword **p;

	for (p = heres; p < herep; p++)
		if (!((*p)->ioflag & IOHERESTR))
			readhere(*p);
	herep = heres;
}

/*
 * read "<<word" text into temp file
 *
 * NOTE(review): in the code visible here the text is collected in an
 * XString and stored in iop->heredoc; no file is written at this point.
 *
 * The delimiter is evalstr(iop->delim, 0). With IOSKIP set, leading
 * tabs of every line are dropped; without IOEVAL, backslash-newline
 * is left alone while reading. Reaching EOF on a line that does not
 * match the delimiter is reported through yyerror().
 */

static void
readhere(struct ioword *iop)
{
	int c;
	const char *eof, *eofp;
	XString xs;
	char *xp;
	size_t xpos;

	eof = evalstr(iop->delim, 0);

	if (!(iop->ioflag & IOEVAL))
		ignore_backslash_newline++;

	Xinit(xs, xp, 256, ATEMP);

 heredoc_read_line:
	/* beginning of line */
	eofp = eof;
	/* remember where this line starts in case it is the terminator */
	xpos = Xsavepos(xs, xp);
	if (iop->ioflag & IOSKIP) {
		/* skip over leading tabs */
		while ((c = getsc()) == '\t')
			;	/* nothing */
		goto heredoc_parse_char;
	}
 heredoc_read_char:
	c = getsc();
 heredoc_parse_char:
	/* compare with here document marker */
	if (!*eofp) {
		/* end of here document marker, what to do? */
		switch (c) {
		case ORD(/*(*/ ')'):
			if (!subshell_nesting_type)
				/*-
				 * not allowed outside $(...) or (...)
				 * => mismatch
				 */
				break;
			/* allow $(...) or (...) to close here */
			ungetsc(/*(*/ ')');
			/* FALLTHROUGH */
		case 0:
			/*
			 * Allow EOF here to commands without trailing
			 * newlines (mksh -c '...') will work as well.
			 */
		case ORD('\n'):
			/* Newline terminates here document marker */
			goto heredoc_found_terminator;
		}
	} else if (c == *eofp++)
		/* store; then read and compare next character */
		goto heredoc_store_and_loop;
	/* nope, mismatch; read until end of line */
	while (c != '\n') {
		if (!c)
			/* oops, reached EOF */
			yyerror(Tf_heredoc, eof);
		/* store character */
		Xcheck(xs, xp);
		Xput(xs, xp, c);
		/* read next character */
		c = getsc();
	}
	/* we read a newline as last character */
 heredoc_store_and_loop:
	/* store character */
	Xcheck(xs, xp);
	Xput(xs, xp, c);
	if (c == '\n')
		goto heredoc_read_line;
	goto heredoc_read_char;

 heredoc_found_terminator:
	/* jump back to saved beginning of line */
	xp = Xrestpos(xs, xp, xpos);
	/* terminate, close and store */
	Xput(xs, xp, '\0');
	iop->heredoc = Xclose(xs, xp);

	if (!(iop->ioflag & IOEVAL))
		ignore_backslash_newline--;
}

/*
 * Report a lexer/parser error: drop alias and re-read sources, discard
 * the rest of the pending input, print the printf-style message plus a
 * newline to shl_out after the error prefix, then call errorfz().
 * NOTE(review): callers in this file do not expect a return; presumably
 * errorfz() unwinds - confirm against its definition.
 */
void
yyerror(const char *fmt, ...)
{
	va_list va;

	/* pop aliases and re-reads */
	while (source->type == SALIAS || source->type == SREREAD)
		source = source->next;
	/* zap pending input */
	source->str = null;

	error_prefix(true);
	va_start(va, fmt);
	shf_vfprintf(shl_out, fmt, va);
	shf_putc('\n', shl_out);
	va_end(va);
	errorfz();
}

/*
 * input for yylex with alias expansion
 *
 * Allocate a zeroed Source of the given type in areap with an empty
 * string as input; SFILE and SSTDIN sources also get their line
 * buffer initialised. The new Source is returned but not pushed onto
 * the source stack; callers set ->next and the global source.
 */

Source *
pushs(int type, Area *areap)
{
	Source *s;

	s = alloc(sizeof(Source), areap);
	memset(s, 0, sizeof(Source));
	s->type = type;
	s->str = null;
	s->areap = areap;
	if (type == SFILE || type == SSTDIN)
		XinitN(s->xs, 256, s->areap);
	return (s);
}

/*
 * Return the next character of the input, refilling or switching the
 * source, depending on its type, once its string is used up.
 * Returns 0 at end of input.
 */
static int
getsc_uu(void)
{
	Source *s = source;
	int c;

	while ((c = ord(*s->str++)) == 0) {
		/* return 0 for EOF by default */
		s->str = NULL;
		switch (s->type) {
		case SEOF:
			s->str = null;
			return (0);

		case SSTDIN:
		case SFILE:
			getsc_line(s);
			break;

		case SWSTR:
			break;

		case SSTRING:
		case SSTRINGCMDLINE:
			break;

		case SWORDS:
			/* next word; a separator follows it */
			s->start = s->str = *s->u.strv++;
			s->type = SWORDSEP;
			break;

		case SWORDSEP:
			if (*s->u.strv == NULL) {
				/* no more words: finish with a newline */
				s->start = s->str = "\n";
				s->type = SEOF;
			} else {
				s->start = s->str = T1space;
				s->type = SWORDS;
			}
			break;

		case SALIAS:
			if (s->flags & SF_ALIASEND) {
				/* pass on an unused SF_ALIAS flag */
				source = s->next;
				source->flags |= s->flags & SF_ALIAS;
				s = source;
			} else if (*s->u.tblp->val.s &&
			    ctype((c = strnul(s->u.tblp->val.s)[-1]), C_SPACE)) {
				/* pop source stack */
				source = s = s->next;
				/*
				 * Note that this alias ended with a
				 * space, enabling alias expansion on
				 * the following word.
				 */
				s->flags |= SF_ALIAS;
			} else {
				/*
				 * At this point, we need to keep the current
				 * alias in the source list so recursive
				 * aliases can be detected and we also need to
				 * return the next character. Do this by
				 * temporarily popping the alias to get the
				 * next character and then put it back in the
				 * source list with the SF_ALIASEND flag set.
				 */
				/* pop source stack */
				source = s->next;
				source->flags |= s->flags & SF_ALIAS;
				c = getsc_uu();
				if (c) {
					s->flags |= SF_ALIASEND;
					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
					s->start = s->str = s->ugbuf;
					s->next = source;
					source = s;
				} else {
					s = source;
					/* avoid reading EOF twice */
					s->str = NULL;
					break;
				}
			}
			continue;

		case SREREAD:
			if (s->start != s->ugbuf)
				/* yuck */
				afree(s->u.freeme, ATEMP);
			source = s = s->next;
			continue;
		}
		/* nothing more to read: turn the source into SEOF */
		if (s->str == NULL) {
			s->type = SEOF;
			s->start = s->str = null;
			return ('\0');
		}
		if (s->flags & SF_ECHO) {
			shf_puts(s->str, shl_out);
			shf_flush(shl_out);
		}
	}
	return (c);
}

/*
 * Read the next line of an SFILE or SSTDIN source into its buffer
 * s->xs and point s->start/s->str at it; s->str is set to NULL at EOF.
 * For interactive input this also handles the prompt, the TMOUT alarm,
 * command line editing (if compiled in and enabled) and history.
 */
static void
getsc_line(Source *s)
{
	char *xp = Xstring(s->xs, xp), *cp;
	bool interactive = Flag(FTALKING) && s->type == SSTDIN;
	bool have_tty = tobool(interactive && (s->flags & SF_TTY));

	/* Done here to ensure nothing odd happens when a timeout occurs */
	XcheckN(s->xs, xp, LINE);
	*xp = '\0';
	s->start = s->str = xp;

	if (have_tty && ksh_tmout) {
		ksh_tmout_state = TMOUT_READING;
		alarm(ksh_tmout);
	}
	if (interactive) {
		if (cur_prompt == PS1)
			histsave(&s->line, NULL, HIST_FLUSH, true);
		change_winsz();
	}
#ifndef MKSH_NO_CMDLINE_EDITING
	if (have_tty && (
#if !MKSH_S_NOVI
	    Flag(FVI) ||
#endif
	    Flag(FEMACS) || Flag(FGMACS))) {
		int nread;

		nread = x_read(xp);
		if (nread < 0)
			/* read error */
			nread = 0;
		xp[nread] = '\0';
		xp += nread;
	} else
#endif
	{
		if (interactive)
			pprompt(prompt, 0);
		else
			s->line++;

		/* read until newline or EOF, growing the buffer as needed */
		while (/* CONSTCOND */ 1) {
			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);

			/* interrupted read: run pending traps and retry */
			if (!p && shf_error(s->u.shf) &&
			    shf_errno(s->u.shf) == EINTR) {
				shf_clearerr(s->u.shf);
				if (trap)
					runtraps(0);
				continue;
			}
			if (!p || (xp = p, xp[-1] == '\n'))
				break;
			/* double buffer size */
			/* move past NUL so doubling works... */
			xp++;
			XcheckN(s->xs, xp, Xlength(s->xs, xp));
			/* ...and move back again */
			xp--;
		}
		/*
		 * flush any unwanted input so other programs/builtins
		 * can read it. Not very optimal, but less error prone
		 * than flushing elsewhere, dealing with redirections,
		 * etc.
		 * TODO: reduce size of shf buffer (~128?) if SSTDIN
		 */
		if (s->type == SSTDIN)
			shf_flush(s->u.shf);
	}
	/*
	 * XXX: temporary kludge to restore source after a
	 * trap may have been executed.
	 */
	source = s;
	if (have_tty && ksh_tmout) {
		ksh_tmout_state = TMOUT_EXECUTING;
		alarm(0);
	}
	cp = Xstring(s->xs, xp);
	rndpush(cp);
	s->start = s->str = cp;
	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
	/* Note: if input is all nulls, this is not eof */
	if (Xlength(s->xs, xp) == 0) {
		/* EOF */
		if (s->type == SFILE)
			shf_fdclose(s->u.shf);
		s->str = NULL;
	} else if (interactive && *s->str) {
		if (cur_prompt != PS1)
			histsave(&s->line, s->str, HIST_APPEND, true);
		else if (!ctype(*s->str, C_IFS | C_IFSWS))
			histsave(&s->line, s->str, HIST_QUEUE, true);
#if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
		else
			goto check_for_sole_return;
	} else if (interactive && cur_prompt == PS1) {
 check_for_sole_return:
		/* line consisting only of IFS whitespace: flush and sync history */
		cp = Xstring(s->xs, xp);
		while (ctype(*cp, C_IFSWS))
			++cp;
		if (!*cp) {
			histsave(&s->line, NULL, HIST_FLUSH, true);
			histsync();
		}
#endif
	}
	if (interactive)
		set_prompt(PS2, NULL);
}

void
set_prompt(int to, Source *s)
{
	cur_prompt = (uint8_t)to;

	switch (to) {
	/* command */
	case PS1:
		/*
		 * Substitute ! and !! here, before substitutions are done
		 * so ! in expanded variables are not expanded.
		 * NOTE: this is not what AT&T ksh does (it does it after
		 * substitutions, POSIX doesn't say which is to be done.
		 */
		{
			struct shf *shf;
			char * volatile ps1;
			Area *saved_atemp;
			int saved_lineno;

			ps1 = str_val(global("PS1"));
			shf = shf_sopen(NULL, strlen(ps1) * 2,
			    SHF_WR | SHF_DYNAMIC, NULL);
			while (*ps1)
				if (*ps1 != '!' || *++ps1 == '!')
					shf_putchar(*ps1++, shf);
				else
					shf_fprintf(shf, Tf_lu, s ?
1478 (unsigned long)s->line + 1 : 0UL); 1479 ps1 = shf_sclose(shf); 1480 saved_lineno = current_lineno; 1481 if (s) 1482 current_lineno = s->line + 1; 1483 saved_atemp = ATEMP; 1484 newenv(E_ERRH); 1485 if (kshsetjmp(e->jbuf)) { 1486 prompt = safe_prompt; 1487 /* 1488 * Don't print an error - assume it has already 1489 * been printed. Reason is we may have forked 1490 * to run a command and the child may be 1491 * unwinding its stack through this code as it 1492 * exits. 1493 */ 1494 } else { 1495 char *cp = substitute(ps1, 0); 1496 strdupx(prompt, cp, saved_atemp); 1497 } 1498 current_lineno = saved_lineno; 1499 quitenv(NULL); 1500 } 1501 break; 1502 /* command continuation */ 1503 case PS2: 1504 prompt = str_val(global("PS2")); 1505 break; 1506 } 1507 } 1508 1509 int 1510 pprompt(const char *cp, int ntruncate) 1511 { 1512 char delimiter = 0; 1513 bool doprint = (ntruncate != -1); 1514 bool indelimit = false; 1515 int columns = 0, lines = 0; 1516 1517 /* 1518 * Undocumented AT&T ksh feature: 1519 * If the second char in the prompt string is \r then the first 1520 * char is taken to be a non-printing delimiter and any chars 1521 * between two instances of the delimiter are not considered to 1522 * be part of the prompt length 1523 */ 1524 if (*cp && cp[1] == '\r') { 1525 delimiter = *cp; 1526 cp += 2; 1527 } 1528 for (; *cp; cp++) { 1529 if (indelimit && *cp != delimiter) 1530 ; 1531 else if (ctype(*cp, C_CR | C_LF)) { 1532 lines += columns / x_cols + ((*cp == '\n') ? 
1 : 0); 1533 columns = 0; 1534 } else if (*cp == '\t') { 1535 columns = (columns | 7) + 1; 1536 } else if (*cp == '\b') { 1537 if (columns > 0) 1538 columns--; 1539 } else if (*cp == delimiter) 1540 indelimit = !indelimit; 1541 else if (UTFMODE && (rtt2asc(*cp) > 0x7F)) { 1542 const char *cp2; 1543 columns += utf_widthadj(cp, &cp2); 1544 if (doprint && (indelimit || 1545 (ntruncate < (x_cols * lines + columns)))) 1546 shf_write(cp, cp2 - cp, shl_out); 1547 cp = cp2 - /* loop increment */ 1; 1548 continue; 1549 } else 1550 columns++; 1551 if (doprint && (*cp != delimiter) && 1552 (indelimit || (ntruncate < (x_cols * lines + columns)))) 1553 shf_putc(*cp, shl_out); 1554 } 1555 if (doprint) 1556 shf_flush(shl_out); 1557 return (x_cols * lines + columns); 1558 } 1559 1560 /* 1561 * Read the variable part of a ${...} expression (i.e. up to but not 1562 * including the :[-+?=#%] or close-brace). 1563 */ 1564 static char * 1565 get_brace_var(XString *wsp, char *wp) 1566 { 1567 char c; 1568 enum parse_state { 1569 PS_INITIAL, PS_SAW_PERCENT, PS_SAW_HASH, PS_SAW_BANG, 1570 PS_IDENT, PS_NUMBER, PS_VAR1 1571 } state = PS_INITIAL; 1572 1573 while (/* CONSTCOND */ 1) { 1574 c = getsc(); 1575 /* State machine to figure out where the variable part ends. 
*/ 1576 switch (state) { 1577 case PS_SAW_HASH: 1578 if (ctype(c, C_VAR1)) { 1579 char c2; 1580 1581 c2 = getsc(); 1582 ungetsc(c2); 1583 if (ord(c2) != ORD(/*{*/ '}')) { 1584 ungetsc(c); 1585 goto out; 1586 } 1587 } 1588 goto ps_common; 1589 case PS_SAW_BANG: 1590 switch (ord(c)) { 1591 case ORD('@'): 1592 case ORD('#'): 1593 case ORD('-'): 1594 case ORD('?'): 1595 goto out; 1596 } 1597 goto ps_common; 1598 case PS_INITIAL: 1599 switch (ord(c)) { 1600 case ORD('%'): 1601 state = PS_SAW_PERCENT; 1602 goto next; 1603 case ORD('#'): 1604 state = PS_SAW_HASH; 1605 goto next; 1606 case ORD('!'): 1607 state = PS_SAW_BANG; 1608 goto next; 1609 } 1610 /* FALLTHROUGH */ 1611 case PS_SAW_PERCENT: 1612 ps_common: 1613 if (ctype(c, C_ALPHX)) 1614 state = PS_IDENT; 1615 else if (ctype(c, C_DIGIT)) 1616 state = PS_NUMBER; 1617 else if (ctype(c, C_VAR1)) 1618 state = PS_VAR1; 1619 else 1620 goto out; 1621 break; 1622 case PS_IDENT: 1623 if (!ctype(c, C_ALNUX)) { 1624 if (ord(c) == ORD('[')) { 1625 char *tmp, *p; 1626 1627 if (!arraysub(&tmp)) 1628 yyerror("missing ]"); 1629 *wp++ = c; 1630 p = tmp; 1631 while (*p) { 1632 Xcheck(*wsp, wp); 1633 *wp++ = *p++; 1634 } 1635 afree(tmp, ATEMP); 1636 /* the ] */ 1637 c = getsc(); 1638 } 1639 goto out; 1640 } 1641 next: 1642 break; 1643 case PS_NUMBER: 1644 if (!ctype(c, C_DIGIT)) 1645 goto out; 1646 break; 1647 case PS_VAR1: 1648 goto out; 1649 } 1650 Xcheck(*wsp, wp); 1651 *wp++ = c; 1652 } 1653 out: 1654 /* end of variable part */ 1655 *wp++ = '\0'; 1656 ungetsc(c); 1657 return (wp); 1658 } 1659 1660 /* 1661 * Save an array subscript - returns true if matching bracket found, false 1662 * if eof or newline was found. 
1663 * (Returned string double null terminated) 1664 */ 1665 static bool 1666 arraysub(char **strp) 1667 { 1668 XString ws; 1669 char *wp, c; 1670 /* we are just past the initial [ */ 1671 unsigned int depth = 1; 1672 1673 Xinit(ws, wp, 32, ATEMP); 1674 1675 do { 1676 c = getsc(); 1677 Xcheck(ws, wp); 1678 *wp++ = c; 1679 if (ord(c) == ORD('[')) 1680 depth++; 1681 else if (ord(c) == ORD(']')) 1682 depth--; 1683 } while (depth > 0 && c && c != '\n'); 1684 1685 *wp++ = '\0'; 1686 *strp = Xclose(ws, wp); 1687 1688 return (tobool(depth == 0)); 1689 } 1690 1691 /* Unget a char: handles case when we are already at the start of the buffer */ 1692 static void 1693 ungetsc(int c) 1694 { 1695 struct sretrace_info *rp = retrace_info; 1696 1697 if (backslash_skip) 1698 backslash_skip--; 1699 /* Don't unget EOF... */ 1700 if (source->str == null && c == '\0') 1701 return; 1702 while (rp) { 1703 if (Xlength(rp->xs, rp->xp)) 1704 rp->xp--; 1705 rp = rp->next; 1706 } 1707 ungetsc_i(c); 1708 } 1709 static void 1710 ungetsc_i(int c) 1711 { 1712 if (source->str > source->start) 1713 source->str--; 1714 else { 1715 Source *s; 1716 1717 s = pushs(SREREAD, source->areap); 1718 s->ugbuf[0] = c; s->ugbuf[1] = '\0'; 1719 s->start = s->str = s->ugbuf; 1720 s->next = source; 1721 source = s; 1722 } 1723 } 1724 1725 1726 /* Called to get a char that isn't a \newline sequence. */ 1727 static int 1728 getsc_bn(void) 1729 { 1730 int c, c2; 1731 1732 if (ignore_backslash_newline) 1733 return (o_getsc_u()); 1734 1735 if (backslash_skip == 1) { 1736 backslash_skip = 2; 1737 return (o_getsc_u()); 1738 } 1739 1740 backslash_skip = 0; 1741 1742 while (/* CONSTCOND */ 1) { 1743 c = o_getsc_u(); 1744 if (c == '\\') { 1745 if ((c2 = o_getsc_u()) == '\n') 1746 /* ignore the \newline; get the next char... 
*/ 1747 continue; 1748 ungetsc_i(c2); 1749 backslash_skip = 1; 1750 } 1751 return (c); 1752 } 1753 } 1754 1755 void 1756 yyskiputf8bom(void) 1757 { 1758 int c; 1759 1760 if (rtt2asc((c = o_getsc_u())) != 0xEF) { 1761 ungetsc_i(c); 1762 return; 1763 } 1764 if (rtt2asc((c = o_getsc_u())) != 0xBB) { 1765 ungetsc_i(c); 1766 ungetsc_i(asc2rtt(0xEF)); 1767 return; 1768 } 1769 if (rtt2asc((c = o_getsc_u())) != 0xBF) { 1770 ungetsc_i(c); 1771 ungetsc_i(asc2rtt(0xBB)); 1772 ungetsc_i(asc2rtt(0xEF)); 1773 return; 1774 } 1775 UTFMODE |= 8; 1776 } 1777 1778 static Lex_state * 1779 push_state_i(State_info *si, Lex_state *old_end) 1780 { 1781 Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP); 1782 1783 news[0].ls_base = old_end; 1784 si->base = &news[0]; 1785 si->end = &news[STATE_BSIZE]; 1786 return (&news[1]); 1787 } 1788 1789 static Lex_state * 1790 pop_state_i(State_info *si, Lex_state *old_end) 1791 { 1792 Lex_state *old_base = si->base; 1793 1794 si->base = old_end->ls_base - STATE_BSIZE; 1795 si->end = old_end->ls_base; 1796 1797 afree(old_base, ATEMP); 1798 1799 return (si->base + STATE_BSIZE - 1); 1800 } 1801