1 /* $OpenBSD: lex.c,v 1.51 2015/09/10 22:48:58 nicm Exp $ */ 2 3 /*- 4 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 5 * 2011, 2012, 2013, 2014, 2015, 2016 6 * mirabilos <m (at) mirbsd.org> 7 * 8 * Provided that these terms and disclaimer and all copyright notices 9 * are retained or reproduced in an accompanying document, permission 10 * is granted to deal in this work without restriction, including un- 11 * limited rights to use, publicly perform, distribute, sell, modify, 12 * merge, give away, or sublicence. 13 * 14 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to 15 * the utmost extent permitted by applicable law, neither express nor 16 * implied; without malicious intent or gross negligence. In no event 17 * may a licensor, author or contributor be held liable for indirect, 18 * direct, other damage, loss, or other issues arising in any way out 19 * of dealing in the work, even if advised of the possibility of such 20 * damage or existence of a defect, except proven that it results out 21 * of said person's immediate fault when using the work as intended. 22 */ 23 24 #include "sh.h" 25 26 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.218 2016/01/20 21:34:12 tg Exp $"); 27 28 /* 29 * states while lexing word 30 */ 31 #define SBASE 0 /* outside any lexical constructs */ 32 #define SWORD 1 /* implicit quoting for substitute() */ 33 #define SLETPAREN 2 /* inside (( )), implicit quoting */ 34 #define SSQUOTE 3 /* inside '' */ 35 #define SDQUOTE 4 /* inside "" */ 36 #define SEQUOTE 5 /* inside $'' */ 37 #define SBRACE 6 /* inside ${} */ 38 #define SQBRACE 7 /* inside "${}" */ 39 #define SBQUOTE 8 /* inside `` */ 40 #define SASPAREN 9 /* inside $(( )) */ 41 #define SHEREDELIM 10 /* parsing << or <<- delimiter */ 42 #define SHEREDQUOTE 11 /* parsing " in << or <<- delimiter */ 43 #define SPATTERN 12 /* parsing *(...|...) pattern (*+?@!) */ 44 #define SADELIM 13 /* like SBASE, looking for delimiter */ 45 #define STBRACEKORN 14 /* parsing ${...[#%]...} !FSH */ 46 #define STBRACEBOURNE 15 /* parsing ${...[#%]...} FSH */ 47 #define SINVALID 255 /* invalid state */ 48 49 struct sretrace_info { 50 struct sretrace_info *next; 51 XString xs; 52 char *xp; 53 }; 54 55 /* 56 * Structure to keep track of the lexing state and the various pieces of info 57 * needed for each particular state. 58 */ 59 typedef struct lex_state { 60 union { 61 /* point to the next state block */ 62 struct lex_state *base; 63 /* marks start of state output in output string */ 64 size_t start; 65 /* SBQUOTE: true if in double quotes: "`...`" */ 66 /* SEQUOTE: got NUL, ignore rest of string */ 67 bool abool; 68 /* SADELIM information */ 69 struct { 70 /* character to search for */ 71 unsigned char delimiter; 72 /* max. number of delimiters */ 73 unsigned char num; 74 } adelim; 75 } u; 76 /* count open parentheses */ 77 short nparen; 78 /* type of this state */ 79 uint8_t type; 80 } Lex_state; 81 #define ls_base u.base 82 #define ls_start u.start 83 #define ls_bool u.abool 84 #define ls_adelim u.adelim 85 86 typedef struct { 87 Lex_state *base; 88 Lex_state *end; 89 } State_info; 90 91 static void readhere(struct ioword *); 92 static void ungetsc(int); 93 static void ungetsc_i(int); 94 static int getsc_uu(void); 95 static void getsc_line(Source *); 96 static int getsc_bn(void); 97 static int getsc_i(void); 98 static char *get_brace_var(XString *, char *); 99 static bool arraysub(char **); 100 static void gethere(void); 101 static Lex_state *push_state_i(State_info *, Lex_state *); 102 static Lex_state *pop_state_i(State_info *, Lex_state *); 103 104 static int backslash_skip; 105 static int ignore_backslash_newline; 106 107 /* optimised getsc_bn() */ 108 #define o_getsc() (*source->str != '\0' && *source->str != '\\' && \ 109 !backslash_skip ? *source->str++ : getsc_bn()) 110 /* optimised getsc_uu() */ 111 #define o_getsc_u() ((*source->str != '\0') ? *source->str++ : getsc_uu()) 112 113 /* retrace helper */ 114 #define o_getsc_r(carg) \ 115 int cev = (carg); \ 116 struct sretrace_info *rp = retrace_info; \ 117 \ 118 while (rp) { \ 119 Xcheck(rp->xs, rp->xp); \ 120 *rp->xp++ = cev; \ 121 rp = rp->next; \ 122 } \ 123 \ 124 return (cev); 125 126 /* callback */ 127 static int 128 getsc_i(void) 129 { 130 o_getsc_r(o_getsc()); 131 } 132 133 #if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST) 134 #define getsc getsc_i 135 #else 136 static int getsc_r(int); 137 138 static int 139 getsc_r(int c) 140 { 141 o_getsc_r(c); 142 } 143 144 #define getsc() getsc_r(o_getsc()) 145 #endif 146 147 #define STATE_BSIZE 8 148 149 #define PUSH_STATE(s) do { \ 150 if (++statep == state_info.end) \ 151 statep = push_state_i(&state_info, statep); \ 152 state = statep->type = (s); \ 153 } while (/* CONSTCOND */ 0) 154 155 #define POP_STATE() do { \ 156 if (--statep == state_info.base) \ 157 statep = pop_state_i(&state_info, statep); \ 158 state = statep->type; \ 159 } while (/* CONSTCOND */ 0) 160 161 #define PUSH_SRETRACE(s) do { \ 162 struct sretrace_info *ri; \ 163 \ 164 PUSH_STATE(s); \ 165 statep->ls_start = Xsavepos(ws, wp); \ 166 ri = alloc(sizeof(struct sretrace_info), ATEMP); \ 167 Xinit(ri->xs, ri->xp, 64, ATEMP); \ 168 ri->next = retrace_info; \ 169 retrace_info = ri; \ 170 } while (/* CONSTCOND */ 0) 171 172 #define POP_SRETRACE() do { \ 173 wp = Xrestpos(ws, wp, statep->ls_start); \ 174 *retrace_info->xp = '\0'; \ 175 sp = Xstring(retrace_info->xs, retrace_info->xp); \ 176 dp = (void *)retrace_info; \ 177 retrace_info = retrace_info->next; \ 178 afree(dp, ATEMP); \ 179 POP_STATE(); \ 180 } while (/* CONSTCOND */ 0) 181 182 /** 183 * Lexical analyser 184 * 185 * tokens are not regular expressions, they are LL(1). 186 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))". 187 * hence the state stack. Note "$(...)" are now parsed recursively. 188 */ 189 190 int 191 yylex(int cf) 192 { 193 Lex_state states[STATE_BSIZE], *statep, *s2, *base; 194 State_info state_info; 195 int c, c2, state; 196 size_t cz; 197 XString ws; /* expandable output word */ 198 char *wp; /* output word pointer */ 199 char *sp, *dp; 200 201 Again: 202 states[0].type = SINVALID; 203 states[0].ls_base = NULL; 204 statep = &states[1]; 205 state_info.base = states; 206 state_info.end = &state_info.base[STATE_BSIZE]; 207 208 Xinit(ws, wp, 64, ATEMP); 209 210 backslash_skip = 0; 211 ignore_backslash_newline = 0; 212 213 if (cf & ONEWORD) 214 state = SWORD; 215 else if (cf & LETEXPR) { 216 /* enclose arguments in (double) quotes */ 217 *wp++ = OQUOTE; 218 state = SLETPAREN; 219 statep->nparen = 0; 220 } else { 221 /* normal lexing */ 222 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE; 223 while ((c = getsc()) == ' ' || c == '\t') 224 ; 225 if (c == '#') { 226 ignore_backslash_newline++; 227 while ((c = getsc()) != '\0' && c != '\n') 228 ; 229 ignore_backslash_newline--; 230 } 231 ungetsc(c); 232 } 233 if (source->flags & SF_ALIAS) { 234 /* trailing ' ' in alias definition */ 235 source->flags &= ~SF_ALIAS; 236 /* POSIX: trailing space only counts if parsing simple cmd */ 237 if (!Flag(FPOSIX) || (cf & CMDWORD)) 238 cf |= ALIAS; 239 } 240 241 /* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */ 242 statep->type = state; 243 244 /* collect non-special or quoted characters to form word */ 245 while (!((c = getsc()) == 0 || 246 ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) { 247 if (state == SBASE && 248 subshell_nesting_type == /*{*/ '}' && 249 c == /*{*/ '}') 250 /* possibly end ${ :;} */ 251 break; 252 Xcheck(ws, wp); 253 switch (state) { 254 case SADELIM: 255 if (c == '(') 256 statep->nparen++; 257 else if (c == ')') 258 statep->nparen--; 259 else if (statep->nparen == 0 && (c == /*{*/ '}' || 260 c == (int)statep->ls_adelim.delimiter)) { 261 *wp++ = ADELIM; 262 *wp++ = c; 263 if (c == /*{*/ '}' || --statep->ls_adelim.num == 0) 264 POP_STATE(); 265 if (c == /*{*/ '}') 266 POP_STATE(); 267 break; 268 } 269 /* FALLTHROUGH */ 270 case SBASE: 271 if (c == '[' && (cf & CMDASN)) { 272 /* temporary */ 273 *wp = EOS; 274 if (is_wdvarname(Xstring(ws, wp), false)) { 275 char *p, *tmp; 276 277 if (arraysub(&tmp)) { 278 *wp++ = CHAR; 279 *wp++ = c; 280 for (p = tmp; *p; ) { 281 Xcheck(ws, wp); 282 *wp++ = CHAR; 283 *wp++ = *p++; 284 } 285 afree(tmp, ATEMP); 286 break; 287 } else { 288 Source *s; 289 290 s = pushs(SREREAD, 291 source->areap); 292 s->start = s->str = 293 s->u.freeme = tmp; 294 s->next = source; 295 source = s; 296 } 297 } 298 *wp++ = CHAR; 299 *wp++ = c; 300 break; 301 } 302 /* FALLTHROUGH */ 303 Sbase1: /* includes *(...|...) pattern (*+?@!) */ 304 if (c == '*' || c == '@' || c == '+' || c == '?' || 305 c == '!') { 306 c2 = getsc(); 307 if (c2 == '(' /*)*/ ) { 308 *wp++ = OPAT; 309 *wp++ = c; 310 PUSH_STATE(SPATTERN); 311 break; 312 } 313 ungetsc(c2); 314 } 315 /* FALLTHROUGH */ 316 Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */ 317 switch (c) { 318 case '\\': 319 getsc_qchar: 320 if ((c = getsc())) { 321 /* trailing \ is lost */ 322 *wp++ = QCHAR; 323 *wp++ = c; 324 } 325 break; 326 case '\'': 327 open_ssquote_unless_heredoc: 328 if ((cf & HEREDOC)) 329 goto store_char; 330 *wp++ = OQUOTE; 331 ignore_backslash_newline++; 332 PUSH_STATE(SSQUOTE); 333 break; 334 case '"': 335 open_sdquote: 336 *wp++ = OQUOTE; 337 PUSH_STATE(SDQUOTE); 338 break; 339 case '$': 340 /* 341 * processing of dollar sign belongs into 342 * Subst, except for those which can open 343 * a string: $'' and $"" 344 */ 345 subst_dollar_ex: 346 c = getsc(); 347 switch (c) { 348 case '"': 349 goto open_sdquote; 350 case '\'': 351 goto open_sequote; 352 default: 353 goto SubstS; 354 } 355 default: 356 goto Subst; 357 } 358 break; 359 360 Subst: 361 switch (c) { 362 case '\\': 363 c = getsc(); 364 switch (c) { 365 case '"': 366 if ((cf & HEREDOC)) 367 goto heredocquote; 368 /* FALLTHROUGH */ 369 case '\\': 370 case '$': case '`': 371 store_qchar: 372 *wp++ = QCHAR; 373 *wp++ = c; 374 break; 375 default: 376 heredocquote: 377 Xcheck(ws, wp); 378 if (c) { 379 /* trailing \ is lost */ 380 *wp++ = CHAR; 381 *wp++ = '\\'; 382 *wp++ = CHAR; 383 *wp++ = c; 384 } 385 break; 386 } 387 break; 388 case '$': 389 c = getsc(); 390 SubstS: 391 if (c == '(') /*)*/ { 392 c = getsc(); 393 if (c == '(') /*)*/ { 394 *wp++ = EXPRSUB; 395 PUSH_SRETRACE(SASPAREN); 396 statep->nparen = 2; 397 *retrace_info->xp++ = '('; 398 } else { 399 ungetsc(c); 400 subst_command: 401 c = COMSUB; 402 subst_command2: 403 sp = yyrecursive(c); 404 cz = strlen(sp) + 1; 405 XcheckN(ws, wp, cz); 406 *wp++ = c; 407 memcpy(wp, sp, cz); 408 wp += cz; 409 } 410 } else if (c == '{') /*}*/ { 411 if ((c = getsc()) == '|') { 412 /* 413 * non-subenvironment 414 * value substitution 415 */ 416 c = VALSUB; 417 goto subst_command2; 418 } else if (ctype(c, C_IFSWS)) { 419 /* 420 * non-subenvironment 421 * "command" substitution 422 */ 423 c = FUNSUB; 424 goto subst_command2; 425 } 426 ungetsc(c); 427 *wp++ = OSUBST; 428 *wp++ = '{'; /*}*/ 429 wp = get_brace_var(&ws, wp); 430 c = getsc(); 431 /* allow :# and :% (ksh88 compat) */ 432 if (c == ':') { 433 *wp++ = CHAR; 434 *wp++ = c; 435 c = getsc(); 436 if (c == ':') { 437 *wp++ = CHAR; 438 *wp++ = '0'; 439 *wp++ = ADELIM; 440 *wp++ = ':'; 441 PUSH_STATE(SBRACE); 442 PUSH_STATE(SADELIM); 443 statep->ls_adelim.delimiter = ':'; 444 statep->ls_adelim.num = 1; 445 statep->nparen = 0; 446 break; 447 } else if (ksh_isdigit(c) || 448 c == '('/*)*/ || c == ' ' || 449 /*XXX what else? */ 450 c == '$') { 451 /* substring subst. */ 452 if (c != ' ') { 453 *wp++ = CHAR; 454 *wp++ = ' '; 455 } 456 ungetsc(c); 457 PUSH_STATE(SBRACE); 458 PUSH_STATE(SADELIM); 459 statep->ls_adelim.delimiter = ':'; 460 statep->ls_adelim.num = 2; 461 statep->nparen = 0; 462 break; 463 } 464 } else if (c == '/') { 465 *wp++ = CHAR; 466 *wp++ = c; 467 if ((c = getsc()) == '/') { 468 *wp++ = ADELIM; 469 *wp++ = c; 470 } else 471 ungetsc(c); 472 PUSH_STATE(SBRACE); 473 PUSH_STATE(SADELIM); 474 statep->ls_adelim.delimiter = '/'; 475 statep->ls_adelim.num = 1; 476 statep->nparen = 0; 477 break; 478 } 479 /* 480 * If this is a trim operation, 481 * treat (,|,) specially in STBRACE. 482 */ 483 if (ctype(c, C_SUBOP2)) { 484 ungetsc(c); 485 if (Flag(FSH)) 486 PUSH_STATE(STBRACEBOURNE); 487 else 488 PUSH_STATE(STBRACEKORN); 489 } else { 490 ungetsc(c); 491 if (state == SDQUOTE || 492 state == SQBRACE) 493 PUSH_STATE(SQBRACE); 494 else 495 PUSH_STATE(SBRACE); 496 } 497 } else if (ksh_isalphx(c)) { 498 *wp++ = OSUBST; 499 *wp++ = 'X'; 500 do { 501 Xcheck(ws, wp); 502 *wp++ = c; 503 c = getsc(); 504 } while (ksh_isalnux(c)); 505 *wp++ = '\0'; 506 *wp++ = CSUBST; 507 *wp++ = 'X'; 508 ungetsc(c); 509 } else if (ctype(c, C_VAR1 | C_DIGIT)) { 510 Xcheck(ws, wp); 511 *wp++ = OSUBST; 512 *wp++ = 'X'; 513 *wp++ = c; 514 *wp++ = '\0'; 515 *wp++ = CSUBST; 516 *wp++ = 'X'; 517 } else { 518 *wp++ = CHAR; 519 *wp++ = '$'; 520 ungetsc(c); 521 } 522 break; 523 case '`': 524 subst_gravis: 525 PUSH_STATE(SBQUOTE); 526 *wp++ = COMSUB; 527 /* 528 * We need to know whether we are within double 529 * quotes, since most shells translate \" to " 530 * within "`\"`". This is not done in POSIX 531 * mode (2.2.3 Double-Quotes: The backquote 532 * shall retain its special meaning introducing 533 * the other form of command substitution (see 534 * Command Substitution). The portion of the 535 * quoted string from the initial backquote and 536 * the characters up to the next backquote that 537 * is not preceded by a <backslash>, having 538 * escape characters removed, defines that 539 * command whose output replaces "`...`" when 540 * the word is expanded.; 2.6.3 Command 541 * Substitution: Within the backquoted style 542 * of command substitution, <backslash> shall 543 * retain its literal meaning, except when 544 * followed by: '$', '`', or <backslash>. The 545 * search for the matching backquote shall be 546 * satisfied by the first unquoted non-escaped 547 * backquote; during this search, if a 548 * non-escaped backquote is encountered[], 549 * undefined results occur.). 550 */ 551 statep->ls_bool = false; 552 #ifdef austingroupbugs1015_is_still_not_resolved 553 if (Flag(FPOSIX)) 554 break; 555 #endif 556 s2 = statep; 557 base = state_info.base; 558 while (/* CONSTCOND */ 1) { 559 for (; s2 != base; s2--) { 560 if (s2->type == SDQUOTE) { 561 statep->ls_bool = true; 562 break; 563 } 564 } 565 if (s2 != base) 566 break; 567 if (!(s2 = s2->ls_base)) 568 break; 569 base = s2-- - STATE_BSIZE; 570 } 571 break; 572 case QCHAR: 573 if (cf & LQCHAR) { 574 *wp++ = QCHAR; 575 *wp++ = getsc(); 576 break; 577 } 578 /* FALLTHROUGH */ 579 default: 580 store_char: 581 *wp++ = CHAR; 582 *wp++ = c; 583 } 584 break; 585 586 case SEQUOTE: 587 if (c == '\'') { 588 POP_STATE(); 589 *wp++ = CQUOTE; 590 ignore_backslash_newline--; 591 } else if (c == '\\') { 592 if ((c2 = unbksl(true, getsc_i, ungetsc)) == -1) 593 c2 = getsc(); 594 if (c2 == 0) 595 statep->ls_bool = true; 596 if (!statep->ls_bool) { 597 char ts[4]; 598 599 if ((unsigned int)c2 < 0x100) { 600 *wp++ = QCHAR; 601 *wp++ = c2; 602 } else { 603 cz = utf_wctomb(ts, c2 - 0x100); 604 ts[cz] = 0; 605 cz = 0; 606 do { 607 *wp++ = QCHAR; 608 *wp++ = ts[cz]; 609 } while (ts[++cz]); 610 } 611 } 612 } else if (!statep->ls_bool) { 613 *wp++ = QCHAR; 614 *wp++ = c; 615 } 616 break; 617 618 case SSQUOTE: 619 if (c == '\'') { 620 POP_STATE(); 621 if ((cf & HEREDOC) || state == SQBRACE) 622 goto store_char; 623 *wp++ = CQUOTE; 624 ignore_backslash_newline--; 625 } else { 626 *wp++ = QCHAR; 627 *wp++ = c; 628 } 629 break; 630 631 case SDQUOTE: 632 if (c == '"') { 633 POP_STATE(); 634 *wp++ = CQUOTE; 635 } else 636 goto Subst; 637 break; 638 639 /* $(( ... )) */ 640 case SASPAREN: 641 if (c == '(') 642 statep->nparen++; 643 else if (c == ')') { 644 statep->nparen--; 645 if (statep->nparen == 1) { 646 /* end of EXPRSUB */ 647 POP_SRETRACE(); 648 649 if ((c2 = getsc()) == /*(*/ ')') { 650 cz = strlen(sp) - 2; 651 XcheckN(ws, wp, cz); 652 memcpy(wp, sp + 1, cz); 653 wp += cz; 654 afree(sp, ATEMP); 655 *wp++ = '\0'; 656 break; 657 } else { 658 Source *s; 659 660 ungetsc(c2); 661 /* 662 * mismatched parenthesis - 663 * assume we were really 664 * parsing a $(...) expression 665 */ 666 --wp; 667 s = pushs(SREREAD, 668 source->areap); 669 s->start = s->str = 670 s->u.freeme = sp; 671 s->next = source; 672 source = s; 673 goto subst_command; 674 } 675 } 676 } 677 /* reuse existing state machine */ 678 goto Sbase2; 679 680 case SQBRACE: 681 if (c == '\\') { 682 /* 683 * perform POSIX "quote removal" if the back- 684 * slash is "special", i.e. same cases as the 685 * {case '\\':} in Subst: plus closing brace; 686 * in mksh code "quote removal" on '\c' means 687 * write QCHAR+c, otherwise CHAR+\+CHAR+c are 688 * emitted (in heredocquote:) 689 */ 690 if ((c = getsc()) == '"' || c == '\\' || 691 c == '$' || c == '`' || c == /*{*/'}') 692 goto store_qchar; 693 goto heredocquote; 694 } 695 goto common_SQBRACE; 696 697 case SBRACE: 698 if (c == '\'') 699 goto open_ssquote_unless_heredoc; 700 else if (c == '\\') 701 goto getsc_qchar; 702 common_SQBRACE: 703 if (c == '"') 704 goto open_sdquote; 705 else if (c == '$') 706 goto subst_dollar_ex; 707 else if (c == '`') 708 goto subst_gravis; 709 else if (c != /*{*/ '}') 710 goto store_char; 711 POP_STATE(); 712 *wp++ = CSUBST; 713 *wp++ = /*{*/ '}'; 714 break; 715 716 /* Same as SBASE, except (,|,) treated specially */ 717 case STBRACEKORN: 718 if (c == '|') 719 *wp++ = SPAT; 720 else if (c == '(') { 721 *wp++ = OPAT; 722 /* simile for @ */ 723 *wp++ = ' '; 724 PUSH_STATE(SPATTERN); 725 } else /* FALLTHROUGH */ 726 case STBRACEBOURNE: 727 if (c == /*{*/ '}') { 728 POP_STATE(); 729 *wp++ = CSUBST; 730 *wp++ = /*{*/ '}'; 731 } else 732 goto Sbase1; 733 break; 734 735 case SBQUOTE: 736 if (c == '`') { 737 *wp++ = 0; 738 POP_STATE(); 739 } else if (c == '\\') { 740 switch (c = getsc()) { 741 case 0: 742 /* trailing \ is lost */ 743 break; 744 case '$': 745 case '`': 746 case '\\': 747 *wp++ = c; 748 break; 749 case '"': 750 if (statep->ls_bool) { 751 *wp++ = c; 752 break; 753 } 754 /* FALLTHROUGH */ 755 default: 756 *wp++ = '\\'; 757 *wp++ = c; 758 break; 759 } 760 } else 761 *wp++ = c; 762 break; 763 764 /* ONEWORD */ 765 case SWORD: 766 goto Subst; 767 768 /* LETEXPR: (( ... )) */ 769 case SLETPAREN: 770 if (c == /*(*/ ')') { 771 if (statep->nparen > 0) 772 --statep->nparen; 773 else if ((c2 = getsc()) == /*(*/ ')') { 774 c = 0; 775 *wp++ = CQUOTE; 776 goto Done; 777 } else { 778 Source *s; 779 780 ungetsc(c2); 781 ungetsc(c); 782 /* 783 * mismatched parenthesis - 784 * assume we were really 785 * parsing a (...) expression 786 */ 787 *wp = EOS; 788 sp = Xstring(ws, wp); 789 dp = wdstrip(sp + 1, WDS_TPUTS); 790 s = pushs(SREREAD, source->areap); 791 s->start = s->str = s->u.freeme = dp; 792 s->next = source; 793 source = s; 794 ungetsc('('/*)*/); 795 return ('('/*)*/); 796 } 797 } else if (c == '(') 798 /* 799 * parentheses inside quotes and 800 * backslashes are lost, but AT&T ksh 801 * doesn't count them either 802 */ 803 ++statep->nparen; 804 goto Sbase2; 805 806 /* << or <<- delimiter */ 807 case SHEREDELIM: 808 /* 809 * here delimiters need a special case since 810 * $ and `...` are not to be treated specially 811 */ 812 switch (c) { 813 case '\\': 814 if ((c = getsc())) { 815 /* trailing \ is lost */ 816 *wp++ = QCHAR; 817 *wp++ = c; 818 } 819 break; 820 case '\'': 821 goto open_ssquote_unless_heredoc; 822 case '$': 823 if ((c2 = getsc()) == '\'') { 824 open_sequote: 825 *wp++ = OQUOTE; 826 ignore_backslash_newline++; 827 PUSH_STATE(SEQUOTE); 828 statep->ls_bool = false; 829 break; 830 } else if (c2 == '"') { 831 /* FALLTHROUGH */ 832 case '"': 833 PUSH_SRETRACE(SHEREDQUOTE); 834 break; 835 } 836 ungetsc(c2); 837 /* FALLTHROUGH */ 838 default: 839 *wp++ = CHAR; 840 *wp++ = c; 841 } 842 break; 843 844 /* " in << or <<- delimiter */ 845 case SHEREDQUOTE: 846 if (c != '"') 847 goto Subst; 848 POP_SRETRACE(); 849 dp = strnul(sp) - 1; 850 /* remove the trailing double quote */ 851 *dp = '\0'; 852 /* store the quoted string */ 853 *wp++ = OQUOTE; 854 XcheckN(ws, wp, (dp - sp) * 2); 855 dp = sp; 856 while ((c = *dp++)) { 857 if (c == '\\') { 858 switch ((c = *dp++)) { 859 case '\\': 860 case '"': 861 case '$': 862 case '`': 863 break; 864 default: 865 *wp++ = CHAR; 866 *wp++ = '\\'; 867 break; 868 } 869 } 870 *wp++ = CHAR; 871 *wp++ = c; 872 } 873 afree(sp, ATEMP); 874 *wp++ = CQUOTE; 875 state = statep->type = SHEREDELIM; 876 break; 877 878 /* in *(...|...) pattern (*+?@!) */ 879 case SPATTERN: 880 if (c == /*(*/ ')') { 881 *wp++ = CPAT; 882 POP_STATE(); 883 } else if (c == '|') { 884 *wp++ = SPAT; 885 } else if (c == '(') { 886 *wp++ = OPAT; 887 /* simile for @ */ 888 *wp++ = ' '; 889 PUSH_STATE(SPATTERN); 890 } else 891 goto Sbase1; 892 break; 893 } 894 } 895 Done: 896 Xcheck(ws, wp); 897 if (statep != &states[1]) 898 /* XXX figure out what is missing */ 899 yyerror("no closing quote\n"); 900 901 /* This done to avoid tests for SHEREDELIM wherever SBASE tested */ 902 if (state == SHEREDELIM) 903 state = SBASE; 904 905 dp = Xstring(ws, wp); 906 if (state == SBASE && ( 907 #ifndef MKSH_LEGACY_MODE 908 (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) || 909 #endif 910 c == '<' || c == '>')) { 911 struct ioword *iop = alloc(sizeof(struct ioword), ATEMP); 912 913 if (Xlength(ws, wp) == 0) 914 iop->unit = c == '<' ? 0 : 1; 915 else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) { 916 if (dp[c2] != CHAR) 917 goto no_iop; 918 if (!ksh_isdigit(dp[c2 + 1])) 919 goto no_iop; 920 iop->unit = iop->unit * 10 + ksh_numdig(dp[c2 + 1]); 921 if (iop->unit >= FDBASE) 922 goto no_iop; 923 } 924 925 if (c == '&') { 926 if ((c2 = getsc()) != '>') { 927 ungetsc(c2); 928 goto no_iop; 929 } 930 c = c2; 931 iop->ioflag = IOBASH; 932 } else 933 iop->ioflag = 0; 934 935 c2 = getsc(); 936 /* <<, >>, <> are ok, >< is not */ 937 if (c == c2 || (c == '<' && c2 == '>')) { 938 iop->ioflag |= c == c2 ? 939 (c == '>' ? IOCAT : IOHERE) : IORDWR; 940 if (iop->ioflag == IOHERE) { 941 if ((c2 = getsc()) == '-') 942 iop->ioflag |= IOSKIP; 943 else if (c2 == '<') 944 iop->ioflag |= IOHERESTR; 945 else 946 ungetsc(c2); 947 } 948 } else if (c2 == '&') 949 iop->ioflag |= IODUP | (c == '<' ? IORDUP : 0); 950 else { 951 iop->ioflag |= c == '>' ? IOWRITE : IOREAD; 952 if (c == '>' && c2 == '|') 953 iop->ioflag |= IOCLOB; 954 else 955 ungetsc(c2); 956 } 957 958 iop->ioname = NULL; 959 iop->delim = NULL; 960 iop->heredoc = NULL; 961 /* free word */ 962 Xfree(ws, wp); 963 yylval.iop = iop; 964 return (REDIR); 965 no_iop: 966 afree(iop, ATEMP); 967 } 968 969 if (wp == dp && state == SBASE) { 970 /* free word */ 971 Xfree(ws, wp); 972 /* no word, process LEX1 character */ 973 if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) { 974 if ((c2 = getsc()) == c) 975 c = (c == ';') ? BREAK : 976 (c == '|') ? LOGOR : 977 (c == '&') ? LOGAND : 978 /* c == '(' ) */ MDPAREN; 979 else if (c == '|' && c2 == '&') 980 c = COPROC; 981 else if (c == ';' && c2 == '|') 982 c = BRKEV; 983 else if (c == ';' && c2 == '&') 984 c = BRKFT; 985 else 986 ungetsc(c2); 987 #ifndef MKSH_SMALL 988 if (c == BREAK) { 989 if ((c2 = getsc()) == '&') 990 c = BRKEV; 991 else 992 ungetsc(c2); 993 } 994 #endif 995 } else if (c == '\n') { 996 if (cf & HEREDELIM) 997 ungetsc(c); 998 else { 999 gethere(); 1000 if (cf & CONTIN) 1001 goto Again; 1002 } 1003 } 1004 return (c); 1005 } 1006 1007 /* terminate word */ 1008 *wp++ = EOS; 1009 yylval.cp = Xclose(ws, wp); 1010 if (state == SWORD || state == SLETPAREN 1011 /* XXX ONEWORD? */) 1012 return (LWORD); 1013 1014 /* unget terminator */ 1015 ungetsc(c); 1016 1017 /* 1018 * note: the alias-vs-function code below depends on several 1019 * interna: starting from here, source->str is not modified; 1020 * the way getsc() and ungetsc() operate; etc. 1021 */ 1022 1023 /* copy word to unprefixed string ident */ 1024 sp = yylval.cp; 1025 dp = ident; 1026 while ((dp - ident) < IDENT && (c = *sp++) == CHAR) 1027 *dp++ = *sp++; 1028 if (c != EOS) 1029 /* word is not unquoted */ 1030 dp = ident; 1031 /* make sure the ident array stays NUL padded */ 1032 memset(dp, 0, (ident + IDENT) - dp + 1); 1033 1034 if (!(cf & (KEYWORD | ALIAS))) 1035 return (LWORD); 1036 1037 if (*ident != '\0') { 1038 struct tbl *p; 1039 uint32_t h = hash(ident); 1040 1041 if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) && 1042 (!(cf & ESACONLY) || p->val.i == ESAC || 1043 p->val.i == /*{*/ '}')) { 1044 afree(yylval.cp, ATEMP); 1045 return (p->val.i); 1046 } 1047 if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) && 1048 (p->flag & ISSET)) { 1049 /* 1050 * this still points to the same character as the 1051 * ungetsc'd terminator from above 1052 */ 1053 const char *cp = source->str; 1054 1055 /* prefer POSIX but not Korn functions over aliases */ 1056 while (*cp == ' ' || *cp == '\t') 1057 /* 1058 * this is like getsc() without skipping 1059 * over Source boundaries (including not 1060 * parsing ungetsc'd characters that got 1061 * pushed into an SREREAD) which is what 1062 * we want here anyway: find out whether 1063 * the alias name is followed by a POSIX 1064 * function definition 1065 */ 1066 ++cp; 1067 /* prefer functions over aliases */ 1068 if (cp[0] != '(' || cp[1] != ')') { 1069 Source *s = source; 1070 1071 while (s && (s->flags & SF_HASALIAS)) 1072 if (s->u.tblp == p) 1073 return (LWORD); 1074 else 1075 s = s->next; 1076 /* push alias expansion */ 1077 s = pushs(SALIAS, source->areap); 1078 s->start = s->str = p->val.s; 1079 s->u.tblp = p; 1080 s->flags |= SF_HASALIAS; 1081 s->next = source; 1082 if (source->type == SEOF) { 1083 /* prevent infinite recursion at EOS */ 1084 source->u.tblp = p; 1085 source->flags |= SF_HASALIAS; 1086 } 1087 source = s; 1088 afree(yylval.cp, ATEMP); 1089 goto Again; 1090 } 1091 } 1092 } else if (cf & ALIAS) { 1093 /* retain typeset et al. even when quoted */ 1094 if (assign_command((dp = wdstrip(yylval.cp, 0)), true)) 1095 strlcpy(ident, dp, sizeof(ident)); 1096 afree(dp, ATEMP); 1097 } 1098 1099 return (LWORD); 1100 } 1101 1102 static void 1103 gethere(void) 1104 { 1105 struct ioword **p; 1106 1107 for (p = heres; p < herep; p++) 1108 if (!((*p)->ioflag & IOHERESTR)) 1109 readhere(*p); 1110 herep = heres; 1111 } 1112 1113 /* 1114 * read "<<word" text into temp file 1115 */ 1116 1117 static void 1118 readhere(struct ioword *iop) 1119 { 1120 int c; 1121 const char *eof, *eofp; 1122 XString xs; 1123 char *xp; 1124 size_t xpos; 1125 1126 eof = evalstr(iop->delim, 0); 1127 1128 if (!(iop->ioflag & IOEVAL)) 1129 ignore_backslash_newline++; 1130 1131 Xinit(xs, xp, 256, ATEMP); 1132 1133 heredoc_read_line: 1134 /* beginning of line */ 1135 eofp = eof; 1136 xpos = Xsavepos(xs, xp); 1137 if (iop->ioflag & IOSKIP) { 1138 /* skip over leading tabs */ 1139 while ((c = getsc()) == '\t') 1140 ; /* nothing */ 1141 goto heredoc_parse_char; 1142 } 1143 heredoc_read_char: 1144 c = getsc(); 1145 heredoc_parse_char: 1146 /* compare with here document marker */ 1147 if (!*eofp) { 1148 /* end of here document marker, what to do? */ 1149 switch (c) { 1150 case /*(*/ ')': 1151 if (!subshell_nesting_type) 1152 /*- 1153 * not allowed outside $(...) or (...) 1154 * => mismatch 1155 */ 1156 break; 1157 /* allow $(...) or (...) to close here */ 1158 ungetsc(/*(*/ ')'); 1159 /* FALLTHROUGH */ 1160 case 0: 1161 /* 1162 * Allow EOF here to commands without trailing 1163 * newlines (mksh -c '...') will work as well. 1164 */ 1165 case '\n': 1166 /* Newline terminates here document marker */ 1167 goto heredoc_found_terminator; 1168 } 1169 } else if (c == *eofp++) 1170 /* store; then read and compare next character */ 1171 goto heredoc_store_and_loop; 1172 /* nope, mismatch; read until end of line */ 1173 while (c != '\n') { 1174 if (!c) 1175 /* oops, reached EOF */ 1176 yyerror("%s '%s' unclosed\n", "here document", eof); 1177 /* store character */ 1178 Xcheck(xs, xp); 1179 Xput(xs, xp, c); 1180 /* read next character */ 1181 c = getsc(); 1182 } 1183 /* we read a newline as last character */ 1184 heredoc_store_and_loop: 1185 /* store character */ 1186 Xcheck(xs, xp); 1187 Xput(xs, xp, c); 1188 if (c == '\n') 1189 goto heredoc_read_line; 1190 goto heredoc_read_char; 1191 1192 heredoc_found_terminator: 1193 /* jump back to saved beginning of line */ 1194 xp = Xrestpos(xs, xp, xpos); 1195 /* terminate, close and store */ 1196 Xput(xs, xp, '\0'); 1197 iop->heredoc = Xclose(xs, xp); 1198 1199 if (!(iop->ioflag & IOEVAL)) 1200 ignore_backslash_newline--; 1201 } 1202 1203 void 1204 yyerror(const char *fmt, ...) 1205 { 1206 va_list va; 1207 1208 /* pop aliases and re-reads */ 1209 while (source->type == SALIAS || source->type == SREREAD) 1210 source = source->next; 1211 /* zap pending input */ 1212 source->str = null; 1213 1214 error_prefix(true); 1215 va_start(va, fmt); 1216 shf_vfprintf(shl_out, fmt, va); 1217 va_end(va); 1218 errorfz(); 1219 } 1220 1221 /* 1222 * input for yylex with alias expansion 1223 */ 1224 1225 Source * 1226 pushs(int type, Area *areap) 1227 { 1228 Source *s; 1229 1230 s = alloc(sizeof(Source), areap); 1231 memset(s, 0, sizeof(Source)); 1232 s->type = type; 1233 s->str = null; 1234 s->areap = areap; 1235 if (type == SFILE || type == SSTDIN) 1236 XinitN(s->xs, 256, s->areap); 1237 return (s); 1238 } 1239 1240 static int 1241 getsc_uu(void) 1242 { 1243 Source *s = source; 1244 int c; 1245 1246 while ((c = *s->str++) == 0) { 1247 /* return 0 for EOF by default */ 1248 s->str = NULL; 1249 switch (s->type) { 1250 case SEOF: 1251 s->str = null; 1252 return (0); 1253 1254 case SSTDIN: 1255 case SFILE: 1256 getsc_line(s); 1257 break; 1258 1259 case SWSTR: 1260 break; 1261 1262 case SSTRING: 1263 case SSTRINGCMDLINE: 1264 break; 1265 1266 case SWORDS: 1267 s->start = s->str = *s->u.strv++; 1268 s->type = SWORDSEP; 1269 break; 1270 1271 case SWORDSEP: 1272 if (*s->u.strv == NULL) { 1273 s->start = s->str = "\n"; 1274 s->type = SEOF; 1275 } else { 1276 s->start = s->str = " "; 1277 s->type = SWORDS; 1278 } 1279 break; 1280 1281 case SALIAS: 1282 if (s->flags & SF_ALIASEND) { 1283 /* pass on an unused SF_ALIAS flag */ 1284 source = s->next; 1285 source->flags |= s->flags & SF_ALIAS; 1286 s = source; 1287 } else if (*s->u.tblp->val.s && 1288 (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) { 1289 /* pop source stack */ 1290 source = s = s->next; 1291 /* 1292 * Note that this alias ended with a 1293 * space, enabling alias expansion on 1294 * the following word. 1295 */ 1296 s->flags |= SF_ALIAS; 1297 } else { 1298 /* 1299 * At this point, we need to keep the current 1300 * alias in the source list so recursive 1301 * aliases can be detected and we also need to 1302 * return the next character. Do this by 1303 * temporarily popping the alias to get the 1304 * next character and then put it back in the 1305 * source list with the SF_ALIASEND flag set. 1306 */ 1307 /* pop source stack */ 1308 source = s->next; 1309 source->flags |= s->flags & SF_ALIAS; 1310 c = getsc_uu(); 1311 if (c) { 1312 s->flags |= SF_ALIASEND; 1313 s->ugbuf[0] = c; s->ugbuf[1] = '\0'; 1314 s->start = s->str = s->ugbuf; 1315 s->next = source; 1316 source = s; 1317 } else { 1318 s = source; 1319 /* avoid reading EOF twice */ 1320 s->str = NULL; 1321 break; 1322 } 1323 } 1324 continue; 1325 1326 case SREREAD: 1327 if (s->start != s->ugbuf) 1328 /* yuck */ 1329 afree(s->u.freeme, ATEMP); 1330 source = s = s->next; 1331 continue; 1332 } 1333 if (s->str == NULL) { 1334 s->type = SEOF; 1335 s->start = s->str = null; 1336 return ('\0'); 1337 } 1338 if (s->flags & SF_ECHO) { 1339 shf_puts(s->str, shl_out); 1340 shf_flush(shl_out); 1341 } 1342 } 1343 return (c); 1344 } 1345 1346 static void 1347 getsc_line(Source *s) 1348 { 1349 char *xp = Xstring(s->xs, xp), *cp; 1350 bool interactive = Flag(FTALKING) && s->type == SSTDIN; 1351 bool have_tty = tobool(interactive && (s->flags & SF_TTY)); 1352 1353 /* Done here to ensure nothing odd happens when a timeout occurs */ 1354 XcheckN(s->xs, xp, LINE); 1355 *xp = '\0'; 1356 s->start = s->str = xp; 1357 1358 if (have_tty && ksh_tmout) { 1359 ksh_tmout_state = TMOUT_READING; 1360 alarm(ksh_tmout); 1361 } 1362 if (interactive) 1363 change_winsz(); 1364 #ifndef MKSH_NO_CMDLINE_EDITING 1365 if (have_tty && ( 1366 #if !MKSH_S_NOVI 1367 Flag(FVI) || 1368 #endif 1369 Flag(FEMACS) || Flag(FGMACS))) { 1370 int nread; 1371 1372 nread = x_read(xp); 1373 if (nread < 0) 1374 /* read error */ 1375 nread = 0; 1376 xp[nread] = '\0'; 1377 xp += nread; 1378 } else 1379 #endif 1380 { 1381 if (interactive) 1382 pprompt(prompt, 0); 1383 else 1384 s->line++; 1385 1386 while (/* CONSTCOND */ 1) { 1387 char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf); 1388 1389 if (!p && shf_error(s->u.shf) && 1390 shf_errno(s->u.shf) == EINTR) { 1391 shf_clearerr(s->u.shf); 1392 if (trap) 1393 runtraps(0); 1394 continue; 1395 } 1396 if (!p || (xp = p, xp[-1] == '\n')) 1397 break; 1398 /* double buffer size */ 1399 /* move past NUL so doubling works... */ 1400 xp++; 1401 XcheckN(s->xs, xp, Xlength(s->xs, xp)); 1402 /* ...and move back again */ 1403 xp--; 1404 } 1405 /* 1406 * flush any unwanted input so other programs/builtins 1407 * can read it. Not very optimal, but less error prone 1408 * than flushing else where, dealing with redirections, 1409 * etc. 1410 * TODO: reduce size of shf buffer (~128?) if SSTDIN 1411 */ 1412 if (s->type == SSTDIN) 1413 shf_flush(s->u.shf); 1414 } 1415 /* 1416 * XXX: temporary kludge to restore source after a 1417 * trap may have been executed. 1418 */ 1419 source = s; 1420 if (have_tty && ksh_tmout) { 1421 ksh_tmout_state = TMOUT_EXECUTING; 1422 alarm(0); 1423 } 1424 cp = Xstring(s->xs, xp); 1425 rndpush(cp); 1426 s->start = s->str = cp; 1427 strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp)); 1428 /* Note: if input is all nulls, this is not eof */ 1429 if (Xlength(s->xs, xp) == 0) { 1430 /* EOF */ 1431 if (s->type == SFILE) 1432 shf_fdclose(s->u.shf); 1433 s->str = NULL; 1434 } else if (interactive && *s->str) { 1435 if (cur_prompt != PS1) 1436 histsave(&s->line, s->str, HIST_APPEND, true); 1437 else if (!ctype(*s->str, C_IFS | C_IFSWS)) 1438 histsave(&s->line, s->str, HIST_QUEUE, true); 1439 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY 1440 else 1441 goto check_for_sole_return; 1442 } else if (interactive && cur_prompt == PS1) { 1443 check_for_sole_return: 1444 cp = Xstring(s->xs, xp); 1445 while (*cp && ctype(*cp, C_IFSWS)) 1446 ++cp; 1447 if (!*cp) { 1448 histsave(&s->line, NULL, HIST_FLUSH, true); 1449 histsync(); 1450 } 1451 #endif 1452 } 1453 if (interactive) 1454 set_prompt(PS2, NULL); 1455 } 1456 1457 void 1458 set_prompt(int to, Source *s) 1459 { 1460 cur_prompt = (uint8_t)to; 1461 1462 switch (to) { 1463 /* command */ 1464 case PS1: 1465 /* 1466 * Substitute ! and !! here, before substitutions are done 1467 * so ! in expanded variables are not expanded. 1468 * NOTE: this is not what AT&T ksh does (it does it after 1469 * substitutions, POSIX doesn't say which is to be done. 1470 */ 1471 { 1472 struct shf *shf; 1473 char * volatile ps1; 1474 Area *saved_atemp; 1475 int saved_lineno; 1476 1477 ps1 = str_val(global("PS1")); 1478 shf = shf_sopen(NULL, strlen(ps1) * 2, 1479 SHF_WR | SHF_DYNAMIC, NULL); 1480 while (*ps1) 1481 if (*ps1 != '!' || *++ps1 == '!') 1482 shf_putchar(*ps1++, shf); 1483 else 1484 shf_fprintf(shf, "%lu", s ? 1485 (unsigned long)s->line + 1 : 0UL); 1486 ps1 = shf_sclose(shf); 1487 saved_lineno = current_lineno; 1488 if (s) 1489 current_lineno = s->line + 1; 1490 saved_atemp = ATEMP; 1491 newenv(E_ERRH); 1492 if (kshsetjmp(e->jbuf)) { 1493 prompt = safe_prompt; 1494 /* 1495 * Don't print an error - assume it has already 1496 * been printed. Reason is we may have forked 1497 * to run a command and the child may be 1498 * unwinding its stack through this code as it 1499 * exits. 1500 */ 1501 } else { 1502 char *cp = substitute(ps1, 0); 1503 strdupx(prompt, cp, saved_atemp); 1504 } 1505 current_lineno = saved_lineno; 1506 quitenv(NULL); 1507 } 1508 break; 1509 /* command continuation */ 1510 case PS2: 1511 prompt = str_val(global("PS2")); 1512 break; 1513 } 1514 } 1515 1516 int 1517 pprompt(const char *cp, int ntruncate) 1518 { 1519 char delimiter = 0; 1520 bool doprint = (ntruncate != -1); 1521 bool indelimit = false; 1522 int columns = 0, lines = 0; 1523 1524 /* 1525 * Undocumented AT&T ksh feature: 1526 * If the second char in the prompt string is \r then the first 1527 * char is taken to be a non-printing delimiter and any chars 1528 * between two instances of the delimiter are not considered to 1529 * be part of the prompt length 1530 */ 1531 if (*cp && cp[1] == '\r') { 1532 delimiter = *cp; 1533 cp += 2; 1534 } 1535 for (; *cp; cp++) { 1536 if (indelimit && *cp != delimiter) 1537 ; 1538 else if (*cp == '\n' || *cp == '\r') { 1539 lines += columns / x_cols + ((*cp == '\n') ? 1 : 0); 1540 columns = 0; 1541 } else if (*cp == '\t') { 1542 columns = (columns | 7) + 1; 1543 } else if (*cp == '\b') { 1544 if (columns > 0) 1545 columns--; 1546 } else if (*cp == delimiter) 1547 indelimit = !indelimit; 1548 else if (UTFMODE && ((unsigned char)*cp > 0x7F)) { 1549 const char *cp2; 1550 columns += utf_widthadj(cp, &cp2); 1551 if (doprint && (indelimit || 1552 (ntruncate < (x_cols * lines + columns)))) 1553 shf_write(cp, cp2 - cp, shl_out); 1554 cp = cp2 - /* loop increment */ 1; 1555 continue; 1556 } else 1557 columns++; 1558 if (doprint && (*cp != delimiter) && 1559 (indelimit || (ntruncate < (x_cols * lines + columns)))) 1560 shf_putc(*cp, shl_out); 1561 } 1562 if (doprint) 1563 shf_flush(shl_out); 1564 return (x_cols * lines + columns); 1565 } 1566 1567 /* 1568 * Read the variable part of a ${...} expression (i.e. up to but not 1569 * including the :[-+?=#%] or close-brace). 1570 */ 1571 static char * 1572 get_brace_var(XString *wsp, char *wp) 1573 { 1574 char c; 1575 enum parse_state { 1576 PS_INITIAL, PS_SAW_HASH, PS_IDENT, 1577 PS_NUMBER, PS_VAR1 1578 } state = PS_INITIAL; 1579 1580 while (/* CONSTCOND */ 1) { 1581 c = getsc(); 1582 /* State machine to figure out where the variable part ends. */ 1583 switch (state) { 1584 case PS_INITIAL: 1585 if (c == '#' || c == '!' || c == '%') { 1586 state = PS_SAW_HASH; 1587 break; 1588 } 1589 /* FALLTHROUGH */ 1590 case PS_SAW_HASH: 1591 if (ksh_isalphx(c)) 1592 state = PS_IDENT; 1593 else if (ksh_isdigit(c)) 1594 state = PS_NUMBER; 1595 else if (c == '#') { 1596 if (state == PS_SAW_HASH) { 1597 char c2; 1598 1599 c2 = getsc(); 1600 ungetsc(c2); 1601 if (c2 != /*{*/ '}') { 1602 ungetsc(c); 1603 goto out; 1604 } 1605 } 1606 state = PS_VAR1; 1607 } else if (ctype(c, C_VAR1)) 1608 state = PS_VAR1; 1609 else 1610 goto out; 1611 break; 1612 case PS_IDENT: 1613 if (!ksh_isalnux(c)) { 1614 if (c == '[') { 1615 char *tmp, *p; 1616 1617 if (!arraysub(&tmp)) 1618 yyerror("missing ]\n"); 1619 *wp++ = c; 1620 for (p = tmp; *p; ) { 1621 Xcheck(*wsp, wp); 1622 *wp++ = *p++; 1623 } 1624 afree(tmp, ATEMP); 1625 /* the ] */ 1626 c = getsc(); 1627 } 1628 goto out; 1629 } 1630 break; 1631 case PS_NUMBER: 1632 if (!ksh_isdigit(c)) 1633 goto out; 1634 break; 1635 case PS_VAR1: 1636 goto out; 1637 } 1638 Xcheck(*wsp, wp); 1639 *wp++ = c; 1640 } 1641 out: 1642 /* end of variable part */ 1643 *wp++ = '\0'; 1644 ungetsc(c); 1645 return (wp); 1646 } 1647 1648 /* 1649 * Save an array subscript - returns true if matching bracket found, false 1650 * if eof or newline was found. 1651 * (Returned string double null terminated) 1652 */ 1653 static bool 1654 arraysub(char **strp) 1655 { 1656 XString ws; 1657 char *wp, c; 1658 /* we are just past the initial [ */ 1659 unsigned int depth = 1; 1660 1661 Xinit(ws, wp, 32, ATEMP); 1662 1663 do { 1664 c = getsc(); 1665 Xcheck(ws, wp); 1666 *wp++ = c; 1667 if (c == '[') 1668 depth++; 1669 else if (c == ']') 1670 depth--; 1671 } while (depth > 0 && c && c != '\n'); 1672 1673 *wp++ = '\0'; 1674 *strp = Xclose(ws, wp); 1675 1676 return (tobool(depth == 0)); 1677 } 1678 1679 /* Unget a char: handles case when we are already at the start of the buffer */ 1680 static void 1681 ungetsc(int c) 1682 { 1683 struct sretrace_info *rp = retrace_info; 1684 1685 if (backslash_skip) 1686 backslash_skip--; 1687 /* Don't unget EOF... */ 1688 if (source->str == null && c == '\0') 1689 return; 1690 while (rp) { 1691 if (Xlength(rp->xs, rp->xp)) 1692 rp->xp--; 1693 rp = rp->next; 1694 } 1695 ungetsc_i(c); 1696 } 1697 static void 1698 ungetsc_i(int c) 1699 { 1700 if (source->str > source->start) 1701 source->str--; 1702 else { 1703 Source *s; 1704 1705 s = pushs(SREREAD, source->areap); 1706 s->ugbuf[0] = c; s->ugbuf[1] = '\0'; 1707 s->start = s->str = s->ugbuf; 1708 s->next = source; 1709 source = s; 1710 } 1711 } 1712 1713 1714 /* Called to get a char that isn't a \newline sequence. */ 1715 static int 1716 getsc_bn(void) 1717 { 1718 int c, c2; 1719 1720 if (ignore_backslash_newline) 1721 return (o_getsc_u()); 1722 1723 if (backslash_skip == 1) { 1724 backslash_skip = 2; 1725 return (o_getsc_u()); 1726 } 1727 1728 backslash_skip = 0; 1729 1730 while (/* CONSTCOND */ 1) { 1731 c = o_getsc_u(); 1732 if (c == '\\') { 1733 if ((c2 = o_getsc_u()) == '\n') 1734 /* ignore the \newline; get the next char... */ 1735 continue; 1736 ungetsc_i(c2); 1737 backslash_skip = 1; 1738 } 1739 return (c); 1740 } 1741 } 1742 1743 void 1744 yyskiputf8bom(void) 1745 { 1746 int c; 1747 1748 if ((unsigned char)(c = o_getsc_u()) != 0xEF) { 1749 ungetsc_i(c); 1750 return; 1751 } 1752 if ((unsigned char)(c = o_getsc_u()) != 0xBB) { 1753 ungetsc_i(c); 1754 ungetsc_i(0xEF); 1755 return; 1756 } 1757 if ((unsigned char)(c = o_getsc_u()) != 0xBF) { 1758 ungetsc_i(c); 1759 ungetsc_i(0xBB); 1760 ungetsc_i(0xEF); 1761 return; 1762 } 1763 UTFMODE |= 8; 1764 } 1765 1766 static Lex_state * 1767 push_state_i(State_info *si, Lex_state *old_end) 1768 { 1769 Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP); 1770 1771 news[0].ls_base = old_end; 1772 si->base = &news[0]; 1773 si->end = &news[STATE_BSIZE]; 1774 return (&news[1]); 1775 } 1776 1777 static Lex_state * 1778 pop_state_i(State_info *si, Lex_state *old_end) 1779 { 1780 Lex_state *old_base = si->base; 1781 1782 si->base = old_end->ls_base - STATE_BSIZE; 1783 si->end = old_end->ls_base; 1784 1785 afree(old_base, ATEMP); 1786 1787 return (si->base + STATE_BSIZE - 1); 1788 } 1789