1 /* $OpenBSD: lex.c,v 1.49 2013/12/17 16:37:06 deraadt Exp $ */ 2 3 /*- 4 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 5 * 2011, 2012, 2013, 2014 6 * Thorsten Glaser <tg (at) mirbsd.org> 7 * 8 * Provided that these terms and disclaimer and all copyright notices 9 * are retained or reproduced in an accompanying document, permission 10 * is granted to deal in this work without restriction, including un- 11 * limited rights to use, publicly perform, distribute, sell, modify, 12 * merge, give away, or sublicence. 13 * 14 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to 15 * the utmost extent permitted by applicable law, neither express nor 16 * implied; without malicious intent or gross negligence. In no event 17 * may a licensor, author or contributor be held liable for indirect, 18 * direct, other damage, loss, or other issues arising in any way out 19 * of dealing in the work, even if advised of the possibility of such 20 * damage or existence of a defect, except proven that it results out 21 * of said person's immediate fault when using the work as intended. 22 */ 23 24 #include "sh.h" 25 26 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.193 2014/06/29 11:28:28 tg Exp $"); 27 28 /* 29 * states while lexing word 30 */ 31 #define SBASE 0 /* outside any lexical constructs */ 32 #define SWORD 1 /* implicit quoting for substitute() */ 33 #define SLETPAREN 2 /* inside (( )), implicit quoting */ 34 #define SSQUOTE 3 /* inside '' */ 35 #define SDQUOTE 4 /* inside "" */ 36 #define SEQUOTE 5 /* inside $'' */ 37 #define SBRACE 6 /* inside ${} */ 38 #define SQBRACE 7 /* inside "${}" */ 39 #define SBQUOTE 8 /* inside `` */ 40 #define SASPAREN 9 /* inside $(( )) */ 41 #define SHEREDELIM 10 /* parsing <<,<<-,<<< delimiter */ 42 #define SHEREDQUOTE 11 /* parsing " in <<,<<-,<<< delimiter */ 43 #define SPATTERN 12 /* parsing *(...|...) pattern (*+?@!) */ 44 #define SADELIM 13 /* like SBASE, looking for delimiter */ 45 #define STBRACEKORN 14 /* parsing ${...[#%]...} !FSH */ 46 #define STBRACEBOURNE 15 /* parsing ${...[#%]...} FSH */ 47 #define SINVALID 255 /* invalid state */ 48 49 struct sretrace_info { 50 struct sretrace_info *next; 51 XString xs; 52 char *xp; 53 }; 54 55 /* 56 * Structure to keep track of the lexing state and the various pieces of info 57 * needed for each particular state. 58 */ 59 typedef struct lex_state { 60 union { 61 /* point to the next state block */ 62 struct lex_state *base; 63 /* marks start of state output in output string */ 64 int start; 65 /* SBQUOTE: true if in double quotes: "`...`" */ 66 /* SEQUOTE: got NUL, ignore rest of string */ 67 bool abool; 68 /* SADELIM information */ 69 struct { 70 /* character to search for */ 71 unsigned char delimiter; 72 /* max. number of delimiters */ 73 unsigned char num; 74 } adelim; 75 } u; 76 /* count open parentheses */ 77 short nparen; 78 /* type of this state */ 79 uint8_t type; 80 } Lex_state; 81 #define ls_base u.base 82 #define ls_start u.start 83 #define ls_bool u.abool 84 #define ls_adelim u.adelim 85 86 typedef struct { 87 Lex_state *base; 88 Lex_state *end; 89 } State_info; 90 91 static void readhere(struct ioword *); 92 static void ungetsc(int); 93 static void ungetsc_i(int); 94 static int getsc_uu(void); 95 static void getsc_line(Source *); 96 static int getsc_bn(void); 97 static int s_get(void); 98 static void s_put(int); 99 static char *get_brace_var(XString *, char *); 100 static bool arraysub(char **); 101 static void gethere(bool); 102 static Lex_state *push_state_i(State_info *, Lex_state *); 103 static Lex_state *pop_state_i(State_info *, Lex_state *); 104 105 static int backslash_skip; 106 static int ignore_backslash_newline; 107 108 /* optimised getsc_bn() */ 109 #define o_getsc() (*source->str != '\0' && *source->str != '\\' && \ 110 !backslash_skip ? *source->str++ : getsc_bn()) 111 /* optimised getsc_uu() */ 112 #define o_getsc_u() ((*source->str != '\0') ? *source->str++ : getsc_uu()) 113 114 /* retrace helper */ 115 #define o_getsc_r(carg) { \ 116 int cev = (carg); \ 117 struct sretrace_info *rp = retrace_info; \ 118 \ 119 while (rp) { \ 120 Xcheck(rp->xs, rp->xp); \ 121 *rp->xp++ = cev; \ 122 rp = rp->next; \ 123 } \ 124 \ 125 return (cev); \ 126 } 127 128 #if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST) 129 static int getsc(void); 130 131 static int 132 getsc(void) 133 { 134 o_getsc_r(o_getsc()); 135 } 136 #else 137 static int getsc_r(int); 138 139 static int 140 getsc_r(int c) 141 { 142 o_getsc_r(c); 143 } 144 145 #define getsc() getsc_r(o_getsc()) 146 #endif 147 148 #define STATE_BSIZE 8 149 150 #define PUSH_STATE(s) do { \ 151 if (++statep == state_info.end) \ 152 statep = push_state_i(&state_info, statep); \ 153 state = statep->type = (s); \ 154 } while (/* CONSTCOND */ 0) 155 156 #define POP_STATE() do { \ 157 if (--statep == state_info.base) \ 158 statep = pop_state_i(&state_info, statep); \ 159 state = statep->type; \ 160 } while (/* CONSTCOND */ 0) 161 162 #define PUSH_SRETRACE(s) do { \ 163 struct sretrace_info *ri; \ 164 \ 165 PUSH_STATE(s); \ 166 statep->ls_start = Xsavepos(ws, wp); \ 167 ri = alloc(sizeof(struct sretrace_info), ATEMP); \ 168 Xinit(ri->xs, ri->xp, 64, ATEMP); \ 169 ri->next = retrace_info; \ 170 retrace_info = ri; \ 171 } while (/* CONSTCOND */ 0) 172 173 #define POP_SRETRACE() do { \ 174 wp = Xrestpos(ws, wp, statep->ls_start); \ 175 *retrace_info->xp = '\0'; \ 176 sp = Xstring(retrace_info->xs, retrace_info->xp); \ 177 dp = (void *)retrace_info; \ 178 retrace_info = retrace_info->next; \ 179 afree(dp, ATEMP); \ 180 POP_STATE(); \ 181 } while (/* CONSTCOND */ 0) 182 183 /** 184 * Lexical analyser 185 * 186 * tokens are not regular expressions, they are LL(1). 187 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))". 188 * hence the state stack. Note "$(...)" are now parsed recursively. 189 */ 190 191 int 192 yylex(int cf) 193 { 194 Lex_state states[STATE_BSIZE], *statep, *s2, *base; 195 State_info state_info; 196 int c, c2, state; 197 size_t cz; 198 XString ws; /* expandable output word */ 199 char *wp; /* output word pointer */ 200 char *sp, *dp; 201 202 Again: 203 states[0].type = SINVALID; 204 states[0].ls_base = NULL; 205 statep = &states[1]; 206 state_info.base = states; 207 state_info.end = &state_info.base[STATE_BSIZE]; 208 209 Xinit(ws, wp, 64, ATEMP); 210 211 backslash_skip = 0; 212 ignore_backslash_newline = 0; 213 214 if (cf & ONEWORD) 215 state = SWORD; 216 else if (cf & LETEXPR) { 217 /* enclose arguments in (double) quotes */ 218 *wp++ = OQUOTE; 219 state = SLETPAREN; 220 statep->nparen = 0; 221 } else { 222 /* normal lexing */ 223 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE; 224 while ((c = getsc()) == ' ' || c == '\t') 225 ; 226 if (c == '#') { 227 ignore_backslash_newline++; 228 while ((c = getsc()) != '\0' && c != '\n') 229 ; 230 ignore_backslash_newline--; 231 } 232 ungetsc(c); 233 } 234 if (source->flags & SF_ALIAS) { 235 /* trailing ' ' in alias definition */ 236 source->flags &= ~SF_ALIAS; 237 cf |= ALIAS; 238 } 239 240 /* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */ 241 statep->type = state; 242 243 /* check for here string */ 244 if (state == SHEREDELIM) { 245 c = getsc(); 246 if (c == '<') { 247 state = SHEREDELIM; 248 while ((c = getsc()) == ' ' || c == '\t') 249 ; 250 ungetsc(c); 251 c = '<'; 252 goto accept_nonword; 253 } 254 ungetsc(c); 255 } 256 257 /* collect non-special or quoted characters to form word */ 258 while (!((c = getsc()) == 0 || 259 ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) { 260 if (state == SBASE && 261 subshell_nesting_type == /*{*/ '}' && 262 c == /*{*/ '}') 263 /* possibly end ${ :;} */ 264 break; 265 accept_nonword: 266 Xcheck(ws, wp); 267 switch (state) { 268 case SADELIM: 269 if (c == '(') 270 statep->nparen++; 271 else if (c == ')') 272 statep->nparen--; 273 else if (statep->nparen == 0 && (c == /*{*/ '}' || 274 c == (int)statep->ls_adelim.delimiter)) { 275 *wp++ = ADELIM; 276 *wp++ = c; 277 if (c == /*{*/ '}' || --statep->ls_adelim.num == 0) 278 POP_STATE(); 279 if (c == /*{*/ '}') 280 POP_STATE(); 281 break; 282 } 283 /* FALLTHROUGH */ 284 case SBASE: 285 if (c == '[' && (cf & (VARASN|ARRAYVAR))) { 286 /* temporary */ 287 *wp = EOS; 288 if (is_wdvarname(Xstring(ws, wp), false)) { 289 char *p, *tmp; 290 291 if (arraysub(&tmp)) { 292 *wp++ = CHAR; 293 *wp++ = c; 294 for (p = tmp; *p; ) { 295 Xcheck(ws, wp); 296 *wp++ = CHAR; 297 *wp++ = *p++; 298 } 299 afree(tmp, ATEMP); 300 break; 301 } else { 302 Source *s; 303 304 s = pushs(SREREAD, 305 source->areap); 306 s->start = s->str = 307 s->u.freeme = tmp; 308 s->next = source; 309 source = s; 310 } 311 } 312 *wp++ = CHAR; 313 *wp++ = c; 314 break; 315 } 316 /* FALLTHROUGH */ 317 Sbase1: /* includes *(...|...) pattern (*+?@!) */ 318 if (c == '*' || c == '@' || c == '+' || c == '?' || 319 c == '!') { 320 c2 = getsc(); 321 if (c2 == '(' /*)*/ ) { 322 *wp++ = OPAT; 323 *wp++ = c; 324 PUSH_STATE(SPATTERN); 325 break; 326 } 327 ungetsc(c2); 328 } 329 /* FALLTHROUGH */ 330 Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */ 331 switch (c) { 332 case '\\': 333 getsc_qchar: 334 if ((c = getsc())) { 335 /* trailing \ is lost */ 336 *wp++ = QCHAR; 337 *wp++ = c; 338 } 339 break; 340 case '\'': 341 open_ssquote_unless_heredoc: 342 if ((cf & HEREDOC)) 343 goto store_char; 344 *wp++ = OQUOTE; 345 ignore_backslash_newline++; 346 PUSH_STATE(SSQUOTE); 347 break; 348 case '"': 349 open_sdquote: 350 *wp++ = OQUOTE; 351 PUSH_STATE(SDQUOTE); 352 break; 353 case '$': 354 /* 355 * processing of dollar sign belongs into 356 * Subst, except for those which can open 357 * a string: $'' and $"" 358 */ 359 subst_dollar_ex: 360 c = getsc(); 361 switch (c) { 362 case '"': 363 goto open_sdquote; 364 case '\'': 365 goto open_sequote; 366 default: 367 goto SubstS; 368 } 369 default: 370 goto Subst; 371 } 372 break; 373 374 Subst: 375 switch (c) { 376 case '\\': 377 c = getsc(); 378 switch (c) { 379 case '"': 380 if ((cf & HEREDOC)) 381 goto heredocquote; 382 /* FALLTHROUGH */ 383 case '\\': 384 case '$': case '`': 385 store_qchar: 386 *wp++ = QCHAR; 387 *wp++ = c; 388 break; 389 default: 390 heredocquote: 391 Xcheck(ws, wp); 392 if (c) { 393 /* trailing \ is lost */ 394 *wp++ = CHAR; 395 *wp++ = '\\'; 396 *wp++ = CHAR; 397 *wp++ = c; 398 } 399 break; 400 } 401 break; 402 case '$': 403 c = getsc(); 404 SubstS: 405 if (c == '(') /*)*/ { 406 c = getsc(); 407 if (c == '(') /*)*/ { 408 *wp++ = EXPRSUB; 409 PUSH_SRETRACE(SASPAREN); 410 statep->nparen = 2; 411 *retrace_info->xp++ = '('; 412 } else { 413 ungetsc(c); 414 subst_command: 415 c = COMSUB; 416 subst_command2: 417 sp = yyrecursive(c); 418 cz = strlen(sp) + 1; 419 XcheckN(ws, wp, cz); 420 *wp++ = c; 421 memcpy(wp, sp, cz); 422 wp += cz; 423 } 424 } else if (c == '{') /*}*/ { 425 if ((c = getsc()) == '|') { 426 /* 427 * non-subenvironment 428 * value substitution 429 */ 430 c = VALSUB; 431 goto subst_command2; 432 } else if (ctype(c, C_IFSWS)) { 433 /* 434 * non-subenvironment 435 * "command" substitution 436 */ 437 c = FUNSUB; 438 goto subst_command2; 439 } 440 ungetsc(c); 441 *wp++ = OSUBST; 442 *wp++ = '{'; /*}*/ 443 wp = get_brace_var(&ws, wp); 444 c = getsc(); 445 /* allow :# and :% (ksh88 compat) */ 446 if (c == ':') { 447 *wp++ = CHAR; 448 *wp++ = c; 449 c = getsc(); 450 if (c == ':') { 451 *wp++ = CHAR; 452 *wp++ = '0'; 453 *wp++ = ADELIM; 454 *wp++ = ':'; 455 PUSH_STATE(SBRACE); 456 PUSH_STATE(SADELIM); 457 statep->ls_adelim.delimiter = ':'; 458 statep->ls_adelim.num = 1; 459 statep->nparen = 0; 460 break; 461 } else if (ksh_isdigit(c) || 462 c == '('/*)*/ || c == ' ' || 463 /*XXX what else? */ 464 c == '$') { 465 /* substring subst. */ 466 if (c != ' ') { 467 *wp++ = CHAR; 468 *wp++ = ' '; 469 } 470 ungetsc(c); 471 PUSH_STATE(SBRACE); 472 PUSH_STATE(SADELIM); 473 statep->ls_adelim.delimiter = ':'; 474 statep->ls_adelim.num = 2; 475 statep->nparen = 0; 476 break; 477 } 478 } else if (c == '/') { 479 *wp++ = CHAR; 480 *wp++ = c; 481 if ((c = getsc()) == '/') { 482 *wp++ = ADELIM; 483 *wp++ = c; 484 } else 485 ungetsc(c); 486 PUSH_STATE(SBRACE); 487 PUSH_STATE(SADELIM); 488 statep->ls_adelim.delimiter = '/'; 489 statep->ls_adelim.num = 1; 490 statep->nparen = 0; 491 break; 492 } 493 /* 494 * If this is a trim operation, 495 * treat (,|,) specially in STBRACE. 496 */ 497 if (ctype(c, C_SUBOP2)) { 498 ungetsc(c); 499 if (Flag(FSH)) 500 PUSH_STATE(STBRACEBOURNE); 501 else 502 PUSH_STATE(STBRACEKORN); 503 } else { 504 ungetsc(c); 505 if (state == SDQUOTE || 506 state == SQBRACE) 507 PUSH_STATE(SQBRACE); 508 else 509 PUSH_STATE(SBRACE); 510 } 511 } else if (ksh_isalphx(c)) { 512 *wp++ = OSUBST; 513 *wp++ = 'X'; 514 do { 515 Xcheck(ws, wp); 516 *wp++ = c; 517 c = getsc(); 518 } while (ksh_isalnux(c)); 519 *wp++ = '\0'; 520 *wp++ = CSUBST; 521 *wp++ = 'X'; 522 ungetsc(c); 523 } else if (ctype(c, C_VAR1 | C_DIGIT)) { 524 Xcheck(ws, wp); 525 *wp++ = OSUBST; 526 *wp++ = 'X'; 527 *wp++ = c; 528 *wp++ = '\0'; 529 *wp++ = CSUBST; 530 *wp++ = 'X'; 531 } else { 532 *wp++ = CHAR; 533 *wp++ = '$'; 534 ungetsc(c); 535 } 536 break; 537 case '`': 538 subst_gravis: 539 PUSH_STATE(SBQUOTE); 540 *wp++ = COMSUB; 541 /* 542 * Need to know if we are inside double quotes 543 * since sh/AT&T-ksh translate the \" to " in 544 * "`...\"...`". 545 * This is not done in POSIX mode (section 546 * 3.2.3, Double Quotes: "The backquote shall 547 * retain its special meaning introducing the 548 * other form of command substitution (see 549 * 3.6.3). The portion of the quoted string 550 * from the initial backquote and the 551 * characters up to the next backquote that 552 * is not preceded by a backslash (having 553 * escape characters removed) defines that 554 * command whose output replaces `...` when 555 * the word is expanded." 556 * Section 3.6.3, Command Substitution: 557 * "Within the backquoted style of command 558 * substitution, backslash shall retain its 559 * literal meaning, except when followed by 560 * $ ` \."). 561 */ 562 statep->ls_bool = false; 563 s2 = statep; 564 base = state_info.base; 565 while (/* CONSTCOND */ 1) { 566 for (; s2 != base; s2--) { 567 if (s2->type == SDQUOTE) { 568 statep->ls_bool = true; 569 break; 570 } 571 } 572 if (s2 != base) 573 break; 574 if (!(s2 = s2->ls_base)) 575 break; 576 base = s2-- - STATE_BSIZE; 577 } 578 break; 579 case QCHAR: 580 if (cf & LQCHAR) { 581 *wp++ = QCHAR; 582 *wp++ = getsc(); 583 break; 584 } 585 /* FALLTHROUGH */ 586 default: 587 store_char: 588 *wp++ = CHAR; 589 *wp++ = c; 590 } 591 break; 592 593 case SEQUOTE: 594 if (c == '\'') { 595 POP_STATE(); 596 *wp++ = CQUOTE; 597 ignore_backslash_newline--; 598 } else if (c == '\\') { 599 if ((c2 = unbksl(true, s_get, s_put)) == -1) 600 c2 = s_get(); 601 if (c2 == 0) 602 statep->ls_bool = true; 603 if (!statep->ls_bool) { 604 char ts[4]; 605 606 if ((unsigned int)c2 < 0x100) { 607 *wp++ = QCHAR; 608 *wp++ = c2; 609 } else { 610 cz = utf_wctomb(ts, c2 - 0x100); 611 ts[cz] = 0; 612 for (cz = 0; ts[cz]; ++cz) { 613 *wp++ = QCHAR; 614 *wp++ = ts[cz]; 615 } 616 } 617 } 618 } else if (!statep->ls_bool) { 619 *wp++ = QCHAR; 620 *wp++ = c; 621 } 622 break; 623 624 case SSQUOTE: 625 if (c == '\'') { 626 POP_STATE(); 627 if ((cf & HEREDOC) || state == SQBRACE) 628 goto store_char; 629 *wp++ = CQUOTE; 630 ignore_backslash_newline--; 631 } else { 632 *wp++ = QCHAR; 633 *wp++ = c; 634 } 635 break; 636 637 case SDQUOTE: 638 if (c == '"') { 639 POP_STATE(); 640 *wp++ = CQUOTE; 641 } else 642 goto Subst; 643 break; 644 645 /* $(( ... )) */ 646 case SASPAREN: 647 if (c == '(') 648 statep->nparen++; 649 else if (c == ')') { 650 statep->nparen--; 651 if (statep->nparen == 1) { 652 /* end of EXPRSUB */ 653 POP_SRETRACE(); 654 655 if ((c2 = getsc()) == /*(*/ ')') { 656 cz = strlen(sp) - 2; 657 XcheckN(ws, wp, cz); 658 memcpy(wp, sp + 1, cz); 659 wp += cz; 660 afree(sp, ATEMP); 661 *wp++ = '\0'; 662 break; 663 } else { 664 Source *s; 665 666 ungetsc(c2); 667 /* 668 * mismatched parenthesis - 669 * assume we were really 670 * parsing a $(...) expression 671 */ 672 --wp; 673 s = pushs(SREREAD, 674 source->areap); 675 s->start = s->str = 676 s->u.freeme = sp; 677 s->next = source; 678 source = s; 679 goto subst_command; 680 } 681 } 682 } 683 /* reuse existing state machine */ 684 goto Sbase2; 685 686 case SQBRACE: 687 if (c == '\\') { 688 /* 689 * perform POSIX "quote removal" if the back- 690 * slash is "special", i.e. same cases as the 691 * {case '\\':} in Subst: plus closing brace; 692 * in mksh code "quote removal" on '\c' means 693 * write QCHAR+c, otherwise CHAR+\+CHAR+c are 694 * emitted (in heredocquote:) 695 */ 696 if ((c = getsc()) == '"' || c == '\\' || 697 c == '$' || c == '`' || c == /*{*/'}') 698 goto store_qchar; 699 goto heredocquote; 700 } 701 goto common_SQBRACE; 702 703 case SBRACE: 704 if (c == '\'') 705 goto open_ssquote_unless_heredoc; 706 else if (c == '\\') 707 goto getsc_qchar; 708 common_SQBRACE: 709 if (c == '"') 710 goto open_sdquote; 711 else if (c == '$') 712 goto subst_dollar_ex; 713 else if (c == '`') 714 goto subst_gravis; 715 else if (c != /*{*/ '}') 716 goto store_char; 717 POP_STATE(); 718 *wp++ = CSUBST; 719 *wp++ = /*{*/ '}'; 720 break; 721 722 /* Same as SBASE, except (,|,) treated specially */ 723 case STBRACEKORN: 724 if (c == '|') 725 *wp++ = SPAT; 726 else if (c == '(') { 727 *wp++ = OPAT; 728 /* simile for @ */ 729 *wp++ = ' '; 730 PUSH_STATE(SPATTERN); 731 } else /* FALLTHROUGH */ 732 case STBRACEBOURNE: 733 if (c == /*{*/ '}') { 734 POP_STATE(); 735 *wp++ = CSUBST; 736 *wp++ = /*{*/ '}'; 737 } else 738 goto Sbase1; 739 break; 740 741 case SBQUOTE: 742 if (c == '`') { 743 *wp++ = 0; 744 POP_STATE(); 745 } else if (c == '\\') { 746 switch (c = getsc()) { 747 case 0: 748 /* trailing \ is lost */ 749 break; 750 case '\\': 751 case '$': case '`': 752 *wp++ = c; 753 break; 754 case '"': 755 if (statep->ls_bool) { 756 *wp++ = c; 757 break; 758 } 759 /* FALLTHROUGH */ 760 default: 761 *wp++ = '\\'; 762 *wp++ = c; 763 break; 764 } 765 } else 766 *wp++ = c; 767 break; 768 769 /* ONEWORD */ 770 case SWORD: 771 goto Subst; 772 773 /* LETEXPR: (( ... )) */ 774 case SLETPAREN: 775 if (c == /*(*/ ')') { 776 if (statep->nparen > 0) 777 --statep->nparen; 778 else if ((c2 = getsc()) == /*(*/ ')') { 779 c = 0; 780 *wp++ = CQUOTE; 781 goto Done; 782 } else { 783 Source *s; 784 785 ungetsc(c2); 786 /* 787 * mismatched parenthesis - 788 * assume we were really 789 * parsing a (...) expression 790 */ 791 *wp = EOS; 792 sp = Xstring(ws, wp); 793 dp = wdstrip(sp, WDS_KEEPQ); 794 s = pushs(SREREAD, source->areap); 795 s->start = s->str = s->u.freeme = dp; 796 s->next = source; 797 source = s; 798 return ('('/*)*/); 799 } 800 } else if (c == '(') 801 /* 802 * parentheses inside quotes and 803 * backslashes are lost, but AT&T ksh 804 * doesn't count them either 805 */ 806 ++statep->nparen; 807 goto Sbase2; 808 809 /* <<, <<-, <<< delimiter */ 810 case SHEREDELIM: 811 /* 812 * here delimiters need a special case since 813 * $ and `...` are not to be treated specially 814 */ 815 switch (c) { 816 case '\\': 817 if ((c = getsc())) { 818 /* trailing \ is lost */ 819 *wp++ = QCHAR; 820 *wp++ = c; 821 } 822 break; 823 case '\'': 824 goto open_ssquote_unless_heredoc; 825 case '$': 826 if ((c2 = getsc()) == '\'') { 827 open_sequote: 828 *wp++ = OQUOTE; 829 ignore_backslash_newline++; 830 PUSH_STATE(SEQUOTE); 831 statep->ls_bool = false; 832 break; 833 } else if (c2 == '"') { 834 /* FALLTHROUGH */ 835 case '"': 836 PUSH_SRETRACE(SHEREDQUOTE); 837 break; 838 } 839 ungetsc(c2); 840 /* FALLTHROUGH */ 841 default: 842 *wp++ = CHAR; 843 *wp++ = c; 844 } 845 break; 846 847 /* " in <<, <<-, <<< delimiter */ 848 case SHEREDQUOTE: 849 if (c != '"') 850 goto Subst; 851 POP_SRETRACE(); 852 dp = strnul(sp) - 1; 853 /* remove the trailing double quote */ 854 *dp = '\0'; 855 /* store the quoted string */ 856 *wp++ = OQUOTE; 857 XcheckN(ws, wp, (dp - sp)); 858 dp = sp; 859 while ((c = *dp++)) { 860 if (c == '\\') { 861 switch ((c = *dp++)) { 862 case '\\': 863 case '"': 864 case '$': 865 case '`': 866 break; 867 default: 868 *wp++ = CHAR; 869 *wp++ = '\\'; 870 break; 871 } 872 } 873 *wp++ = CHAR; 874 *wp++ = c; 875 } 876 afree(sp, ATEMP); 877 *wp++ = CQUOTE; 878 state = statep->type = SHEREDELIM; 879 break; 880 881 /* in *(...|...) pattern (*+?@!) */ 882 case SPATTERN: 883 if (c == /*(*/ ')') { 884 *wp++ = CPAT; 885 POP_STATE(); 886 } else if (c == '|') { 887 *wp++ = SPAT; 888 } else if (c == '(') { 889 *wp++ = OPAT; 890 /* simile for @ */ 891 *wp++ = ' '; 892 PUSH_STATE(SPATTERN); 893 } else 894 goto Sbase1; 895 break; 896 } 897 } 898 Done: 899 Xcheck(ws, wp); 900 if (statep != &states[1]) 901 /* XXX figure out what is missing */ 902 yyerror("no closing quote\n"); 903 904 /* This done to avoid tests for SHEREDELIM wherever SBASE tested */ 905 if (state == SHEREDELIM) 906 state = SBASE; 907 908 dp = Xstring(ws, wp); 909 if (state == SBASE && ( 910 #ifndef MKSH_LEGACY_MODE 911 (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) || 912 #endif 913 c == '<' || c == '>')) { 914 struct ioword *iop = alloc(sizeof(struct ioword), ATEMP); 915 916 if (Xlength(ws, wp) == 0) 917 iop->unit = c == '<' ? 0 : 1; 918 else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) { 919 if (dp[c2] != CHAR) 920 goto no_iop; 921 if (!ksh_isdigit(dp[c2 + 1])) 922 goto no_iop; 923 iop->unit = (iop->unit * 10) + dp[c2 + 1] - '0'; 924 } 925 926 if (iop->unit >= FDBASE) 927 goto no_iop; 928 929 if (c == '&') { 930 if ((c2 = getsc()) != '>') { 931 ungetsc(c2); 932 goto no_iop; 933 } 934 c = c2; 935 iop->flag = IOBASH; 936 } else 937 iop->flag = 0; 938 939 c2 = getsc(); 940 /* <<, >>, <> are ok, >< is not */ 941 if (c == c2 || (c == '<' && c2 == '>')) { 942 iop->flag |= c == c2 ? 943 (c == '>' ? IOCAT : IOHERE) : IORDWR; 944 if (iop->flag == IOHERE) { 945 if ((c2 = getsc()) == '-') { 946 iop->flag |= IOSKIP; 947 c2 = getsc(); 948 } else if (c2 == '<') 949 iop->flag |= IOHERESTR; 950 ungetsc(c2); 951 if (c2 == '\n') 952 iop->flag |= IONDELIM; 953 } 954 } else if (c2 == '&') 955 iop->flag |= IODUP | (c == '<' ? IORDUP : 0); 956 else { 957 iop->flag |= c == '>' ? IOWRITE : IOREAD; 958 if (c == '>' && c2 == '|') 959 iop->flag |= IOCLOB; 960 else 961 ungetsc(c2); 962 } 963 964 iop->name = NULL; 965 iop->delim = NULL; 966 iop->heredoc = NULL; 967 /* free word */ 968 Xfree(ws, wp); 969 yylval.iop = iop; 970 return (REDIR); 971 no_iop: 972 afree(iop, ATEMP); 973 } 974 975 if (wp == dp && state == SBASE) { 976 /* free word */ 977 Xfree(ws, wp); 978 /* no word, process LEX1 character */ 979 if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) { 980 if ((c2 = getsc()) == c) 981 c = (c == ';') ? BREAK : 982 (c == '|') ? LOGOR : 983 (c == '&') ? LOGAND : 984 /* c == '(' ) */ MDPAREN; 985 else if (c == '|' && c2 == '&') 986 c = COPROC; 987 else if (c == ';' && c2 == '|') 988 c = BRKEV; 989 else if (c == ';' && c2 == '&') 990 c = BRKFT; 991 else 992 ungetsc(c2); 993 #ifndef MKSH_SMALL 994 if (c == BREAK) { 995 if ((c2 = getsc()) == '&') 996 c = BRKEV; 997 else 998 ungetsc(c2); 999 } 1000 #endif 1001 } else if (c == '\n') { 1002 gethere(false); 1003 if (cf & CONTIN) 1004 goto Again; 1005 } else if (c == '\0') 1006 /* need here strings at EOF */ 1007 gethere(true); 1008 return (c); 1009 } 1010 1011 /* terminate word */ 1012 *wp++ = EOS; 1013 yylval.cp = Xclose(ws, wp); 1014 if (state == SWORD || state == SLETPAREN 1015 /* XXX ONEWORD? */) 1016 return (LWORD); 1017 1018 /* unget terminator */ 1019 ungetsc(c); 1020 1021 /* 1022 * note: the alias-vs-function code below depends on several 1023 * interna: starting from here, source->str is not modified; 1024 * the way getsc() and ungetsc() operate; etc. 1025 */ 1026 1027 /* copy word to unprefixed string ident */ 1028 sp = yylval.cp; 1029 dp = ident; 1030 if ((cf & HEREDELIM) && (sp[1] == '<')) 1031 while ((dp - ident) < IDENT) { 1032 if ((c = *sp++) == CHAR) 1033 *dp++ = *sp++; 1034 else if ((c != OQUOTE) && (c != CQUOTE)) 1035 break; 1036 } 1037 else 1038 while ((dp - ident) < IDENT && (c = *sp++) == CHAR) 1039 *dp++ = *sp++; 1040 /* Make sure the ident array stays '\0' padded */ 1041 memset(dp, 0, (ident + IDENT) - dp + 1); 1042 if (c != EOS) 1043 /* word is not unquoted */ 1044 *ident = '\0'; 1045 1046 if (*ident != '\0' && (cf & (KEYWORD | ALIAS))) { 1047 struct tbl *p; 1048 uint32_t h = hash(ident); 1049 1050 if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) && 1051 (!(cf & ESACONLY) || p->val.i == ESAC || 1052 p->val.i == /*{*/ '}')) { 1053 afree(yylval.cp, ATEMP); 1054 return (p->val.i); 1055 } 1056 if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) && 1057 (p->flag & ISSET)) { 1058 /* 1059 * this still points to the same character as the 1060 * ungetsc'd terminator from above 1061 */ 1062 const char *cp = source->str; 1063 1064 /* prefer POSIX but not Korn functions over aliases */ 1065 while (*cp == ' ' || *cp == '\t') 1066 /* 1067 * this is like getsc() without skipping 1068 * over Source boundaries (including not 1069 * parsing ungetsc'd characters that got 1070 * pushed into an SREREAD) which is what 1071 * we want here anyway: find out whether 1072 * the alias name is followed by a POSIX 1073 * function definition (only the opening 1074 * parenthesis is checked though) 1075 */ 1076 ++cp; 1077 /* prefer functions over aliases */ 1078 if (cp[0] != '(' || cp[1] != ')') { 1079 Source *s = source; 1080 1081 while (s && (s->flags & SF_HASALIAS)) 1082 if (s->u.tblp == p) 1083 return (LWORD); 1084 else 1085 s = s->next; 1086 /* push alias expansion */ 1087 s = pushs(SALIAS, source->areap); 1088 s->start = s->str = p->val.s; 1089 s->u.tblp = p; 1090 s->flags |= SF_HASALIAS; 1091 s->next = source; 1092 if (source->type == SEOF) { 1093 /* prevent infinite recursion at EOS */ 1094 source->u.tblp = p; 1095 source->flags |= SF_HASALIAS; 1096 } 1097 source = s; 1098 afree(yylval.cp, ATEMP); 1099 goto Again; 1100 } 1101 } 1102 } 1103 1104 return (LWORD); 1105 } 1106 1107 static void 1108 gethere(bool iseof) 1109 { 1110 struct ioword **p; 1111 1112 for (p = heres; p < herep; p++) 1113 if (iseof && !((*p)->flag & IOHERESTR)) 1114 /* only here strings at EOF */ 1115 return; 1116 else 1117 readhere(*p); 1118 herep = heres; 1119 } 1120 1121 /* 1122 * read "<<word" text into temp file 1123 */ 1124 1125 static void 1126 readhere(struct ioword *iop) 1127 { 1128 int c; 1129 const char *eof, *eofp; 1130 XString xs; 1131 char *xp; 1132 int xpos; 1133 1134 if (iop->flag & IOHERESTR) { 1135 /* process the here string */ 1136 iop->heredoc = xp = evalstr(iop->delim, DOBLANK); 1137 xpos = strlen(xp) - 1; 1138 memmove(xp, xp + 1, xpos); 1139 xp[xpos] = '\n'; 1140 return; 1141 } 1142 1143 eof = iop->flag & IONDELIM ? "<<" : evalstr(iop->delim, 0); 1144 1145 if (!(iop->flag & IOEVAL)) 1146 ignore_backslash_newline++; 1147 1148 Xinit(xs, xp, 256, ATEMP); 1149 1150 heredoc_read_line: 1151 /* beginning of line */ 1152 eofp = eof; 1153 xpos = Xsavepos(xs, xp); 1154 if (iop->flag & IOSKIP) { 1155 /* skip over leading tabs */ 1156 while ((c = getsc()) == '\t') 1157 /* nothing */; 1158 goto heredoc_parse_char; 1159 } 1160 heredoc_read_char: 1161 c = getsc(); 1162 heredoc_parse_char: 1163 /* compare with here document marker */ 1164 if (!*eofp) { 1165 /* end of here document marker, what to do? */ 1166 switch (c) { 1167 case /*(*/ ')': 1168 if (!subshell_nesting_type) 1169 /*- 1170 * not allowed outside $(...) or (...) 1171 * => mismatch 1172 */ 1173 break; 1174 /* allow $(...) or (...) to close here */ 1175 ungetsc(/*(*/ ')'); 1176 /* FALLTHROUGH */ 1177 case 0: 1178 /* 1179 * Allow EOF here to commands without trailing 1180 * newlines (mksh -c '...') will work as well. 1181 */ 1182 case '\n': 1183 /* Newline terminates here document marker */ 1184 goto heredoc_found_terminator; 1185 } 1186 } else if (c == *eofp++) 1187 /* store; then read and compare next character */ 1188 goto heredoc_store_and_loop; 1189 /* nope, mismatch; read until end of line */ 1190 while (c != '\n') { 1191 if (!c) 1192 /* oops, reached EOF */ 1193 yyerror("%s '%s' unclosed\n", "here document", eof); 1194 /* store character */ 1195 Xcheck(xs, xp); 1196 Xput(xs, xp, c); 1197 /* read next character */ 1198 c = getsc(); 1199 } 1200 /* we read a newline as last character */ 1201 heredoc_store_and_loop: 1202 /* store character */ 1203 Xcheck(xs, xp); 1204 Xput(xs, xp, c); 1205 if (c == '\n') 1206 goto heredoc_read_line; 1207 goto heredoc_read_char; 1208 1209 heredoc_found_terminator: 1210 /* jump back to saved beginning of line */ 1211 xp = Xrestpos(xs, xp, xpos); 1212 /* terminate, close and store */ 1213 Xput(xs, xp, '\0'); 1214 iop->heredoc = Xclose(xs, xp); 1215 1216 if (!(iop->flag & IOEVAL)) 1217 ignore_backslash_newline--; 1218 } 1219 1220 void 1221 yyerror(const char *fmt, ...) 1222 { 1223 va_list va; 1224 1225 /* pop aliases and re-reads */ 1226 while (source->type == SALIAS || source->type == SREREAD) 1227 source = source->next; 1228 /* zap pending input */ 1229 source->str = null; 1230 1231 error_prefix(true); 1232 va_start(va, fmt); 1233 shf_vfprintf(shl_out, fmt, va); 1234 va_end(va); 1235 errorfz(); 1236 } 1237 1238 /* 1239 * input for yylex with alias expansion 1240 */ 1241 1242 Source * 1243 pushs(int type, Area *areap) 1244 { 1245 Source *s; 1246 1247 s = alloc(sizeof(Source), areap); 1248 memset(s, 0, sizeof(Source)); 1249 s->type = type; 1250 s->str = null; 1251 s->areap = areap; 1252 if (type == SFILE || type == SSTDIN) 1253 XinitN(s->xs, 256, s->areap); 1254 return (s); 1255 } 1256 1257 static int 1258 getsc_uu(void) 1259 { 1260 Source *s = source; 1261 int c; 1262 1263 while ((c = *s->str++) == 0) { 1264 /* return 0 for EOF by default */ 1265 s->str = NULL; 1266 switch (s->type) { 1267 case SEOF: 1268 s->str = null; 1269 return (0); 1270 1271 case SSTDIN: 1272 case SFILE: 1273 getsc_line(s); 1274 break; 1275 1276 case SWSTR: 1277 break; 1278 1279 case SSTRING: 1280 case SSTRINGCMDLINE: 1281 break; 1282 1283 case SWORDS: 1284 s->start = s->str = *s->u.strv++; 1285 s->type = SWORDSEP; 1286 break; 1287 1288 case SWORDSEP: 1289 if (*s->u.strv == NULL) { 1290 s->start = s->str = "\n"; 1291 s->type = SEOF; 1292 } else { 1293 s->start = s->str = " "; 1294 s->type = SWORDS; 1295 } 1296 break; 1297 1298 case SALIAS: 1299 if (s->flags & SF_ALIASEND) { 1300 /* pass on an unused SF_ALIAS flag */ 1301 source = s->next; 1302 source->flags |= s->flags & SF_ALIAS; 1303 s = source; 1304 } else if (*s->u.tblp->val.s && 1305 (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) { 1306 /* pop source stack */ 1307 source = s = s->next; 1308 /* 1309 * Note that this alias ended with a 1310 * space, enabling alias expansion on 1311 * the following word. 1312 */ 1313 s->flags |= SF_ALIAS; 1314 } else { 1315 /* 1316 * At this point, we need to keep the current 1317 * alias in the source list so recursive 1318 * aliases can be detected and we also need to 1319 * return the next character. Do this by 1320 * temporarily popping the alias to get the 1321 * next character and then put it back in the 1322 * source list with the SF_ALIASEND flag set. 1323 */ 1324 /* pop source stack */ 1325 source = s->next; 1326 source->flags |= s->flags & SF_ALIAS; 1327 c = getsc_uu(); 1328 if (c) { 1329 s->flags |= SF_ALIASEND; 1330 s->ugbuf[0] = c; s->ugbuf[1] = '\0'; 1331 s->start = s->str = s->ugbuf; 1332 s->next = source; 1333 source = s; 1334 } else { 1335 s = source; 1336 /* avoid reading EOF twice */ 1337 s->str = NULL; 1338 break; 1339 } 1340 } 1341 continue; 1342 1343 case SREREAD: 1344 if (s->start != s->ugbuf) 1345 /* yuck */ 1346 afree(s->u.freeme, ATEMP); 1347 source = s = s->next; 1348 continue; 1349 } 1350 if (s->str == NULL) { 1351 s->type = SEOF; 1352 s->start = s->str = null; 1353 return ('\0'); 1354 } 1355 if (s->flags & SF_ECHO) { 1356 shf_puts(s->str, shl_out); 1357 shf_flush(shl_out); 1358 } 1359 } 1360 return (c); 1361 } 1362 1363 static void 1364 getsc_line(Source *s) 1365 { 1366 char *xp = Xstring(s->xs, xp), *cp; 1367 bool interactive = Flag(FTALKING) && s->type == SSTDIN; 1368 bool have_tty = tobool(interactive && (s->flags & SF_TTY)); 1369 1370 /* Done here to ensure nothing odd happens when a timeout occurs */ 1371 XcheckN(s->xs, xp, LINE); 1372 *xp = '\0'; 1373 s->start = s->str = xp; 1374 1375 if (have_tty && ksh_tmout) { 1376 ksh_tmout_state = TMOUT_READING; 1377 alarm(ksh_tmout); 1378 } 1379 if (interactive) 1380 change_winsz(); 1381 #ifndef MKSH_NO_CMDLINE_EDITING 1382 if (have_tty && ( 1383 #if !MKSH_S_NOVI 1384 Flag(FVI) || 1385 #endif 1386 Flag(FEMACS) || Flag(FGMACS))) { 1387 int nread; 1388 1389 nread = x_read(xp); 1390 if (nread < 0) 1391 /* read error */ 1392 nread = 0; 1393 xp[nread] = '\0'; 1394 xp += nread; 1395 } else 1396 #endif 1397 { 1398 if (interactive) 1399 pprompt(prompt, 0); 1400 else 1401 s->line++; 1402 1403 while (/* CONSTCOND */ 1) { 1404 char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf); 1405 1406 if (!p && shf_error(s->u.shf) && 1407 shf_errno(s->u.shf) == EINTR) { 1408 shf_clearerr(s->u.shf); 1409 if (trap) 1410 runtraps(0); 1411 continue; 1412 } 1413 if (!p || (xp = p, xp[-1] == '\n')) 1414 break; 1415 /* double buffer size */ 1416 /* move past NUL so doubling works... */ 1417 xp++; 1418 XcheckN(s->xs, xp, Xlength(s->xs, xp)); 1419 /* ...and move back again */ 1420 xp--; 1421 } 1422 /* 1423 * flush any unwanted input so other programs/builtins 1424 * can read it. Not very optimal, but less error prone 1425 * than flushing else where, dealing with redirections, 1426 * etc. 1427 * TODO: reduce size of shf buffer (~128?) if SSTDIN 1428 */ 1429 if (s->type == SSTDIN) 1430 shf_flush(s->u.shf); 1431 } 1432 /* 1433 * XXX: temporary kludge to restore source after a 1434 * trap may have been executed. 1435 */ 1436 source = s; 1437 if (have_tty && ksh_tmout) { 1438 ksh_tmout_state = TMOUT_EXECUTING; 1439 alarm(0); 1440 } 1441 cp = Xstring(s->xs, xp); 1442 rndpush(cp); 1443 s->start = s->str = cp; 1444 strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp)); 1445 /* Note: if input is all nulls, this is not eof */ 1446 if (Xlength(s->xs, xp) == 0) { 1447 /* EOF */ 1448 if (s->type == SFILE) 1449 shf_fdclose(s->u.shf); 1450 s->str = NULL; 1451 } else if (interactive && *s->str && 1452 (cur_prompt != PS1 || !ctype(*s->str, C_IFS | C_IFSWS))) { 1453 histsave(&s->line, s->str, true, true); 1454 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY 1455 } else if (interactive && cur_prompt == PS1) { 1456 cp = Xstring(s->xs, xp); 1457 while (*cp && ctype(*cp, C_IFSWS)) 1458 ++cp; 1459 if (!*cp) 1460 histsync(); 1461 #endif 1462 } 1463 if (interactive) 1464 set_prompt(PS2, NULL); 1465 } 1466 1467 void 1468 set_prompt(int to, Source *s) 1469 { 1470 cur_prompt = to; 1471 1472 switch (to) { 1473 /* command */ 1474 case PS1: 1475 /* 1476 * Substitute ! and !! here, before substitutions are done 1477 * so ! in expanded variables are not expanded. 1478 * NOTE: this is not what AT&T ksh does (it does it after 1479 * substitutions, POSIX doesn't say which is to be done. 1480 */ 1481 { 1482 struct shf *shf; 1483 char * volatile ps1; 1484 Area *saved_atemp; 1485 1486 ps1 = str_val(global("PS1")); 1487 shf = shf_sopen(NULL, strlen(ps1) * 2, 1488 SHF_WR | SHF_DYNAMIC, NULL); 1489 while (*ps1) 1490 if (*ps1 != '!' || *++ps1 == '!') 1491 shf_putchar(*ps1++, shf); 1492 else 1493 shf_fprintf(shf, "%d", 1494 s ? s->line + 1 : 0); 1495 ps1 = shf_sclose(shf); 1496 saved_atemp = ATEMP; 1497 newenv(E_ERRH); 1498 if (kshsetjmp(e->jbuf)) { 1499 prompt = safe_prompt; 1500 /* 1501 * Don't print an error - assume it has already 1502 * been printed. Reason is we may have forked 1503 * to run a command and the child may be 1504 * unwinding its stack through this code as it 1505 * exits. 1506 */ 1507 } else { 1508 char *cp = substitute(ps1, 0); 1509 strdupx(prompt, cp, saved_atemp); 1510 } 1511 quitenv(NULL); 1512 } 1513 break; 1514 /* command continuation */ 1515 case PS2: 1516 prompt = str_val(global("PS2")); 1517 break; 1518 } 1519 } 1520 1521 int 1522 pprompt(const char *cp, int ntruncate) 1523 { 1524 char delimiter = 0; 1525 bool doprint = (ntruncate != -1); 1526 bool indelimit = false; 1527 int columns = 0, lines = 0; 1528 1529 /* 1530 * Undocumented AT&T ksh feature: 1531 * If the second char in the prompt string is \r then the first 1532 * char is taken to be a non-printing delimiter and any chars 1533 * between two instances of the delimiter are not considered to 1534 * be part of the prompt length 1535 */ 1536 if (*cp && cp[1] == '\r') { 1537 delimiter = *cp; 1538 cp += 2; 1539 } 1540 for (; *cp; cp++) { 1541 if (indelimit && *cp != delimiter) 1542 ; 1543 else if (*cp == '\n' || *cp == '\r') { 1544 lines += columns / x_cols + ((*cp == '\n') ? 1 : 0); 1545 columns = 0; 1546 } else if (*cp == '\t') { 1547 columns = (columns | 7) + 1; 1548 } else if (*cp == '\b') { 1549 if (columns > 0) 1550 columns--; 1551 } else if (*cp == delimiter) 1552 indelimit = !indelimit; 1553 else if (UTFMODE && ((unsigned char)*cp > 0x7F)) { 1554 const char *cp2; 1555 columns += utf_widthadj(cp, &cp2); 1556 if (doprint && (indelimit || 1557 (ntruncate < (x_cols * lines + columns)))) 1558 shf_write(cp, cp2 - cp, shl_out); 1559 cp = cp2 - /* loop increment */ 1; 1560 continue; 1561 } else 1562 columns++; 1563 if (doprint && (*cp != delimiter) && 1564 (indelimit || (ntruncate < (x_cols * lines + columns)))) 1565 shf_putc(*cp, shl_out); 1566 } 1567 if (doprint) 1568 shf_flush(shl_out); 1569 return (x_cols * lines + columns); 1570 } 1571 1572 /* 1573 * Read the variable part of a ${...} expression (i.e. up to but not 1574 * including the :[-+?=#%] or close-brace). 1575 */ 1576 static char * 1577 get_brace_var(XString *wsp, char *wp) 1578 { 1579 char c; 1580 enum parse_state { 1581 PS_INITIAL, PS_SAW_HASH, PS_IDENT, 1582 PS_NUMBER, PS_VAR1 1583 } state = PS_INITIAL; 1584 1585 while (/* CONSTCOND */ 1) { 1586 c = getsc(); 1587 /* State machine to figure out where the variable part ends. */ 1588 switch (state) { 1589 case PS_INITIAL: 1590 if (c == '#' || c == '!' || c == '%') { 1591 state = PS_SAW_HASH; 1592 break; 1593 } 1594 /* FALLTHROUGH */ 1595 case PS_SAW_HASH: 1596 if (ksh_isalphx(c)) 1597 state = PS_IDENT; 1598 else if (ksh_isdigit(c)) 1599 state = PS_NUMBER; 1600 else if (c == '#') { 1601 if (state == PS_SAW_HASH) { 1602 char c2; 1603 1604 c2 = getsc(); 1605 ungetsc(c2); 1606 if (c2 != /*{*/ '}') { 1607 ungetsc(c); 1608 goto out; 1609 } 1610 } 1611 state = PS_VAR1; 1612 } else if (ctype(c, C_VAR1)) 1613 state = PS_VAR1; 1614 else 1615 goto out; 1616 break; 1617 case PS_IDENT: 1618 if (!ksh_isalnux(c)) { 1619 if (c == '[') { 1620 char *tmp, *p; 1621 1622 if (!arraysub(&tmp)) 1623 yyerror("missing ]\n"); 1624 *wp++ = c; 1625 for (p = tmp; *p; ) { 1626 Xcheck(*wsp, wp); 1627 *wp++ = *p++; 1628 } 1629 afree(tmp, ATEMP); 1630 /* the ] */ 1631 c = getsc(); 1632 } 1633 goto out; 1634 } 1635 break; 1636 case PS_NUMBER: 1637 if (!ksh_isdigit(c)) 1638 goto out; 1639 break; 1640 case PS_VAR1: 1641 goto out; 1642 } 1643 Xcheck(*wsp, wp); 1644 *wp++ = c; 1645 } 1646 out: 1647 /* end of variable part */ 1648 *wp++ = '\0'; 1649 ungetsc(c); 1650 return (wp); 1651 } 1652 1653 /* 1654 * Save an array subscript - returns true if matching bracket found, false 1655 * if eof or newline was found. 1656 * (Returned string double null terminated) 1657 */ 1658 static bool 1659 arraysub(char **strp) 1660 { 1661 XString ws; 1662 char *wp, c; 1663 /* we are just past the initial [ */ 1664 unsigned int depth = 1; 1665 1666 Xinit(ws, wp, 32, ATEMP); 1667 1668 do { 1669 c = getsc(); 1670 Xcheck(ws, wp); 1671 *wp++ = c; 1672 if (c == '[') 1673 depth++; 1674 else if (c == ']') 1675 depth--; 1676 } while (depth > 0 && c && c != '\n'); 1677 1678 *wp++ = '\0'; 1679 *strp = Xclose(ws, wp); 1680 1681 return (tobool(depth == 0)); 1682 } 1683 1684 /* Unget a char: handles case when we are already at the start of the buffer */ 1685 static void 1686 ungetsc(int c) 1687 { 1688 struct sretrace_info *rp = retrace_info; 1689 1690 if (backslash_skip) 1691 backslash_skip--; 1692 /* Don't unget EOF... */ 1693 if (source->str == null && c == '\0') 1694 return; 1695 while (rp) { 1696 if (Xlength(rp->xs, rp->xp)) 1697 rp->xp--; 1698 rp = rp->next; 1699 } 1700 ungetsc_i(c); 1701 } 1702 static void 1703 ungetsc_i(int c) 1704 { 1705 if (source->str > source->start) 1706 source->str--; 1707 else { 1708 Source *s; 1709 1710 s = pushs(SREREAD, source->areap); 1711 s->ugbuf[0] = c; s->ugbuf[1] = '\0'; 1712 s->start = s->str = s->ugbuf; 1713 s->next = source; 1714 source = s; 1715 } 1716 } 1717 1718 1719 /* Called to get a char that isn't a \newline sequence. */ 1720 static int 1721 getsc_bn(void) 1722 { 1723 int c, c2; 1724 1725 if (ignore_backslash_newline) 1726 return (o_getsc_u()); 1727 1728 if (backslash_skip == 1) { 1729 backslash_skip = 2; 1730 return (o_getsc_u()); 1731 } 1732 1733 backslash_skip = 0; 1734 1735 while (/* CONSTCOND */ 1) { 1736 c = o_getsc_u(); 1737 if (c == '\\') { 1738 if ((c2 = o_getsc_u()) == '\n') 1739 /* ignore the \newline; get the next char... */ 1740 continue; 1741 ungetsc_i(c2); 1742 backslash_skip = 1; 1743 } 1744 return (c); 1745 } 1746 } 1747 1748 void 1749 yyskiputf8bom(void) 1750 { 1751 int c; 1752 1753 if ((unsigned char)(c = o_getsc_u()) != 0xEF) { 1754 ungetsc_i(c); 1755 return; 1756 } 1757 if ((unsigned char)(c = o_getsc_u()) != 0xBB) { 1758 ungetsc_i(c); 1759 ungetsc_i(0xEF); 1760 return; 1761 } 1762 if ((unsigned char)(c = o_getsc_u()) != 0xBF) { 1763 ungetsc_i(c); 1764 ungetsc_i(0xBB); 1765 ungetsc_i(0xEF); 1766 return; 1767 } 1768 UTFMODE |= 8; 1769 } 1770 1771 static Lex_state * 1772 push_state_i(State_info *si, Lex_state *old_end) 1773 { 1774 Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP); 1775 1776 news[0].ls_base = old_end; 1777 si->base = &news[0]; 1778 si->end = &news[STATE_BSIZE]; 1779 return (&news[1]); 1780 } 1781 1782 static Lex_state * 1783 pop_state_i(State_info *si, Lex_state *old_end) 1784 { 1785 Lex_state *old_base = si->base; 1786 1787 si->base = old_end->ls_base - STATE_BSIZE; 1788 si->end = old_end->ls_base; 1789 1790 afree(old_base, ATEMP); 1791 1792 return (si->base + STATE_BSIZE - 1); 1793 } 1794 1795 static int 1796 s_get(void) 1797 { 1798 return (getsc()); 1799 } 1800 1801 static void 1802 s_put(int c) 1803 { 1804 ungetsc(c); 1805 } 1806