1 /* $OpenBSD: lex.c,v 1.46 2013/01/20 14:47:46 stsp Exp $ */ 2 3 /*- 4 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 5 * 2011, 2012, 2013 6 * Thorsten Glaser <tg (at) mirbsd.org> 7 * 8 * Provided that these terms and disclaimer and all copyright notices 9 * are retained or reproduced in an accompanying document, permission 10 * is granted to deal in this work without restriction, including un- 11 * limited rights to use, publicly perform, distribute, sell, modify, 12 * merge, give away, or sublicence. 13 * 14 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to 15 * the utmost extent permitted by applicable law, neither express nor 16 * implied; without malicious intent or gross negligence. In no event 17 * may a licensor, author or contributor be held liable for indirect, 18 * direct, other damage, loss, or other issues arising in any way out 19 * of dealing in the work, even if advised of the possibility of such 20 * damage or existence of a defect, except proven that it results out 21 * of said person's immediate fault when using the work as intended. 22 */ 23 24 #include "sh.h" 25 26 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.182 2013/02/19 18:45:20 tg Exp $"); 27 28 /* 29 * states while lexing word 30 */ 31 #define SBASE 0 /* outside any lexical constructs */ 32 #define SWORD 1 /* implicit quoting for substitute() */ 33 #define SLETPAREN 2 /* inside (( )), implicit quoting */ 34 #define SSQUOTE 3 /* inside '' */ 35 #define SDQUOTE 4 /* inside "" */ 36 #define SEQUOTE 5 /* inside $'' */ 37 #define SBRACE 6 /* inside ${} */ 38 #define SQBRACE 7 /* inside "${}" */ 39 #define SBQUOTE 8 /* inside `` */ 40 #define SASPAREN 9 /* inside $(( )) */ 41 #define SHEREDELIM 10 /* parsing <<,<<-,<<< delimiter */ 42 #define SHEREDQUOTE 11 /* parsing " in <<,<<-,<<< delimiter */ 43 #define SPATTERN 12 /* parsing *(...|...) pattern (*+?@!) */ 44 #define SADELIM 13 /* like SBASE, looking for delimiter */ 45 #define STBRACEKORN 14 /* parsing ${...[#%]...} !FSH */ 46 #define STBRACEBOURNE 15 /* parsing ${...[#%]...} FSH */ 47 #define SINVALID 255 /* invalid state */ 48 49 struct sretrace_info { 50 struct sretrace_info *next; 51 XString xs; 52 char *xp; 53 }; 54 55 /* 56 * Structure to keep track of the lexing state and the various pieces of info 57 * needed for each particular state. 58 */ 59 typedef struct lex_state { 60 union { 61 /* point to the next state block */ 62 struct lex_state *base; 63 /* marks start of state output in output string */ 64 int start; 65 /* SBQUOTE: true if in double quotes: "`...`" */ 66 /* SEQUOTE: got NUL, ignore rest of string */ 67 bool abool; 68 /* SADELIM information */ 69 struct { 70 /* character to search for */ 71 unsigned char delimiter; 72 /* max. number of delimiters */ 73 unsigned char num; 74 } adelim; 75 } u; 76 /* count open parentheses */ 77 short nparen; 78 /* type of this state */ 79 uint8_t type; 80 } Lex_state; 81 #define ls_base u.base 82 #define ls_start u.start 83 #define ls_bool u.abool 84 #define ls_adelim u.adelim 85 86 typedef struct { 87 Lex_state *base; 88 Lex_state *end; 89 } State_info; 90 91 static void readhere(struct ioword *); 92 static void ungetsc(int); 93 static void ungetsc_i(int); 94 static int getsc_uu(void); 95 static void getsc_line(Source *); 96 static int getsc_bn(void); 97 static int s_get(void); 98 static void s_put(int); 99 static char *get_brace_var(XString *, char *); 100 static bool arraysub(char **); 101 static void gethere(bool); 102 static Lex_state *push_state_i(State_info *, Lex_state *); 103 static Lex_state *pop_state_i(State_info *, Lex_state *); 104 105 static int dopprompt(const char *, int, bool); 106 107 static int backslash_skip; 108 static int ignore_backslash_newline; 109 110 /* optimised getsc_bn() */ 111 #define o_getsc() (*source->str != '\0' && *source->str != '\\' && \ 112 !backslash_skip ? *source->str++ : getsc_bn()) 113 /* optimised getsc_uu() */ 114 #define o_getsc_u() ((*source->str != '\0') ? *source->str++ : getsc_uu()) 115 116 /* retrace helper */ 117 #define o_getsc_r(carg) { \ 118 int cev = (carg); \ 119 struct sretrace_info *rp = retrace_info; \ 120 \ 121 while (rp) { \ 122 Xcheck(rp->xs, rp->xp); \ 123 *rp->xp++ = cev; \ 124 rp = rp->next; \ 125 } \ 126 \ 127 return (cev); \ 128 } 129 130 #if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST) 131 static int getsc(void); 132 133 static int 134 getsc(void) 135 { 136 o_getsc_r(o_getsc()); 137 } 138 #else 139 static int getsc_r(int); 140 141 static int 142 getsc_r(int c) 143 { 144 o_getsc_r(c); 145 } 146 147 #define getsc() getsc_r(o_getsc()) 148 #endif 149 150 #define STATE_BSIZE 8 151 152 #define PUSH_STATE(s) do { \ 153 if (++statep == state_info.end) \ 154 statep = push_state_i(&state_info, statep); \ 155 state = statep->type = (s); \ 156 } while (/* CONSTCOND */ 0) 157 158 #define POP_STATE() do { \ 159 if (--statep == state_info.base) \ 160 statep = pop_state_i(&state_info, statep); \ 161 state = statep->type; \ 162 } while (/* CONSTCOND */ 0) 163 164 #define PUSH_SRETRACE() do { \ 165 struct sretrace_info *ri; \ 166 \ 167 statep->ls_start = Xsavepos(ws, wp); \ 168 ri = alloc(sizeof(struct sretrace_info), ATEMP); \ 169 Xinit(ri->xs, ri->xp, 64, ATEMP); \ 170 ri->next = retrace_info; \ 171 retrace_info = ri; \ 172 } while (/* CONSTCOND */ 0) 173 174 #define POP_SRETRACE() do { \ 175 wp = Xrestpos(ws, wp, statep->ls_start); \ 176 *retrace_info->xp = '\0'; \ 177 sp = Xstring(retrace_info->xs, retrace_info->xp); \ 178 dp = (void *)retrace_info; \ 179 retrace_info = retrace_info->next; \ 180 afree(dp, ATEMP); \ 181 } while (/* CONSTCOND */ 0) 182 183 /** 184 * Lexical analyser 185 * 186 * tokens are not regular expressions, they are LL(1). 187 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))". 188 * hence the state stack. Note "$(...)" are now parsed recursively. 189 */ 190 191 int 192 yylex(int cf) 193 { 194 Lex_state states[STATE_BSIZE], *statep, *s2, *base; 195 State_info state_info; 196 int c, c2, state; 197 size_t cz; 198 XString ws; /* expandable output word */ 199 char *wp; /* output word pointer */ 200 char *sp, *dp; 201 202 Again: 203 states[0].type = SINVALID; 204 states[0].ls_base = NULL; 205 statep = &states[1]; 206 state_info.base = states; 207 state_info.end = &state_info.base[STATE_BSIZE]; 208 209 Xinit(ws, wp, 64, ATEMP); 210 211 backslash_skip = 0; 212 ignore_backslash_newline = 0; 213 214 if (cf & ONEWORD) 215 state = SWORD; 216 else if (cf & LETEXPR) { 217 /* enclose arguments in (double) quotes */ 218 *wp++ = OQUOTE; 219 state = SLETPAREN; 220 statep->nparen = 0; 221 } else { 222 /* normal lexing */ 223 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE; 224 while ((c = getsc()) == ' ' || c == '\t') 225 ; 226 if (c == '#') { 227 ignore_backslash_newline++; 228 while ((c = getsc()) != '\0' && c != '\n') 229 ; 230 ignore_backslash_newline--; 231 } 232 ungetsc(c); 233 } 234 if (source->flags & SF_ALIAS) { 235 /* trailing ' ' in alias definition */ 236 source->flags &= ~SF_ALIAS; 237 cf |= ALIAS; 238 } 239 240 /* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */ 241 statep->type = state; 242 243 /* check for here string */ 244 if (state == SHEREDELIM) { 245 c = getsc(); 246 if (c == '<') { 247 state = SHEREDELIM; 248 while ((c = getsc()) == ' ' || c == '\t') 249 ; 250 ungetsc(c); 251 c = '<'; 252 goto accept_nonword; 253 } 254 ungetsc(c); 255 } 256 257 /* collect non-special or quoted characters to form word */ 258 while (!((c = getsc()) == 0 || 259 ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) { 260 if (state == SBASE && 261 subshell_nesting_type == /*{*/ '}' && 262 c == /*{*/ '}') 263 /* possibly end ${ :;} */ 264 break; 265 accept_nonword: 266 Xcheck(ws, wp); 267 switch (state) { 268 case SADELIM: 269 if (c == '(') 270 statep->nparen++; 271 else if (c == ')') 272 statep->nparen--; 273 else if (statep->nparen == 0 && (c == /*{*/ '}' || 274 c == (int)statep->ls_adelim.delimiter)) { 275 *wp++ = ADELIM; 276 *wp++ = c; 277 if (c == /*{*/ '}' || --statep->ls_adelim.num == 0) 278 POP_STATE(); 279 if (c == /*{*/ '}') 280 POP_STATE(); 281 break; 282 } 283 /* FALLTHROUGH */ 284 case SBASE: 285 if (c == '[' && (cf & (VARASN|ARRAYVAR))) { 286 /* temporary */ 287 *wp = EOS; 288 if (is_wdvarname(Xstring(ws, wp), false)) { 289 char *p, *tmp; 290 291 if (arraysub(&tmp)) { 292 *wp++ = CHAR; 293 *wp++ = c; 294 for (p = tmp; *p; ) { 295 Xcheck(ws, wp); 296 *wp++ = CHAR; 297 *wp++ = *p++; 298 } 299 afree(tmp, ATEMP); 300 break; 301 } else { 302 Source *s; 303 304 s = pushs(SREREAD, 305 source->areap); 306 s->start = s->str = 307 s->u.freeme = tmp; 308 s->next = source; 309 source = s; 310 } 311 } 312 *wp++ = CHAR; 313 *wp++ = c; 314 break; 315 } 316 /* FALLTHROUGH */ 317 Sbase1: /* includes *(...|...) pattern (*+?@!) */ 318 if (c == '*' || c == '@' || c == '+' || c == '?' || 319 c == '!') { 320 c2 = getsc(); 321 if (c2 == '(' /*)*/ ) { 322 *wp++ = OPAT; 323 *wp++ = c; 324 PUSH_STATE(SPATTERN); 325 break; 326 } 327 ungetsc(c2); 328 } 329 /* FALLTHROUGH */ 330 Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */ 331 switch (c) { 332 case '\\': 333 getsc_qchar: 334 if ((c = getsc())) { 335 /* trailing \ is lost */ 336 *wp++ = QCHAR; 337 *wp++ = c; 338 } 339 break; 340 case '\'': 341 open_ssquote: 342 *wp++ = OQUOTE; 343 ignore_backslash_newline++; 344 PUSH_STATE(SSQUOTE); 345 break; 346 case '"': 347 open_sdquote: 348 *wp++ = OQUOTE; 349 PUSH_STATE(SDQUOTE); 350 break; 351 case '$': 352 /* 353 * processing of dollar sign belongs into 354 * Subst, except for those which can open 355 * a string: $'' and $"" 356 */ 357 subst_dollar_ex: 358 c = getsc(); 359 switch (c) { 360 case '"': 361 goto open_sdquote; 362 case '\'': 363 goto open_sequote; 364 default: 365 goto SubstS; 366 } 367 default: 368 goto Subst; 369 } 370 break; 371 372 Subst: 373 switch (c) { 374 case '\\': 375 c = getsc(); 376 switch (c) { 377 case '"': 378 if ((cf & HEREDOC)) 379 goto heredocquote; 380 /* FALLTHROUGH */ 381 case '\\': 382 case '$': case '`': 383 store_qchar: 384 *wp++ = QCHAR; 385 *wp++ = c; 386 break; 387 default: 388 heredocquote: 389 Xcheck(ws, wp); 390 if (c) { 391 /* trailing \ is lost */ 392 *wp++ = CHAR; 393 *wp++ = '\\'; 394 *wp++ = CHAR; 395 *wp++ = c; 396 } 397 break; 398 } 399 break; 400 case '$': 401 c = getsc(); 402 SubstS: 403 if (c == '(') /*)*/ { 404 c = getsc(); 405 if (c == '(') /*)*/ { 406 *wp++ = EXPRSUB; 407 PUSH_STATE(SASPAREN); 408 statep->nparen = 2; 409 PUSH_SRETRACE(); 410 *retrace_info->xp++ = '('; 411 } else { 412 ungetsc(c); 413 subst_command: 414 c = COMSUB; 415 subst_command2: 416 sp = yyrecursive(c); 417 cz = strlen(sp) + 1; 418 XcheckN(ws, wp, cz); 419 *wp++ = c; 420 memcpy(wp, sp, cz); 421 wp += cz; 422 } 423 } else if (c == '{') /*}*/ { 424 c = getsc(); 425 if (ctype(c, C_IFSWS)) { 426 /* 427 * non-subenvironment 428 * "command" substitution 429 */ 430 c = FUNSUB; 431 goto subst_command2; 432 } 433 ungetsc(c); 434 *wp++ = OSUBST; 435 *wp++ = '{'; /*}*/ 436 wp = get_brace_var(&ws, wp); 437 c = getsc(); 438 /* allow :# and :% (ksh88 compat) */ 439 if (c == ':') { 440 *wp++ = CHAR; 441 *wp++ = c; 442 c = getsc(); 443 if (c == ':') { 444 *wp++ = CHAR; 445 *wp++ = '0'; 446 *wp++ = ADELIM; 447 *wp++ = ':'; 448 PUSH_STATE(SBRACE); 449 PUSH_STATE(SADELIM); 450 statep->ls_adelim.delimiter = ':'; 451 statep->ls_adelim.num = 1; 452 statep->nparen = 0; 453 break; 454 } else if (ksh_isdigit(c) || 455 c == '('/*)*/ || c == ' ' || 456 /*XXX what else? */ 457 c == '$') { 458 /* substring subst. */ 459 if (c != ' ') { 460 *wp++ = CHAR; 461 *wp++ = ' '; 462 } 463 ungetsc(c); 464 PUSH_STATE(SBRACE); 465 PUSH_STATE(SADELIM); 466 statep->ls_adelim.delimiter = ':'; 467 statep->ls_adelim.num = 2; 468 statep->nparen = 0; 469 break; 470 } 471 } else if (c == '/') { 472 *wp++ = CHAR; 473 *wp++ = c; 474 if ((c = getsc()) == '/') { 475 *wp++ = ADELIM; 476 *wp++ = c; 477 } else 478 ungetsc(c); 479 PUSH_STATE(SBRACE); 480 PUSH_STATE(SADELIM); 481 statep->ls_adelim.delimiter = '/'; 482 statep->ls_adelim.num = 1; 483 statep->nparen = 0; 484 break; 485 } 486 /* 487 * If this is a trim operation, 488 * treat (,|,) specially in STBRACE. 489 */ 490 if (ctype(c, C_SUBOP2)) { 491 ungetsc(c); 492 if (Flag(FSH)) 493 PUSH_STATE(STBRACEBOURNE); 494 else 495 PUSH_STATE(STBRACEKORN); 496 } else { 497 ungetsc(c); 498 if (state == SDQUOTE) 499 PUSH_STATE(SQBRACE); 500 else 501 PUSH_STATE(SBRACE); 502 } 503 } else if (ksh_isalphx(c)) { 504 *wp++ = OSUBST; 505 *wp++ = 'X'; 506 do { 507 Xcheck(ws, wp); 508 *wp++ = c; 509 c = getsc(); 510 } while (ksh_isalnux(c)); 511 *wp++ = '\0'; 512 *wp++ = CSUBST; 513 *wp++ = 'X'; 514 ungetsc(c); 515 } else if (ctype(c, C_VAR1 | C_DIGIT)) { 516 Xcheck(ws, wp); 517 *wp++ = OSUBST; 518 *wp++ = 'X'; 519 *wp++ = c; 520 *wp++ = '\0'; 521 *wp++ = CSUBST; 522 *wp++ = 'X'; 523 } else { 524 *wp++ = CHAR; 525 *wp++ = '$'; 526 ungetsc(c); 527 } 528 break; 529 case '`': 530 subst_gravis: 531 PUSH_STATE(SBQUOTE); 532 *wp++ = COMSUB; 533 /* 534 * Need to know if we are inside double quotes 535 * since sh/AT&T-ksh translate the \" to " in 536 * "`...\"...`". 537 * This is not done in POSIX mode (section 538 * 3.2.3, Double Quotes: "The backquote shall 539 * retain its special meaning introducing the 540 * other form of command substitution (see 541 * 3.6.3). The portion of the quoted string 542 * from the initial backquote and the 543 * characters up to the next backquote that 544 * is not preceded by a backslash (having 545 * escape characters removed) defines that 546 * command whose output replaces `...` when 547 * the word is expanded." 548 * Section 3.6.3, Command Substitution: 549 * "Within the backquoted style of command 550 * substitution, backslash shall retain its 551 * literal meaning, except when followed by 552 * $ ` \."). 553 */ 554 statep->ls_bool = false; 555 s2 = statep; 556 base = state_info.base; 557 while (/* CONSTCOND */ 1) { 558 for (; s2 != base; s2--) { 559 if (s2->type == SDQUOTE) { 560 statep->ls_bool = true; 561 break; 562 } 563 } 564 if (s2 != base) 565 break; 566 if (!(s2 = s2->ls_base)) 567 break; 568 base = s2-- - STATE_BSIZE; 569 } 570 break; 571 case QCHAR: 572 if (cf & LQCHAR) { 573 *wp++ = QCHAR; 574 *wp++ = getsc(); 575 break; 576 } 577 /* FALLTHROUGH */ 578 default: 579 store_char: 580 *wp++ = CHAR; 581 *wp++ = c; 582 } 583 break; 584 585 case SEQUOTE: 586 if (c == '\'') { 587 POP_STATE(); 588 *wp++ = CQUOTE; 589 ignore_backslash_newline--; 590 } else if (c == '\\') { 591 if ((c2 = unbksl(true, s_get, s_put)) == -1) 592 c2 = s_get(); 593 if (c2 == 0) 594 statep->ls_bool = true; 595 if (!statep->ls_bool) { 596 char ts[4]; 597 598 if ((unsigned int)c2 < 0x100) { 599 *wp++ = QCHAR; 600 *wp++ = c2; 601 } else { 602 cz = utf_wctomb(ts, c2 - 0x100); 603 ts[cz] = 0; 604 for (cz = 0; ts[cz]; ++cz) { 605 *wp++ = QCHAR; 606 *wp++ = ts[cz]; 607 } 608 } 609 } 610 } else if (!statep->ls_bool) { 611 *wp++ = QCHAR; 612 *wp++ = c; 613 } 614 break; 615 616 case SSQUOTE: 617 if (c == '\'') { 618 POP_STATE(); 619 *wp++ = CQUOTE; 620 ignore_backslash_newline--; 621 } else { 622 *wp++ = QCHAR; 623 *wp++ = c; 624 } 625 break; 626 627 case SDQUOTE: 628 if (c == '"') { 629 POP_STATE(); 630 *wp++ = CQUOTE; 631 } else 632 goto Subst; 633 break; 634 635 /* $(( ... )) */ 636 case SASPAREN: 637 if (c == '(') 638 statep->nparen++; 639 else if (c == ')') { 640 statep->nparen--; 641 if (statep->nparen == 1) { 642 /* end of EXPRSUB */ 643 POP_SRETRACE(); 644 POP_STATE(); 645 646 if ((c2 = getsc()) == /*(*/ ')') { 647 cz = strlen(sp) - 2; 648 XcheckN(ws, wp, cz); 649 memcpy(wp, sp + 1, cz); 650 wp += cz; 651 afree(sp, ATEMP); 652 *wp++ = '\0'; 653 break; 654 } else { 655 Source *s; 656 657 ungetsc(c2); 658 /* 659 * mismatched parenthesis - 660 * assume we were really 661 * parsing a $(...) expression 662 */ 663 --wp; 664 s = pushs(SREREAD, 665 source->areap); 666 s->start = s->str = 667 s->u.freeme = sp; 668 s->next = source; 669 source = s; 670 goto subst_command; 671 } 672 } 673 } 674 /* reuse existing state machine */ 675 goto Sbase2; 676 677 case SQBRACE: 678 if (c == '\\') { 679 /* 680 * perform POSIX "quote removal" if the back- 681 * slash is "special", i.e. same cases as the 682 * {case '\\':} in Subst: plus closing brace; 683 * in mksh code "quote removal" on '\c' means 684 * write QCHAR+c, otherwise CHAR+\+CHAR+c are 685 * emitted (in heredocquote:) 686 */ 687 if ((c = getsc()) == '"' || c == '\\' || 688 c == '$' || c == '`' || c == /*{*/'}') 689 goto store_qchar; 690 goto heredocquote; 691 } 692 goto common_SQBRACE; 693 694 case SBRACE: 695 if (c == '\'') 696 goto open_ssquote; 697 else if (c == '\\') 698 goto getsc_qchar; 699 common_SQBRACE: 700 if (c == '"') 701 goto open_sdquote; 702 else if (c == '$') 703 goto subst_dollar_ex; 704 else if (c == '`') 705 goto subst_gravis; 706 else if (c != /*{*/ '}') 707 goto store_char; 708 POP_STATE(); 709 *wp++ = CSUBST; 710 *wp++ = /*{*/ '}'; 711 break; 712 713 /* Same as SBASE, except (,|,) treated specially */ 714 case STBRACEKORN: 715 if (c == '|') 716 *wp++ = SPAT; 717 else if (c == '(') { 718 *wp++ = OPAT; 719 /* simile for @ */ 720 *wp++ = ' '; 721 PUSH_STATE(SPATTERN); 722 } else /* FALLTHROUGH */ 723 case STBRACEBOURNE: 724 if (c == /*{*/ '}') { 725 POP_STATE(); 726 *wp++ = CSUBST; 727 *wp++ = /*{*/ '}'; 728 } else 729 goto Sbase1; 730 break; 731 732 case SBQUOTE: 733 if (c == '`') { 734 *wp++ = 0; 735 POP_STATE(); 736 } else if (c == '\\') { 737 switch (c = getsc()) { 738 case 0: 739 /* trailing \ is lost */ 740 break; 741 case '\\': 742 case '$': case '`': 743 *wp++ = c; 744 break; 745 case '"': 746 if (statep->ls_bool) { 747 *wp++ = c; 748 break; 749 } 750 /* FALLTHROUGH */ 751 default: 752 *wp++ = '\\'; 753 *wp++ = c; 754 break; 755 } 756 } else 757 *wp++ = c; 758 break; 759 760 /* ONEWORD */ 761 case SWORD: 762 goto Subst; 763 764 /* LETEXPR: (( ... )) */ 765 case SLETPAREN: 766 if (c == /*(*/ ')') { 767 if (statep->nparen > 0) 768 --statep->nparen; 769 else if ((c2 = getsc()) == /*(*/ ')') { 770 c = 0; 771 *wp++ = CQUOTE; 772 goto Done; 773 } else { 774 Source *s; 775 776 ungetsc(c2); 777 /* 778 * mismatched parenthesis - 779 * assume we were really 780 * parsing a (...) expression 781 */ 782 *wp = EOS; 783 sp = Xstring(ws, wp); 784 dp = wdstrip(sp, WDS_KEEPQ); 785 s = pushs(SREREAD, source->areap); 786 s->start = s->str = s->u.freeme = dp; 787 s->next = source; 788 source = s; 789 return ('('/*)*/); 790 } 791 } else if (c == '(') 792 /* 793 * parentheses inside quotes and 794 * backslashes are lost, but AT&T ksh 795 * doesn't count them either 796 */ 797 ++statep->nparen; 798 goto Sbase2; 799 800 /* <<, <<-, <<< delimiter */ 801 case SHEREDELIM: 802 /* 803 * here delimiters need a special case since 804 * $ and `...` are not to be treated specially 805 */ 806 switch (c) { 807 case '\\': 808 if ((c = getsc())) { 809 /* trailing \ is lost */ 810 *wp++ = QCHAR; 811 *wp++ = c; 812 } 813 break; 814 case '\'': 815 goto open_ssquote; 816 case '$': 817 if ((c2 = getsc()) == '\'') { 818 open_sequote: 819 *wp++ = OQUOTE; 820 ignore_backslash_newline++; 821 PUSH_STATE(SEQUOTE); 822 statep->ls_bool = false; 823 break; 824 } else if (c2 == '"') { 825 /* FALLTHROUGH */ 826 case '"': 827 state = statep->type = SHEREDQUOTE; 828 PUSH_SRETRACE(); 829 break; 830 } 831 ungetsc(c2); 832 /* FALLTHROUGH */ 833 default: 834 *wp++ = CHAR; 835 *wp++ = c; 836 } 837 break; 838 839 /* " in <<, <<-, <<< delimiter */ 840 case SHEREDQUOTE: 841 if (c != '"') 842 goto Subst; 843 POP_SRETRACE(); 844 dp = strnul(sp) - 1; 845 /* remove the trailing double quote */ 846 *dp = '\0'; 847 /* store the quoted string */ 848 *wp++ = OQUOTE; 849 XcheckN(ws, wp, (dp - sp)); 850 dp = sp; 851 while ((c = *dp++)) { 852 if (c == '\\') { 853 switch ((c = *dp++)) { 854 case '\\': 855 case '"': 856 case '$': 857 case '`': 858 break; 859 default: 860 *wp++ = CHAR; 861 *wp++ = '\\'; 862 break; 863 } 864 } 865 *wp++ = CHAR; 866 *wp++ = c; 867 } 868 afree(sp, ATEMP); 869 *wp++ = CQUOTE; 870 state = statep->type = SHEREDELIM; 871 break; 872 873 /* in *(...|...) pattern (*+?@!) */ 874 case SPATTERN: 875 if (c == /*(*/ ')') { 876 *wp++ = CPAT; 877 POP_STATE(); 878 } else if (c == '|') { 879 *wp++ = SPAT; 880 } else if (c == '(') { 881 *wp++ = OPAT; 882 /* simile for @ */ 883 *wp++ = ' '; 884 PUSH_STATE(SPATTERN); 885 } else 886 goto Sbase1; 887 break; 888 } 889 } 890 Done: 891 Xcheck(ws, wp); 892 if (statep != &states[1]) 893 /* XXX figure out what is missing */ 894 yyerror("no closing quote\n"); 895 896 /* This done to avoid tests for SHEREDELIM wherever SBASE tested */ 897 if (state == SHEREDELIM) 898 state = SBASE; 899 900 dp = Xstring(ws, wp); 901 if ((c == '<' || c == '>' || c == '&') && state == SBASE) { 902 struct ioword *iop = alloc(sizeof(struct ioword), ATEMP); 903 904 if (Xlength(ws, wp) == 0) 905 iop->unit = c == '<' ? 0 : 1; 906 else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) { 907 if (dp[c2] != CHAR) 908 goto no_iop; 909 if (!ksh_isdigit(dp[c2 + 1])) 910 goto no_iop; 911 iop->unit = (iop->unit * 10) + dp[c2 + 1] - '0'; 912 } 913 914 if (iop->unit >= FDBASE) 915 goto no_iop; 916 917 if (c == '&') { 918 if ((c2 = getsc()) != '>') { 919 ungetsc(c2); 920 goto no_iop; 921 } 922 c = c2; 923 iop->flag = IOBASH; 924 } else 925 iop->flag = 0; 926 927 c2 = getsc(); 928 /* <<, >>, <> are ok, >< is not */ 929 if (c == c2 || (c == '<' && c2 == '>')) { 930 iop->flag |= c == c2 ? 931 (c == '>' ? IOCAT : IOHERE) : IORDWR; 932 if (iop->flag == IOHERE) { 933 if ((c2 = getsc()) == '-') { 934 iop->flag |= IOSKIP; 935 c2 = getsc(); 936 } else if (c2 == '<') 937 iop->flag |= IOHERESTR; 938 ungetsc(c2); 939 if (c2 == '\n') 940 iop->flag |= IONDELIM; 941 } 942 } else if (c2 == '&') 943 iop->flag |= IODUP | (c == '<' ? IORDUP : 0); 944 else { 945 iop->flag |= c == '>' ? IOWRITE : IOREAD; 946 if (c == '>' && c2 == '|') 947 iop->flag |= IOCLOB; 948 else 949 ungetsc(c2); 950 } 951 952 iop->name = NULL; 953 iop->delim = NULL; 954 iop->heredoc = NULL; 955 /* free word */ 956 Xfree(ws, wp); 957 yylval.iop = iop; 958 return (REDIR); 959 no_iop: 960 afree(iop, ATEMP); 961 } 962 963 if (wp == dp && state == SBASE) { 964 /* free word */ 965 Xfree(ws, wp); 966 /* no word, process LEX1 character */ 967 if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) { 968 if ((c2 = getsc()) == c) 969 c = (c == ';') ? BREAK : 970 (c == '|') ? LOGOR : 971 (c == '&') ? LOGAND : 972 /* c == '(' ) */ MDPAREN; 973 else if (c == '|' && c2 == '&') 974 c = COPROC; 975 else if (c == ';' && c2 == '|') 976 c = BRKEV; 977 else if (c == ';' && c2 == '&') 978 c = BRKFT; 979 else 980 ungetsc(c2); 981 #ifndef MKSH_SMALL 982 if (c == BREAK) { 983 if ((c2 = getsc()) == '&') 984 c = BRKEV; 985 else 986 ungetsc(c2); 987 } 988 #endif 989 } else if (c == '\n') { 990 gethere(false); 991 if (cf & CONTIN) 992 goto Again; 993 } else if (c == '\0') 994 /* need here strings at EOF */ 995 gethere(true); 996 return (c); 997 } 998 999 /* terminate word */ 1000 *wp++ = EOS; 1001 yylval.cp = Xclose(ws, wp); 1002 if (state == SWORD || state == SLETPAREN 1003 /* XXX ONEWORD? */) 1004 return (LWORD); 1005 1006 /* unget terminator */ 1007 ungetsc(c); 1008 1009 /* 1010 * note: the alias-vs-function code below depends on several 1011 * interna: starting from here, source->str is not modified; 1012 * the way getsc() and ungetsc() operate; etc. 1013 */ 1014 1015 /* copy word to unprefixed string ident */ 1016 sp = yylval.cp; 1017 dp = ident; 1018 if ((cf & HEREDELIM) && (sp[1] == '<')) 1019 while ((dp - ident) < IDENT) { 1020 if ((c = *sp++) == CHAR) 1021 *dp++ = *sp++; 1022 else if ((c != OQUOTE) && (c != CQUOTE)) 1023 break; 1024 } 1025 else 1026 while ((dp - ident) < IDENT && (c = *sp++) == CHAR) 1027 *dp++ = *sp++; 1028 /* Make sure the ident array stays '\0' padded */ 1029 memset(dp, 0, (ident + IDENT) - dp + 1); 1030 if (c != EOS) 1031 /* word is not unquoted */ 1032 *ident = '\0'; 1033 1034 if (*ident != '\0' && (cf & (KEYWORD | ALIAS))) { 1035 struct tbl *p; 1036 uint32_t h = hash(ident); 1037 1038 if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) && 1039 (!(cf & ESACONLY) || p->val.i == ESAC || 1040 p->val.i == /*{*/ '}')) { 1041 afree(yylval.cp, ATEMP); 1042 return (p->val.i); 1043 } 1044 if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) && 1045 (p->flag & ISSET)) { 1046 /* 1047 * this still points to the same character as the 1048 * ungetsc'd terminator from above 1049 */ 1050 const char *cp = source->str; 1051 1052 /* prefer POSIX but not Korn functions over aliases */ 1053 while (*cp == ' ' || *cp == '\t') 1054 /* 1055 * this is like getsc() without skipping 1056 * over Source boundaries (including not 1057 * parsing ungetsc'd characters that got 1058 * pushed into an SREREAD) which is what 1059 * we want here anyway: find out whether 1060 * the alias name is followed by a POSIX 1061 * function definition (only the opening 1062 * parenthesis is checked though) 1063 */ 1064 ++cp; 1065 /* prefer functions over aliases */ 1066 if (cp[0] != '(' || cp[1] != ')') { 1067 Source *s = source; 1068 1069 while (s && (s->flags & SF_HASALIAS)) 1070 if (s->u.tblp == p) 1071 return (LWORD); 1072 else 1073 s = s->next; 1074 /* push alias expansion */ 1075 s = pushs(SALIAS, source->areap); 1076 s->start = s->str = p->val.s; 1077 s->u.tblp = p; 1078 s->flags |= SF_HASALIAS; 1079 s->next = source; 1080 if (source->type == SEOF) { 1081 /* prevent infinite recursion at EOS */ 1082 source->u.tblp = p; 1083 source->flags |= SF_HASALIAS; 1084 } 1085 source = s; 1086 afree(yylval.cp, ATEMP); 1087 goto Again; 1088 } 1089 } 1090 } 1091 1092 return (LWORD); 1093 } 1094 1095 static void 1096 gethere(bool iseof) 1097 { 1098 struct ioword **p; 1099 1100 for (p = heres; p < herep; p++) 1101 if (iseof && !((*p)->flag & IOHERESTR)) 1102 /* only here strings at EOF */ 1103 return; 1104 else 1105 readhere(*p); 1106 herep = heres; 1107 } 1108 1109 /* 1110 * read "<<word" text into temp file 1111 */ 1112 1113 static void 1114 readhere(struct ioword *iop) 1115 { 1116 int c; 1117 const char *eof, *eofp; 1118 XString xs; 1119 char *xp; 1120 int xpos; 1121 1122 if (iop->flag & IOHERESTR) { 1123 /* process the here string */ 1124 iop->heredoc = xp = evalstr(iop->delim, DOBLANK); 1125 xpos = strlen(xp) - 1; 1126 memmove(xp, xp + 1, xpos); 1127 xp[xpos] = '\n'; 1128 return; 1129 } 1130 1131 eof = iop->flag & IONDELIM ? "<<" : evalstr(iop->delim, 0); 1132 1133 if (!(iop->flag & IOEVAL)) 1134 ignore_backslash_newline++; 1135 1136 Xinit(xs, xp, 256, ATEMP); 1137 1138 heredoc_read_line: 1139 /* beginning of line */ 1140 eofp = eof; 1141 xpos = Xsavepos(xs, xp); 1142 if (iop->flag & IOSKIP) { 1143 /* skip over leading tabs */ 1144 while ((c = getsc()) == '\t') 1145 /* nothing */; 1146 goto heredoc_parse_char; 1147 } 1148 heredoc_read_char: 1149 c = getsc(); 1150 heredoc_parse_char: 1151 /* compare with here document marker */ 1152 if (!*eofp) { 1153 /* end of here document marker, what to do? */ 1154 switch (c) { 1155 case /*(*/ ')': 1156 if (!subshell_nesting_type) 1157 /*- 1158 * not allowed outside $(...) or (...) 1159 * => mismatch 1160 */ 1161 break; 1162 /* allow $(...) or (...) to close here */ 1163 ungetsc(/*(*/ ')'); 1164 /* FALLTHROUGH */ 1165 case 0: 1166 /* 1167 * Allow EOF here to commands without trailing 1168 * newlines (mksh -c '...') will work as well. 1169 */ 1170 case '\n': 1171 /* Newline terminates here document marker */ 1172 goto heredoc_found_terminator; 1173 } 1174 } else if (c == *eofp++) 1175 /* store; then read and compare next character */ 1176 goto heredoc_store_and_loop; 1177 /* nope, mismatch; read until end of line */ 1178 while (c != '\n') { 1179 if (!c) 1180 /* oops, reached EOF */ 1181 yyerror("%s '%s' unclosed\n", "here document", eof); 1182 /* store character */ 1183 Xcheck(xs, xp); 1184 Xput(xs, xp, c); 1185 /* read next character */ 1186 c = getsc(); 1187 } 1188 /* we read a newline as last character */ 1189 heredoc_store_and_loop: 1190 /* store character */ 1191 Xcheck(xs, xp); 1192 Xput(xs, xp, c); 1193 if (c == '\n') 1194 goto heredoc_read_line; 1195 goto heredoc_read_char; 1196 1197 heredoc_found_terminator: 1198 /* jump back to saved beginning of line */ 1199 xp = Xrestpos(xs, xp, xpos); 1200 /* terminate, close and store */ 1201 Xput(xs, xp, '\0'); 1202 iop->heredoc = Xclose(xs, xp); 1203 1204 if (!(iop->flag & IOEVAL)) 1205 ignore_backslash_newline--; 1206 } 1207 1208 void 1209 yyerror(const char *fmt, ...) 1210 { 1211 va_list va; 1212 1213 /* pop aliases and re-reads */ 1214 while (source->type == SALIAS || source->type == SREREAD) 1215 source = source->next; 1216 /* zap pending input */ 1217 source->str = null; 1218 1219 error_prefix(true); 1220 va_start(va, fmt); 1221 shf_vfprintf(shl_out, fmt, va); 1222 va_end(va); 1223 errorfz(); 1224 } 1225 1226 /* 1227 * input for yylex with alias expansion 1228 */ 1229 1230 Source * 1231 pushs(int type, Area *areap) 1232 { 1233 Source *s; 1234 1235 s = alloc(sizeof(Source), areap); 1236 memset(s, 0, sizeof(Source)); 1237 s->type = type; 1238 s->str = null; 1239 s->areap = areap; 1240 if (type == SFILE || type == SSTDIN) 1241 XinitN(s->xs, 256, s->areap); 1242 return (s); 1243 } 1244 1245 static int 1246 getsc_uu(void) 1247 { 1248 Source *s = source; 1249 int c; 1250 1251 while ((c = *s->str++) == 0) { 1252 /* return 0 for EOF by default */ 1253 s->str = NULL; 1254 switch (s->type) { 1255 case SEOF: 1256 s->str = null; 1257 return (0); 1258 1259 case SSTDIN: 1260 case SFILE: 1261 getsc_line(s); 1262 break; 1263 1264 case SWSTR: 1265 break; 1266 1267 case SSTRING: 1268 case SSTRINGCMDLINE: 1269 break; 1270 1271 case SWORDS: 1272 s->start = s->str = *s->u.strv++; 1273 s->type = SWORDSEP; 1274 break; 1275 1276 case SWORDSEP: 1277 if (*s->u.strv == NULL) { 1278 s->start = s->str = "\n"; 1279 s->type = SEOF; 1280 } else { 1281 s->start = s->str = " "; 1282 s->type = SWORDS; 1283 } 1284 break; 1285 1286 case SALIAS: 1287 if (s->flags & SF_ALIASEND) { 1288 /* pass on an unused SF_ALIAS flag */ 1289 source = s->next; 1290 source->flags |= s->flags & SF_ALIAS; 1291 s = source; 1292 } else if (*s->u.tblp->val.s && 1293 (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) { 1294 /* pop source stack */ 1295 source = s = s->next; 1296 /* 1297 * Note that this alias ended with a 1298 * space, enabling alias expansion on 1299 * the following word. 1300 */ 1301 s->flags |= SF_ALIAS; 1302 } else { 1303 /* 1304 * At this point, we need to keep the current 1305 * alias in the source list so recursive 1306 * aliases can be detected and we also need to 1307 * return the next character. Do this by 1308 * temporarily popping the alias to get the 1309 * next character and then put it back in the 1310 * source list with the SF_ALIASEND flag set. 1311 */ 1312 /* pop source stack */ 1313 source = s->next; 1314 source->flags |= s->flags & SF_ALIAS; 1315 c = getsc_uu(); 1316 if (c) { 1317 s->flags |= SF_ALIASEND; 1318 s->ugbuf[0] = c; s->ugbuf[1] = '\0'; 1319 s->start = s->str = s->ugbuf; 1320 s->next = source; 1321 source = s; 1322 } else { 1323 s = source; 1324 /* avoid reading EOF twice */ 1325 s->str = NULL; 1326 break; 1327 } 1328 } 1329 continue; 1330 1331 case SREREAD: 1332 if (s->start != s->ugbuf) 1333 /* yuck */ 1334 afree(s->u.freeme, ATEMP); 1335 source = s = s->next; 1336 continue; 1337 } 1338 if (s->str == NULL) { 1339 s->type = SEOF; 1340 s->start = s->str = null; 1341 return ('\0'); 1342 } 1343 if (s->flags & SF_ECHO) { 1344 shf_puts(s->str, shl_out); 1345 shf_flush(shl_out); 1346 } 1347 } 1348 return (c); 1349 } 1350 1351 static void 1352 getsc_line(Source *s) 1353 { 1354 char *xp = Xstring(s->xs, xp), *cp; 1355 bool interactive = Flag(FTALKING) && s->type == SSTDIN; 1356 bool have_tty = tobool(interactive && (s->flags & SF_TTY)); 1357 1358 /* Done here to ensure nothing odd happens when a timeout occurs */ 1359 XcheckN(s->xs, xp, LINE); 1360 *xp = '\0'; 1361 s->start = s->str = xp; 1362 1363 if (have_tty && ksh_tmout) { 1364 ksh_tmout_state = TMOUT_READING; 1365 alarm(ksh_tmout); 1366 } 1367 if (interactive) 1368 change_winsz(); 1369 #ifndef MKSH_NO_CMDLINE_EDITING 1370 if (have_tty && ( 1371 #if !MKSH_S_NOVI 1372 Flag(FVI) || 1373 #endif 1374 Flag(FEMACS) || Flag(FGMACS))) { 1375 int nread; 1376 1377 nread = x_read(xp, LINE); 1378 if (nread < 0) 1379 /* read error */ 1380 nread = 0; 1381 xp[nread] = '\0'; 1382 xp += nread; 1383 } else 1384 #endif 1385 { 1386 if (interactive) 1387 pprompt(prompt, 0); 1388 else 1389 s->line++; 1390 1391 while (/* CONSTCOND */ 1) { 1392 char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf); 1393 1394 if (!p && shf_error(s->u.shf) && 1395 shf_errno(s->u.shf) == EINTR) { 1396 shf_clearerr(s->u.shf); 1397 if (trap) 1398 runtraps(0); 1399 continue; 1400 } 1401 if (!p || (xp = p, xp[-1] == '\n')) 1402 break; 1403 /* double buffer size */ 1404 /* move past NUL so doubling works... */ 1405 xp++; 1406 XcheckN(s->xs, xp, Xlength(s->xs, xp)); 1407 /* ...and move back again */ 1408 xp--; 1409 } 1410 /* 1411 * flush any unwanted input so other programs/builtins 1412 * can read it. Not very optimal, but less error prone 1413 * than flushing else where, dealing with redirections, 1414 * etc. 1415 * TODO: reduce size of shf buffer (~128?) if SSTDIN 1416 */ 1417 if (s->type == SSTDIN) 1418 shf_flush(s->u.shf); 1419 } 1420 /* 1421 * XXX: temporary kludge to restore source after a 1422 * trap may have been executed. 1423 */ 1424 source = s; 1425 if (have_tty && ksh_tmout) { 1426 ksh_tmout_state = TMOUT_EXECUTING; 1427 alarm(0); 1428 } 1429 cp = Xstring(s->xs, xp); 1430 s->start = s->str = cp; 1431 strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp)); 1432 /* Note: if input is all nulls, this is not eof */ 1433 if (Xlength(s->xs, xp) == 0) { 1434 /* EOF */ 1435 if (s->type == SFILE) 1436 shf_fdclose(s->u.shf); 1437 s->str = NULL; 1438 } else if (interactive && *s->str && 1439 (cur_prompt != PS1 || !ctype(*s->str, C_IFS | C_IFSWS))) { 1440 histsave(&s->line, s->str, true, true); 1441 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY 1442 } else if (interactive && cur_prompt == PS1) { 1443 cp = Xstring(s->xs, xp); 1444 while (*cp && ctype(*cp, C_IFSWS)) 1445 ++cp; 1446 if (!*cp) 1447 histsync(); 1448 #endif 1449 } 1450 if (interactive) 1451 set_prompt(PS2, NULL); 1452 } 1453 1454 void 1455 set_prompt(int to, Source *s) 1456 { 1457 cur_prompt = to; 1458 1459 switch (to) { 1460 /* command */ 1461 case PS1: 1462 /* 1463 * Substitute ! and !! here, before substitutions are done 1464 * so ! in expanded variables are not expanded. 1465 * NOTE: this is not what AT&T ksh does (it does it after 1466 * substitutions, POSIX doesn't say which is to be done. 1467 */ 1468 { 1469 struct shf *shf; 1470 char * volatile ps1; 1471 Area *saved_atemp; 1472 1473 ps1 = str_val(global("PS1")); 1474 shf = shf_sopen(NULL, strlen(ps1) * 2, 1475 SHF_WR | SHF_DYNAMIC, NULL); 1476 while (*ps1) 1477 if (*ps1 != '!' || *++ps1 == '!') 1478 shf_putchar(*ps1++, shf); 1479 else 1480 shf_fprintf(shf, "%d", 1481 s ? s->line + 1 : 0); 1482 ps1 = shf_sclose(shf); 1483 saved_atemp = ATEMP; 1484 newenv(E_ERRH); 1485 if (kshsetjmp(e->jbuf)) { 1486 prompt = safe_prompt; 1487 /* 1488 * Don't print an error - assume it has already 1489 * been printed. Reason is we may have forked 1490 * to run a command and the child may be 1491 * unwinding its stack through this code as it 1492 * exits. 1493 */ 1494 } else { 1495 char *cp = substitute(ps1, 0); 1496 strdupx(prompt, cp, saved_atemp); 1497 } 1498 quitenv(NULL); 1499 } 1500 break; 1501 /* command continuation */ 1502 case PS2: 1503 prompt = str_val(global("PS2")); 1504 break; 1505 } 1506 } 1507 1508 static int 1509 dopprompt(const char *cp, int ntruncate, bool doprint) 1510 { 1511 int columns = 0, lines = 0; 1512 bool indelimit = false; 1513 char delimiter = 0; 1514 1515 /* 1516 * Undocumented AT&T ksh feature: 1517 * If the second char in the prompt string is \r then the first 1518 * char is taken to be a non-printing delimiter and any chars 1519 * between two instances of the delimiter are not considered to 1520 * be part of the prompt length 1521 */ 1522 if (*cp && cp[1] == '\r') { 1523 delimiter = *cp; 1524 cp += 2; 1525 } 1526 for (; *cp; cp++) { 1527 if (indelimit && *cp != delimiter) 1528 ; 1529 else if (*cp == '\n' || *cp == '\r') { 1530 lines += columns / x_cols + ((*cp == '\n') ? 1 : 0); 1531 columns = 0; 1532 } else if (*cp == '\t') { 1533 columns = (columns | 7) + 1; 1534 } else if (*cp == '\b') { 1535 if (columns > 0) 1536 columns--; 1537 } else if (*cp == delimiter) 1538 indelimit = !indelimit; 1539 else if (UTFMODE && ((unsigned char)*cp > 0x7F)) { 1540 const char *cp2; 1541 columns += utf_widthadj(cp, &cp2); 1542 if (doprint && (indelimit || 1543 (ntruncate < (x_cols * lines + columns)))) 1544 shf_write(cp, cp2 - cp, shl_out); 1545 cp = cp2 - /* loop increment */ 1; 1546 continue; 1547 } else 1548 columns++; 1549 if (doprint && (*cp != delimiter) && 1550 (indelimit || (ntruncate < (x_cols * lines + columns)))) 1551 shf_putc(*cp, shl_out); 1552 } 1553 if (doprint) 1554 shf_flush(shl_out); 1555 return (x_cols * lines + columns); 1556 } 1557 1558 1559 void 1560 pprompt(const char *cp, int ntruncate) 1561 { 1562 dopprompt(cp, ntruncate, true); 1563 } 1564 1565 int 1566 promptlen(const char *cp) 1567 { 1568 return (dopprompt(cp, 0, false)); 1569 } 1570 1571 /* 1572 * Read the variable part of a ${...} expression (i.e. up to but not 1573 * including the :[-+?=#%] or close-brace). 1574 */ 1575 static char * 1576 get_brace_var(XString *wsp, char *wp) 1577 { 1578 char c; 1579 enum parse_state { 1580 PS_INITIAL, PS_SAW_HASH, PS_IDENT, 1581 PS_NUMBER, PS_VAR1 1582 } state = PS_INITIAL; 1583 1584 while (/* CONSTCOND */ 1) { 1585 c = getsc(); 1586 /* State machine to figure out where the variable part ends. */ 1587 switch (state) { 1588 case PS_INITIAL: 1589 if (c == '#' || c == '!' || c == '%') { 1590 state = PS_SAW_HASH; 1591 break; 1592 } 1593 /* FALLTHROUGH */ 1594 case PS_SAW_HASH: 1595 if (ksh_isalphx(c)) 1596 state = PS_IDENT; 1597 else if (ksh_isdigit(c)) 1598 state = PS_NUMBER; 1599 else if (c == '#') { 1600 if (state == PS_SAW_HASH) { 1601 char c2; 1602 1603 c2 = getsc(); 1604 ungetsc(c2); 1605 if (c2 != /*{*/ '}') { 1606 ungetsc(c); 1607 goto out; 1608 } 1609 } 1610 state = PS_VAR1; 1611 } else if (ctype(c, C_VAR1)) 1612 state = PS_VAR1; 1613 else 1614 goto out; 1615 break; 1616 case PS_IDENT: 1617 if (!ksh_isalnux(c)) { 1618 if (c == '[') { 1619 char *tmp, *p; 1620 1621 if (!arraysub(&tmp)) 1622 yyerror("missing ]\n"); 1623 *wp++ = c; 1624 for (p = tmp; *p; ) { 1625 Xcheck(*wsp, wp); 1626 *wp++ = *p++; 1627 } 1628 afree(tmp, ATEMP); 1629 /* the ] */ 1630 c = getsc(); 1631 } 1632 goto out; 1633 } 1634 break; 1635 case PS_NUMBER: 1636 if (!ksh_isdigit(c)) 1637 goto out; 1638 break; 1639 case PS_VAR1: 1640 goto out; 1641 } 1642 Xcheck(*wsp, wp); 1643 *wp++ = c; 1644 } 1645 out: 1646 /* end of variable part */ 1647 *wp++ = '\0'; 1648 ungetsc(c); 1649 return (wp); 1650 } 1651 1652 /* 1653 * Save an array subscript - returns true if matching bracket found, false 1654 * if eof or newline was found. 1655 * (Returned string double null terminated) 1656 */ 1657 static bool 1658 arraysub(char **strp) 1659 { 1660 XString ws; 1661 char *wp, c; 1662 /* we are just past the initial [ */ 1663 unsigned int depth = 1; 1664 1665 Xinit(ws, wp, 32, ATEMP); 1666 1667 do { 1668 c = getsc(); 1669 Xcheck(ws, wp); 1670 *wp++ = c; 1671 if (c == '[') 1672 depth++; 1673 else if (c == ']') 1674 depth--; 1675 } while (depth > 0 && c && c != '\n'); 1676 1677 *wp++ = '\0'; 1678 *strp = Xclose(ws, wp); 1679 1680 return (tobool(depth == 0)); 1681 } 1682 1683 /* Unget a char: handles case when we are already at the start of the buffer */ 1684 static void 1685 ungetsc(int c) 1686 { 1687 struct sretrace_info *rp = retrace_info; 1688 1689 if (backslash_skip) 1690 backslash_skip--; 1691 /* Don't unget EOF... */ 1692 if (source->str == null && c == '\0') 1693 return; 1694 while (rp) { 1695 if (Xlength(rp->xs, rp->xp)) 1696 rp->xp--; 1697 rp = rp->next; 1698 } 1699 ungetsc_i(c); 1700 } 1701 static void 1702 ungetsc_i(int c) 1703 { 1704 if (source->str > source->start) 1705 source->str--; 1706 else { 1707 Source *s; 1708 1709 s = pushs(SREREAD, source->areap); 1710 s->ugbuf[0] = c; s->ugbuf[1] = '\0'; 1711 s->start = s->str = s->ugbuf; 1712 s->next = source; 1713 source = s; 1714 } 1715 } 1716 1717 1718 /* Called to get a char that isn't a \newline sequence. */ 1719 static int 1720 getsc_bn(void) 1721 { 1722 int c, c2; 1723 1724 if (ignore_backslash_newline) 1725 return (o_getsc_u()); 1726 1727 if (backslash_skip == 1) { 1728 backslash_skip = 2; 1729 return (o_getsc_u()); 1730 } 1731 1732 backslash_skip = 0; 1733 1734 while (/* CONSTCOND */ 1) { 1735 c = o_getsc_u(); 1736 if (c == '\\') { 1737 if ((c2 = o_getsc_u()) == '\n') 1738 /* ignore the \newline; get the next char... */ 1739 continue; 1740 ungetsc_i(c2); 1741 backslash_skip = 1; 1742 } 1743 return (c); 1744 } 1745 } 1746 1747 void 1748 yyskiputf8bom(void) 1749 { 1750 int c; 1751 1752 if ((unsigned char)(c = o_getsc_u()) != 0xEF) { 1753 ungetsc_i(c); 1754 return; 1755 } 1756 if ((unsigned char)(c = o_getsc_u()) != 0xBB) { 1757 ungetsc_i(c); 1758 ungetsc_i(0xEF); 1759 return; 1760 } 1761 if ((unsigned char)(c = o_getsc_u()) != 0xBF) { 1762 ungetsc_i(c); 1763 ungetsc_i(0xBB); 1764 ungetsc_i(0xEF); 1765 return; 1766 } 1767 UTFMODE |= 8; 1768 } 1769 1770 static Lex_state * 1771 push_state_i(State_info *si, Lex_state *old_end) 1772 { 1773 Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP); 1774 1775 news[0].ls_base = old_end; 1776 si->base = &news[0]; 1777 si->end = &news[STATE_BSIZE]; 1778 return (&news[1]); 1779 } 1780 1781 static Lex_state * 1782 pop_state_i(State_info *si, Lex_state *old_end) 1783 { 1784 Lex_state *old_base = si->base; 1785 1786 si->base = old_end->ls_base - STATE_BSIZE; 1787 si->end = old_end->ls_base; 1788 1789 afree(old_base, ATEMP); 1790 1791 return (si->base + STATE_BSIZE - 1); 1792 } 1793 1794 static int 1795 s_get(void) 1796 { 1797 return (getsc()); 1798 } 1799 1800 static void 1801 s_put(int c) 1802 { 1803 ungetsc(c); 1804 } 1805