1 /* $OpenBSD: lex.c,v 1.47 2013/03/03 19:11:34 guenther Exp $ */ 2 3 /*- 4 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 5 * 2011, 2012, 2013 6 * Thorsten Glaser <tg (at) mirbsd.org> 7 * 8 * Provided that these terms and disclaimer and all copyright notices 9 * are retained or reproduced in an accompanying document, permission 10 * is granted to deal in this work without restriction, including un- 11 * limited rights to use, publicly perform, distribute, sell, modify, 12 * merge, give away, or sublicence. 13 * 14 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to 15 * the utmost extent permitted by applicable law, neither express nor 16 * implied; without malicious intent or gross negligence. In no event 17 * may a licensor, author or contributor be held liable for indirect, 18 * direct, other damage, loss, or other issues arising in any way out 19 * of dealing in the work, even if advised of the possibility of such 20 * damage or existence of a defect, except proven that it results out 21 * of said person's immediate fault when using the work as intended. 22 */ 23 24 #include "sh.h" 25 26 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.188 2013/08/10 13:44:31 tg Exp $"); 27 28 /* 29 * states while lexing word 30 */ 31 #define SBASE 0 /* outside any lexical constructs */ 32 #define SWORD 1 /* implicit quoting for substitute() */ 33 #define SLETPAREN 2 /* inside (( )), implicit quoting */ 34 #define SSQUOTE 3 /* inside '' */ 35 #define SDQUOTE 4 /* inside "" */ 36 #define SEQUOTE 5 /* inside $'' */ 37 #define SBRACE 6 /* inside ${} */ 38 #define SQBRACE 7 /* inside "${}" */ 39 #define SBQUOTE 8 /* inside `` */ 40 #define SASPAREN 9 /* inside $(( )) */ 41 #define SHEREDELIM 10 /* parsing <<,<<-,<<< delimiter */ 42 #define SHEREDQUOTE 11 /* parsing " in <<,<<-,<<< delimiter */ 43 #define SPATTERN 12 /* parsing *(...|...) pattern (*+?@!) */ 44 #define SADELIM 13 /* like SBASE, looking for delimiter */ 45 #define STBRACEKORN 14 /* parsing ${...[#%]...} !FSH */ 46 #define STBRACEBOURNE 15 /* parsing ${...[#%]...} FSH */ 47 #define SINVALID 255 /* invalid state */ 48 49 struct sretrace_info { 50 struct sretrace_info *next; 51 XString xs; 52 char *xp; 53 }; 54 55 /* 56 * Structure to keep track of the lexing state and the various pieces of info 57 * needed for each particular state. 58 */ 59 typedef struct lex_state { 60 union { 61 /* point to the next state block */ 62 struct lex_state *base; 63 /* marks start of state output in output string */ 64 int start; 65 /* SBQUOTE: true if in double quotes: "`...`" */ 66 /* SEQUOTE: got NUL, ignore rest of string */ 67 bool abool; 68 /* SADELIM information */ 69 struct { 70 /* character to search for */ 71 unsigned char delimiter; 72 /* max. number of delimiters */ 73 unsigned char num; 74 } adelim; 75 } u; 76 /* count open parentheses */ 77 short nparen; 78 /* type of this state */ 79 uint8_t type; 80 } Lex_state; 81 #define ls_base u.base 82 #define ls_start u.start 83 #define ls_bool u.abool 84 #define ls_adelim u.adelim 85 86 typedef struct { 87 Lex_state *base; 88 Lex_state *end; 89 } State_info; 90 91 static void readhere(struct ioword *); 92 static void ungetsc(int); 93 static void ungetsc_i(int); 94 static int getsc_uu(void); 95 static void getsc_line(Source *); 96 static int getsc_bn(void); 97 static int s_get(void); 98 static void s_put(int); 99 static char *get_brace_var(XString *, char *); 100 static bool arraysub(char **); 101 static void gethere(bool); 102 static Lex_state *push_state_i(State_info *, Lex_state *); 103 static Lex_state *pop_state_i(State_info *, Lex_state *); 104 105 static int backslash_skip; 106 static int ignore_backslash_newline; 107 108 /* optimised getsc_bn() */ 109 #define o_getsc() (*source->str != '\0' && *source->str != '\\' && \ 110 !backslash_skip ? *source->str++ : getsc_bn()) 111 /* optimised getsc_uu() */ 112 #define o_getsc_u() ((*source->str != '\0') ? *source->str++ : getsc_uu()) 113 114 /* retrace helper */ 115 #define o_getsc_r(carg) { \ 116 int cev = (carg); \ 117 struct sretrace_info *rp = retrace_info; \ 118 \ 119 while (rp) { \ 120 Xcheck(rp->xs, rp->xp); \ 121 *rp->xp++ = cev; \ 122 rp = rp->next; \ 123 } \ 124 \ 125 return (cev); \ 126 } 127 128 #if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST) 129 static int getsc(void); 130 131 static int 132 getsc(void) 133 { 134 o_getsc_r(o_getsc()); 135 } 136 #else 137 static int getsc_r(int); 138 139 static int 140 getsc_r(int c) 141 { 142 o_getsc_r(c); 143 } 144 145 #define getsc() getsc_r(o_getsc()) 146 #endif 147 148 #define STATE_BSIZE 8 149 150 #define PUSH_STATE(s) do { \ 151 if (++statep == state_info.end) \ 152 statep = push_state_i(&state_info, statep); \ 153 state = statep->type = (s); \ 154 } while (/* CONSTCOND */ 0) 155 156 #define POP_STATE() do { \ 157 if (--statep == state_info.base) \ 158 statep = pop_state_i(&state_info, statep); \ 159 state = statep->type; \ 160 } while (/* CONSTCOND */ 0) 161 162 #define PUSH_SRETRACE() do { \ 163 struct sretrace_info *ri; \ 164 \ 165 statep->ls_start = Xsavepos(ws, wp); \ 166 ri = alloc(sizeof(struct sretrace_info), ATEMP); \ 167 Xinit(ri->xs, ri->xp, 64, ATEMP); \ 168 ri->next = retrace_info; \ 169 retrace_info = ri; \ 170 } while (/* CONSTCOND */ 0) 171 172 #define POP_SRETRACE() do { \ 173 wp = Xrestpos(ws, wp, statep->ls_start); \ 174 *retrace_info->xp = '\0'; \ 175 sp = Xstring(retrace_info->xs, retrace_info->xp); \ 176 dp = (void *)retrace_info; \ 177 retrace_info = retrace_info->next; \ 178 afree(dp, ATEMP); \ 179 } while (/* CONSTCOND */ 0) 180 181 /** 182 * Lexical analyser 183 * 184 * tokens are not regular expressions, they are LL(1). 185 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))". 186 * hence the state stack. Note "$(...)" are now parsed recursively. 187 */ 188 189 int 190 yylex(int cf) 191 { 192 Lex_state states[STATE_BSIZE], *statep, *s2, *base; 193 State_info state_info; 194 int c, c2, state; 195 size_t cz; 196 XString ws; /* expandable output word */ 197 char *wp; /* output word pointer */ 198 char *sp, *dp; 199 200 Again: 201 states[0].type = SINVALID; 202 states[0].ls_base = NULL; 203 statep = &states[1]; 204 state_info.base = states; 205 state_info.end = &state_info.base[STATE_BSIZE]; 206 207 Xinit(ws, wp, 64, ATEMP); 208 209 backslash_skip = 0; 210 ignore_backslash_newline = 0; 211 212 if (cf & ONEWORD) 213 state = SWORD; 214 else if (cf & LETEXPR) { 215 /* enclose arguments in (double) quotes */ 216 *wp++ = OQUOTE; 217 state = SLETPAREN; 218 statep->nparen = 0; 219 } else { 220 /* normal lexing */ 221 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE; 222 while ((c = getsc()) == ' ' || c == '\t') 223 ; 224 if (c == '#') { 225 ignore_backslash_newline++; 226 while ((c = getsc()) != '\0' && c != '\n') 227 ; 228 ignore_backslash_newline--; 229 } 230 ungetsc(c); 231 } 232 if (source->flags & SF_ALIAS) { 233 /* trailing ' ' in alias definition */ 234 source->flags &= ~SF_ALIAS; 235 cf |= ALIAS; 236 } 237 238 /* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */ 239 statep->type = state; 240 241 /* check for here string */ 242 if (state == SHEREDELIM) { 243 c = getsc(); 244 if (c == '<') { 245 state = SHEREDELIM; 246 while ((c = getsc()) == ' ' || c == '\t') 247 ; 248 ungetsc(c); 249 c = '<'; 250 goto accept_nonword; 251 } 252 ungetsc(c); 253 } 254 255 /* collect non-special or quoted characters to form word */ 256 while (!((c = getsc()) == 0 || 257 ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) { 258 if (state == SBASE && 259 subshell_nesting_type == /*{*/ '}' && 260 c == /*{*/ '}') 261 /* possibly end ${ :;} */ 262 break; 263 accept_nonword: 264 Xcheck(ws, wp); 265 switch (state) { 266 case SADELIM: 267 if (c == '(') 268 statep->nparen++; 269 else if (c == ')') 270 statep->nparen--; 271 else if (statep->nparen == 0 && (c == /*{*/ '}' || 272 c == (int)statep->ls_adelim.delimiter)) { 273 *wp++ = ADELIM; 274 *wp++ = c; 275 if (c == /*{*/ '}' || --statep->ls_adelim.num == 0) 276 POP_STATE(); 277 if (c == /*{*/ '}') 278 POP_STATE(); 279 break; 280 } 281 /* FALLTHROUGH */ 282 case SBASE: 283 if (c == '[' && (cf & (VARASN|ARRAYVAR))) { 284 /* temporary */ 285 *wp = EOS; 286 if (is_wdvarname(Xstring(ws, wp), false)) { 287 char *p, *tmp; 288 289 if (arraysub(&tmp)) { 290 *wp++ = CHAR; 291 *wp++ = c; 292 for (p = tmp; *p; ) { 293 Xcheck(ws, wp); 294 *wp++ = CHAR; 295 *wp++ = *p++; 296 } 297 afree(tmp, ATEMP); 298 break; 299 } else { 300 Source *s; 301 302 s = pushs(SREREAD, 303 source->areap); 304 s->start = s->str = 305 s->u.freeme = tmp; 306 s->next = source; 307 source = s; 308 } 309 } 310 *wp++ = CHAR; 311 *wp++ = c; 312 break; 313 } 314 /* FALLTHROUGH */ 315 Sbase1: /* includes *(...|...) pattern (*+?@!) */ 316 if (c == '*' || c == '@' || c == '+' || c == '?' || 317 c == '!') { 318 c2 = getsc(); 319 if (c2 == '(' /*)*/ ) { 320 *wp++ = OPAT; 321 *wp++ = c; 322 PUSH_STATE(SPATTERN); 323 break; 324 } 325 ungetsc(c2); 326 } 327 /* FALLTHROUGH */ 328 Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */ 329 switch (c) { 330 case '\\': 331 getsc_qchar: 332 if ((c = getsc())) { 333 /* trailing \ is lost */ 334 *wp++ = QCHAR; 335 *wp++ = c; 336 } 337 break; 338 case '\'': 339 open_ssquote_unless_heredoc: 340 if ((cf & HEREDOC)) 341 goto store_char; 342 *wp++ = OQUOTE; 343 ignore_backslash_newline++; 344 PUSH_STATE(SSQUOTE); 345 break; 346 case '"': 347 open_sdquote: 348 *wp++ = OQUOTE; 349 PUSH_STATE(SDQUOTE); 350 break; 351 case '$': 352 /* 353 * processing of dollar sign belongs into 354 * Subst, except for those which can open 355 * a string: $'' and $"" 356 */ 357 subst_dollar_ex: 358 c = getsc(); 359 switch (c) { 360 case '"': 361 goto open_sdquote; 362 case '\'': 363 goto open_sequote; 364 default: 365 goto SubstS; 366 } 367 default: 368 goto Subst; 369 } 370 break; 371 372 Subst: 373 switch (c) { 374 case '\\': 375 c = getsc(); 376 switch (c) { 377 case '"': 378 if ((cf & HEREDOC)) 379 goto heredocquote; 380 /* FALLTHROUGH */ 381 case '\\': 382 case '$': case '`': 383 store_qchar: 384 *wp++ = QCHAR; 385 *wp++ = c; 386 break; 387 default: 388 heredocquote: 389 Xcheck(ws, wp); 390 if (c) { 391 /* trailing \ is lost */ 392 *wp++ = CHAR; 393 *wp++ = '\\'; 394 *wp++ = CHAR; 395 *wp++ = c; 396 } 397 break; 398 } 399 break; 400 case '$': 401 c = getsc(); 402 SubstS: 403 if (c == '(') /*)*/ { 404 c = getsc(); 405 if (c == '(') /*)*/ { 406 *wp++ = EXPRSUB; 407 PUSH_STATE(SASPAREN); 408 statep->nparen = 2; 409 PUSH_SRETRACE(); 410 *retrace_info->xp++ = '('; 411 } else { 412 ungetsc(c); 413 subst_command: 414 c = COMSUB; 415 subst_command2: 416 sp = yyrecursive(c); 417 cz = strlen(sp) + 1; 418 XcheckN(ws, wp, cz); 419 *wp++ = c; 420 memcpy(wp, sp, cz); 421 wp += cz; 422 } 423 } else if (c == '{') /*}*/ { 424 if ((c = getsc()) == '|') { 425 /* 426 * non-subenvironment 427 * value substitution 428 */ 429 c = VALSUB; 430 goto subst_command2; 431 } else if (ctype(c, C_IFSWS)) { 432 /* 433 * non-subenvironment 434 * "command" substitution 435 */ 436 c = FUNSUB; 437 goto subst_command2; 438 } 439 ungetsc(c); 440 *wp++ = OSUBST; 441 *wp++ = '{'; /*}*/ 442 wp = get_brace_var(&ws, wp); 443 c = getsc(); 444 /* allow :# and :% (ksh88 compat) */ 445 if (c == ':') { 446 *wp++ = CHAR; 447 *wp++ = c; 448 c = getsc(); 449 if (c == ':') { 450 *wp++ = CHAR; 451 *wp++ = '0'; 452 *wp++ = ADELIM; 453 *wp++ = ':'; 454 PUSH_STATE(SBRACE); 455 PUSH_STATE(SADELIM); 456 statep->ls_adelim.delimiter = ':'; 457 statep->ls_adelim.num = 1; 458 statep->nparen = 0; 459 break; 460 } else if (ksh_isdigit(c) || 461 c == '('/*)*/ || c == ' ' || 462 /*XXX what else? */ 463 c == '$') { 464 /* substring subst. */ 465 if (c != ' ') { 466 *wp++ = CHAR; 467 *wp++ = ' '; 468 } 469 ungetsc(c); 470 PUSH_STATE(SBRACE); 471 PUSH_STATE(SADELIM); 472 statep->ls_adelim.delimiter = ':'; 473 statep->ls_adelim.num = 2; 474 statep->nparen = 0; 475 break; 476 } 477 } else if (c == '/') { 478 *wp++ = CHAR; 479 *wp++ = c; 480 if ((c = getsc()) == '/') { 481 *wp++ = ADELIM; 482 *wp++ = c; 483 } else 484 ungetsc(c); 485 PUSH_STATE(SBRACE); 486 PUSH_STATE(SADELIM); 487 statep->ls_adelim.delimiter = '/'; 488 statep->ls_adelim.num = 1; 489 statep->nparen = 0; 490 break; 491 } 492 /* 493 * If this is a trim operation, 494 * treat (,|,) specially in STBRACE. 495 */ 496 if (ctype(c, C_SUBOP2)) { 497 ungetsc(c); 498 if (Flag(FSH)) 499 PUSH_STATE(STBRACEBOURNE); 500 else 501 PUSH_STATE(STBRACEKORN); 502 } else { 503 ungetsc(c); 504 if (state == SDQUOTE || 505 state == SQBRACE) 506 PUSH_STATE(SQBRACE); 507 else 508 PUSH_STATE(SBRACE); 509 } 510 } else if (ksh_isalphx(c)) { 511 *wp++ = OSUBST; 512 *wp++ = 'X'; 513 do { 514 Xcheck(ws, wp); 515 *wp++ = c; 516 c = getsc(); 517 } while (ksh_isalnux(c)); 518 *wp++ = '\0'; 519 *wp++ = CSUBST; 520 *wp++ = 'X'; 521 ungetsc(c); 522 } else if (ctype(c, C_VAR1 | C_DIGIT)) { 523 Xcheck(ws, wp); 524 *wp++ = OSUBST; 525 *wp++ = 'X'; 526 *wp++ = c; 527 *wp++ = '\0'; 528 *wp++ = CSUBST; 529 *wp++ = 'X'; 530 } else { 531 *wp++ = CHAR; 532 *wp++ = '$'; 533 ungetsc(c); 534 } 535 break; 536 case '`': 537 subst_gravis: 538 PUSH_STATE(SBQUOTE); 539 *wp++ = COMSUB; 540 /* 541 * Need to know if we are inside double quotes 542 * since sh/AT&T-ksh translate the \" to " in 543 * "`...\"...`". 544 * This is not done in POSIX mode (section 545 * 3.2.3, Double Quotes: "The backquote shall 546 * retain its special meaning introducing the 547 * other form of command substitution (see 548 * 3.6.3). The portion of the quoted string 549 * from the initial backquote and the 550 * characters up to the next backquote that 551 * is not preceded by a backslash (having 552 * escape characters removed) defines that 553 * command whose output replaces `...` when 554 * the word is expanded." 555 * Section 3.6.3, Command Substitution: 556 * "Within the backquoted style of command 557 * substitution, backslash shall retain its 558 * literal meaning, except when followed by 559 * $ ` \."). 560 */ 561 statep->ls_bool = false; 562 s2 = statep; 563 base = state_info.base; 564 while (/* CONSTCOND */ 1) { 565 for (; s2 != base; s2--) { 566 if (s2->type == SDQUOTE) { 567 statep->ls_bool = true; 568 break; 569 } 570 } 571 if (s2 != base) 572 break; 573 if (!(s2 = s2->ls_base)) 574 break; 575 base = s2-- - STATE_BSIZE; 576 } 577 break; 578 case QCHAR: 579 if (cf & LQCHAR) { 580 *wp++ = QCHAR; 581 *wp++ = getsc(); 582 break; 583 } 584 /* FALLTHROUGH */ 585 default: 586 store_char: 587 *wp++ = CHAR; 588 *wp++ = c; 589 } 590 break; 591 592 case SEQUOTE: 593 if (c == '\'') { 594 POP_STATE(); 595 *wp++ = CQUOTE; 596 ignore_backslash_newline--; 597 } else if (c == '\\') { 598 if ((c2 = unbksl(true, s_get, s_put)) == -1) 599 c2 = s_get(); 600 if (c2 == 0) 601 statep->ls_bool = true; 602 if (!statep->ls_bool) { 603 char ts[4]; 604 605 if ((unsigned int)c2 < 0x100) { 606 *wp++ = QCHAR; 607 *wp++ = c2; 608 } else { 609 cz = utf_wctomb(ts, c2 - 0x100); 610 ts[cz] = 0; 611 for (cz = 0; ts[cz]; ++cz) { 612 *wp++ = QCHAR; 613 *wp++ = ts[cz]; 614 } 615 } 616 } 617 } else if (!statep->ls_bool) { 618 *wp++ = QCHAR; 619 *wp++ = c; 620 } 621 break; 622 623 case SSQUOTE: 624 if (c == '\'') { 625 POP_STATE(); 626 if ((cf & HEREDOC) || state == SQBRACE) 627 goto store_char; 628 *wp++ = CQUOTE; 629 ignore_backslash_newline--; 630 } else { 631 *wp++ = QCHAR; 632 *wp++ = c; 633 } 634 break; 635 636 case SDQUOTE: 637 if (c == '"') { 638 POP_STATE(); 639 *wp++ = CQUOTE; 640 } else 641 goto Subst; 642 break; 643 644 /* $(( ... )) */ 645 case SASPAREN: 646 if (c == '(') 647 statep->nparen++; 648 else if (c == ')') { 649 statep->nparen--; 650 if (statep->nparen == 1) { 651 /* end of EXPRSUB */ 652 POP_SRETRACE(); 653 POP_STATE(); 654 655 if ((c2 = getsc()) == /*(*/ ')') { 656 cz = strlen(sp) - 2; 657 XcheckN(ws, wp, cz); 658 memcpy(wp, sp + 1, cz); 659 wp += cz; 660 afree(sp, ATEMP); 661 *wp++ = '\0'; 662 break; 663 } else { 664 Source *s; 665 666 ungetsc(c2); 667 /* 668 * mismatched parenthesis - 669 * assume we were really 670 * parsing a $(...) expression 671 */ 672 --wp; 673 s = pushs(SREREAD, 674 source->areap); 675 s->start = s->str = 676 s->u.freeme = sp; 677 s->next = source; 678 source = s; 679 goto subst_command; 680 } 681 } 682 } 683 /* reuse existing state machine */ 684 goto Sbase2; 685 686 case SQBRACE: 687 if (c == '\\') { 688 /* 689 * perform POSIX "quote removal" if the back- 690 * slash is "special", i.e. same cases as the 691 * {case '\\':} in Subst: plus closing brace; 692 * in mksh code "quote removal" on '\c' means 693 * write QCHAR+c, otherwise CHAR+\+CHAR+c are 694 * emitted (in heredocquote:) 695 */ 696 if ((c = getsc()) == '"' || c == '\\' || 697 c == '$' || c == '`' || c == /*{*/'}') 698 goto store_qchar; 699 goto heredocquote; 700 } 701 goto common_SQBRACE; 702 703 case SBRACE: 704 if (c == '\'') 705 goto open_ssquote_unless_heredoc; 706 else if (c == '\\') 707 goto getsc_qchar; 708 common_SQBRACE: 709 if (c == '"') 710 goto open_sdquote; 711 else if (c == '$') 712 goto subst_dollar_ex; 713 else if (c == '`') 714 goto subst_gravis; 715 else if (c != /*{*/ '}') 716 goto store_char; 717 POP_STATE(); 718 *wp++ = CSUBST; 719 *wp++ = /*{*/ '}'; 720 break; 721 722 /* Same as SBASE, except (,|,) treated specially */ 723 case STBRACEKORN: 724 if (c == '|') 725 *wp++ = SPAT; 726 else if (c == '(') { 727 *wp++ = OPAT; 728 /* simile for @ */ 729 *wp++ = ' '; 730 PUSH_STATE(SPATTERN); 731 } else /* FALLTHROUGH */ 732 case STBRACEBOURNE: 733 if (c == /*{*/ '}') { 734 POP_STATE(); 735 *wp++ = CSUBST; 736 *wp++ = /*{*/ '}'; 737 } else 738 goto Sbase1; 739 break; 740 741 case SBQUOTE: 742 if (c == '`') { 743 *wp++ = 0; 744 POP_STATE(); 745 } else if (c == '\\') { 746 switch (c = getsc()) { 747 case 0: 748 /* trailing \ is lost */ 749 break; 750 case '\\': 751 case '$': case '`': 752 *wp++ = c; 753 break; 754 case '"': 755 if (statep->ls_bool) { 756 *wp++ = c; 757 break; 758 } 759 /* FALLTHROUGH */ 760 default: 761 *wp++ = '\\'; 762 *wp++ = c; 763 break; 764 } 765 } else 766 *wp++ = c; 767 break; 768 769 /* ONEWORD */ 770 case SWORD: 771 goto Subst; 772 773 /* LETEXPR: (( ... )) */ 774 case SLETPAREN: 775 if (c == /*(*/ ')') { 776 if (statep->nparen > 0) 777 --statep->nparen; 778 else if ((c2 = getsc()) == /*(*/ ')') { 779 c = 0; 780 *wp++ = CQUOTE; 781 goto Done; 782 } else { 783 Source *s; 784 785 ungetsc(c2); 786 /* 787 * mismatched parenthesis - 788 * assume we were really 789 * parsing a (...) expression 790 */ 791 *wp = EOS; 792 sp = Xstring(ws, wp); 793 dp = wdstrip(sp, WDS_KEEPQ); 794 s = pushs(SREREAD, source->areap); 795 s->start = s->str = s->u.freeme = dp; 796 s->next = source; 797 source = s; 798 return ('('/*)*/); 799 } 800 } else if (c == '(') 801 /* 802 * parentheses inside quotes and 803 * backslashes are lost, but AT&T ksh 804 * doesn't count them either 805 */ 806 ++statep->nparen; 807 goto Sbase2; 808 809 /* <<, <<-, <<< delimiter */ 810 case SHEREDELIM: 811 /* 812 * here delimiters need a special case since 813 * $ and `...` are not to be treated specially 814 */ 815 switch (c) { 816 case '\\': 817 if ((c = getsc())) { 818 /* trailing \ is lost */ 819 *wp++ = QCHAR; 820 *wp++ = c; 821 } 822 break; 823 case '\'': 824 goto open_ssquote_unless_heredoc; 825 case '$': 826 if ((c2 = getsc()) == '\'') { 827 open_sequote: 828 *wp++ = OQUOTE; 829 ignore_backslash_newline++; 830 PUSH_STATE(SEQUOTE); 831 statep->ls_bool = false; 832 break; 833 } else if (c2 == '"') { 834 /* FALLTHROUGH */ 835 case '"': 836 state = statep->type = SHEREDQUOTE; 837 PUSH_SRETRACE(); 838 break; 839 } 840 ungetsc(c2); 841 /* FALLTHROUGH */ 842 default: 843 *wp++ = CHAR; 844 *wp++ = c; 845 } 846 break; 847 848 /* " in <<, <<-, <<< delimiter */ 849 case SHEREDQUOTE: 850 if (c != '"') 851 goto Subst; 852 POP_SRETRACE(); 853 dp = strnul(sp) - 1; 854 /* remove the trailing double quote */ 855 *dp = '\0'; 856 /* store the quoted string */ 857 *wp++ = OQUOTE; 858 XcheckN(ws, wp, (dp - sp)); 859 dp = sp; 860 while ((c = *dp++)) { 861 if (c == '\\') { 862 switch ((c = *dp++)) { 863 case '\\': 864 case '"': 865 case '$': 866 case '`': 867 break; 868 default: 869 *wp++ = CHAR; 870 *wp++ = '\\'; 871 break; 872 } 873 } 874 *wp++ = CHAR; 875 *wp++ = c; 876 } 877 afree(sp, ATEMP); 878 *wp++ = CQUOTE; 879 state = statep->type = SHEREDELIM; 880 break; 881 882 /* in *(...|...) pattern (*+?@!) */ 883 case SPATTERN: 884 if (c == /*(*/ ')') { 885 *wp++ = CPAT; 886 POP_STATE(); 887 } else if (c == '|') { 888 *wp++ = SPAT; 889 } else if (c == '(') { 890 *wp++ = OPAT; 891 /* simile for @ */ 892 *wp++ = ' '; 893 PUSH_STATE(SPATTERN); 894 } else 895 goto Sbase1; 896 break; 897 } 898 } 899 Done: 900 Xcheck(ws, wp); 901 if (statep != &states[1]) 902 /* XXX figure out what is missing */ 903 yyerror("no closing quote\n"); 904 905 /* This done to avoid tests for SHEREDELIM wherever SBASE tested */ 906 if (state == SHEREDELIM) 907 state = SBASE; 908 909 dp = Xstring(ws, wp); 910 if (state == SBASE && ( 911 #ifndef MKSH_LEGACY_MODE 912 (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) || 913 #endif 914 c == '<' || c == '>')) { 915 struct ioword *iop = alloc(sizeof(struct ioword), ATEMP); 916 917 if (Xlength(ws, wp) == 0) 918 iop->unit = c == '<' ? 0 : 1; 919 else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) { 920 if (dp[c2] != CHAR) 921 goto no_iop; 922 if (!ksh_isdigit(dp[c2 + 1])) 923 goto no_iop; 924 iop->unit = (iop->unit * 10) + dp[c2 + 1] - '0'; 925 } 926 927 if (iop->unit >= FDBASE) 928 goto no_iop; 929 930 if (c == '&') { 931 if ((c2 = getsc()) != '>') { 932 ungetsc(c2); 933 goto no_iop; 934 } 935 c = c2; 936 iop->flag = IOBASH; 937 } else 938 iop->flag = 0; 939 940 c2 = getsc(); 941 /* <<, >>, <> are ok, >< is not */ 942 if (c == c2 || (c == '<' && c2 == '>')) { 943 iop->flag |= c == c2 ? 944 (c == '>' ? IOCAT : IOHERE) : IORDWR; 945 if (iop->flag == IOHERE) { 946 if ((c2 = getsc()) == '-') { 947 iop->flag |= IOSKIP; 948 c2 = getsc(); 949 } else if (c2 == '<') 950 iop->flag |= IOHERESTR; 951 ungetsc(c2); 952 if (c2 == '\n') 953 iop->flag |= IONDELIM; 954 } 955 } else if (c2 == '&') 956 iop->flag |= IODUP | (c == '<' ? IORDUP : 0); 957 else { 958 iop->flag |= c == '>' ? IOWRITE : IOREAD; 959 if (c == '>' && c2 == '|') 960 iop->flag |= IOCLOB; 961 else 962 ungetsc(c2); 963 } 964 965 iop->name = NULL; 966 iop->delim = NULL; 967 iop->heredoc = NULL; 968 /* free word */ 969 Xfree(ws, wp); 970 yylval.iop = iop; 971 return (REDIR); 972 no_iop: 973 afree(iop, ATEMP); 974 } 975 976 if (wp == dp && state == SBASE) { 977 /* free word */ 978 Xfree(ws, wp); 979 /* no word, process LEX1 character */ 980 if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) { 981 if ((c2 = getsc()) == c) 982 c = (c == ';') ? BREAK : 983 (c == '|') ? LOGOR : 984 (c == '&') ? LOGAND : 985 /* c == '(' ) */ MDPAREN; 986 else if (c == '|' && c2 == '&') 987 c = COPROC; 988 else if (c == ';' && c2 == '|') 989 c = BRKEV; 990 else if (c == ';' && c2 == '&') 991 c = BRKFT; 992 else 993 ungetsc(c2); 994 #ifndef MKSH_SMALL 995 if (c == BREAK) { 996 if ((c2 = getsc()) == '&') 997 c = BRKEV; 998 else 999 ungetsc(c2); 1000 } 1001 #endif 1002 } else if (c == '\n') { 1003 gethere(false); 1004 if (cf & CONTIN) 1005 goto Again; 1006 } else if (c == '\0') 1007 /* need here strings at EOF */ 1008 gethere(true); 1009 return (c); 1010 } 1011 1012 /* terminate word */ 1013 *wp++ = EOS; 1014 yylval.cp = Xclose(ws, wp); 1015 if (state == SWORD || state == SLETPAREN 1016 /* XXX ONEWORD? */) 1017 return (LWORD); 1018 1019 /* unget terminator */ 1020 ungetsc(c); 1021 1022 /* 1023 * note: the alias-vs-function code below depends on several 1024 * interna: starting from here, source->str is not modified; 1025 * the way getsc() and ungetsc() operate; etc. 1026 */ 1027 1028 /* copy word to unprefixed string ident */ 1029 sp = yylval.cp; 1030 dp = ident; 1031 if ((cf & HEREDELIM) && (sp[1] == '<')) 1032 while ((dp - ident) < IDENT) { 1033 if ((c = *sp++) == CHAR) 1034 *dp++ = *sp++; 1035 else if ((c != OQUOTE) && (c != CQUOTE)) 1036 break; 1037 } 1038 else 1039 while ((dp - ident) < IDENT && (c = *sp++) == CHAR) 1040 *dp++ = *sp++; 1041 /* Make sure the ident array stays '\0' padded */ 1042 memset(dp, 0, (ident + IDENT) - dp + 1); 1043 if (c != EOS) 1044 /* word is not unquoted */ 1045 *ident = '\0'; 1046 1047 if (*ident != '\0' && (cf & (KEYWORD | ALIAS))) { 1048 struct tbl *p; 1049 uint32_t h = hash(ident); 1050 1051 if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) && 1052 (!(cf & ESACONLY) || p->val.i == ESAC || 1053 p->val.i == /*{*/ '}')) { 1054 afree(yylval.cp, ATEMP); 1055 return (p->val.i); 1056 } 1057 if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) && 1058 (p->flag & ISSET)) { 1059 /* 1060 * this still points to the same character as the 1061 * ungetsc'd terminator from above 1062 */ 1063 const char *cp = source->str; 1064 1065 /* prefer POSIX but not Korn functions over aliases */ 1066 while (*cp == ' ' || *cp == '\t') 1067 /* 1068 * this is like getsc() without skipping 1069 * over Source boundaries (including not 1070 * parsing ungetsc'd characters that got 1071 * pushed into an SREREAD) which is what 1072 * we want here anyway: find out whether 1073 * the alias name is followed by a POSIX 1074 * function definition (only the opening 1075 * parenthesis is checked though) 1076 */ 1077 ++cp; 1078 /* prefer functions over aliases */ 1079 if (cp[0] != '(' || cp[1] != ')') { 1080 Source *s = source; 1081 1082 while (s && (s->flags & SF_HASALIAS)) 1083 if (s->u.tblp == p) 1084 return (LWORD); 1085 else 1086 s = s->next; 1087 /* push alias expansion */ 1088 s = pushs(SALIAS, source->areap); 1089 s->start = s->str = p->val.s; 1090 s->u.tblp = p; 1091 s->flags |= SF_HASALIAS; 1092 s->next = source; 1093 if (source->type == SEOF) { 1094 /* prevent infinite recursion at EOS */ 1095 source->u.tblp = p; 1096 source->flags |= SF_HASALIAS; 1097 } 1098 source = s; 1099 afree(yylval.cp, ATEMP); 1100 goto Again; 1101 } 1102 } 1103 } 1104 1105 return (LWORD); 1106 } 1107 1108 static void 1109 gethere(bool iseof) 1110 { 1111 struct ioword **p; 1112 1113 for (p = heres; p < herep; p++) 1114 if (iseof && !((*p)->flag & IOHERESTR)) 1115 /* only here strings at EOF */ 1116 return; 1117 else 1118 readhere(*p); 1119 herep = heres; 1120 } 1121 1122 /* 1123 * read "<<word" text into temp file 1124 */ 1125 1126 static void 1127 readhere(struct ioword *iop) 1128 { 1129 int c; 1130 const char *eof, *eofp; 1131 XString xs; 1132 char *xp; 1133 int xpos; 1134 1135 if (iop->flag & IOHERESTR) { 1136 /* process the here string */ 1137 iop->heredoc = xp = evalstr(iop->delim, DOBLANK); 1138 xpos = strlen(xp) - 1; 1139 memmove(xp, xp + 1, xpos); 1140 xp[xpos] = '\n'; 1141 return; 1142 } 1143 1144 eof = iop->flag & IONDELIM ? "<<" : evalstr(iop->delim, 0); 1145 1146 if (!(iop->flag & IOEVAL)) 1147 ignore_backslash_newline++; 1148 1149 Xinit(xs, xp, 256, ATEMP); 1150 1151 heredoc_read_line: 1152 /* beginning of line */ 1153 eofp = eof; 1154 xpos = Xsavepos(xs, xp); 1155 if (iop->flag & IOSKIP) { 1156 /* skip over leading tabs */ 1157 while ((c = getsc()) == '\t') 1158 /* nothing */; 1159 goto heredoc_parse_char; 1160 } 1161 heredoc_read_char: 1162 c = getsc(); 1163 heredoc_parse_char: 1164 /* compare with here document marker */ 1165 if (!*eofp) { 1166 /* end of here document marker, what to do? */ 1167 switch (c) { 1168 case /*(*/ ')': 1169 if (!subshell_nesting_type) 1170 /*- 1171 * not allowed outside $(...) or (...) 1172 * => mismatch 1173 */ 1174 break; 1175 /* allow $(...) or (...) to close here */ 1176 ungetsc(/*(*/ ')'); 1177 /* FALLTHROUGH */ 1178 case 0: 1179 /* 1180 * Allow EOF here to commands without trailing 1181 * newlines (mksh -c '...') will work as well. 1182 */ 1183 case '\n': 1184 /* Newline terminates here document marker */ 1185 goto heredoc_found_terminator; 1186 } 1187 } else if (c == *eofp++) 1188 /* store; then read and compare next character */ 1189 goto heredoc_store_and_loop; 1190 /* nope, mismatch; read until end of line */ 1191 while (c != '\n') { 1192 if (!c) 1193 /* oops, reached EOF */ 1194 yyerror("%s '%s' unclosed\n", "here document", eof); 1195 /* store character */ 1196 Xcheck(xs, xp); 1197 Xput(xs, xp, c); 1198 /* read next character */ 1199 c = getsc(); 1200 } 1201 /* we read a newline as last character */ 1202 heredoc_store_and_loop: 1203 /* store character */ 1204 Xcheck(xs, xp); 1205 Xput(xs, xp, c); 1206 if (c == '\n') 1207 goto heredoc_read_line; 1208 goto heredoc_read_char; 1209 1210 heredoc_found_terminator: 1211 /* jump back to saved beginning of line */ 1212 xp = Xrestpos(xs, xp, xpos); 1213 /* terminate, close and store */ 1214 Xput(xs, xp, '\0'); 1215 iop->heredoc = Xclose(xs, xp); 1216 1217 if (!(iop->flag & IOEVAL)) 1218 ignore_backslash_newline--; 1219 } 1220 1221 void 1222 yyerror(const char *fmt, ...) 1223 { 1224 va_list va; 1225 1226 /* pop aliases and re-reads */ 1227 while (source->type == SALIAS || source->type == SREREAD) 1228 source = source->next; 1229 /* zap pending input */ 1230 source->str = null; 1231 1232 error_prefix(true); 1233 va_start(va, fmt); 1234 shf_vfprintf(shl_out, fmt, va); 1235 va_end(va); 1236 errorfz(); 1237 } 1238 1239 /* 1240 * input for yylex with alias expansion 1241 */ 1242 1243 Source * 1244 pushs(int type, Area *areap) 1245 { 1246 Source *s; 1247 1248 s = alloc(sizeof(Source), areap); 1249 memset(s, 0, sizeof(Source)); 1250 s->type = type; 1251 s->str = null; 1252 s->areap = areap; 1253 if (type == SFILE || type == SSTDIN) 1254 XinitN(s->xs, 256, s->areap); 1255 return (s); 1256 } 1257 1258 static int 1259 getsc_uu(void) 1260 { 1261 Source *s = source; 1262 int c; 1263 1264 while ((c = *s->str++) == 0) { 1265 /* return 0 for EOF by default */ 1266 s->str = NULL; 1267 switch (s->type) { 1268 case SEOF: 1269 s->str = null; 1270 return (0); 1271 1272 case SSTDIN: 1273 case SFILE: 1274 getsc_line(s); 1275 break; 1276 1277 case SWSTR: 1278 break; 1279 1280 case SSTRING: 1281 case SSTRINGCMDLINE: 1282 break; 1283 1284 case SWORDS: 1285 s->start = s->str = *s->u.strv++; 1286 s->type = SWORDSEP; 1287 break; 1288 1289 case SWORDSEP: 1290 if (*s->u.strv == NULL) { 1291 s->start = s->str = "\n"; 1292 s->type = SEOF; 1293 } else { 1294 s->start = s->str = " "; 1295 s->type = SWORDS; 1296 } 1297 break; 1298 1299 case SALIAS: 1300 if (s->flags & SF_ALIASEND) { 1301 /* pass on an unused SF_ALIAS flag */ 1302 source = s->next; 1303 source->flags |= s->flags & SF_ALIAS; 1304 s = source; 1305 } else if (*s->u.tblp->val.s && 1306 (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) { 1307 /* pop source stack */ 1308 source = s = s->next; 1309 /* 1310 * Note that this alias ended with a 1311 * space, enabling alias expansion on 1312 * the following word. 1313 */ 1314 s->flags |= SF_ALIAS; 1315 } else { 1316 /* 1317 * At this point, we need to keep the current 1318 * alias in the source list so recursive 1319 * aliases can be detected and we also need to 1320 * return the next character. Do this by 1321 * temporarily popping the alias to get the 1322 * next character and then put it back in the 1323 * source list with the SF_ALIASEND flag set. 1324 */ 1325 /* pop source stack */ 1326 source = s->next; 1327 source->flags |= s->flags & SF_ALIAS; 1328 c = getsc_uu(); 1329 if (c) { 1330 s->flags |= SF_ALIASEND; 1331 s->ugbuf[0] = c; s->ugbuf[1] = '\0'; 1332 s->start = s->str = s->ugbuf; 1333 s->next = source; 1334 source = s; 1335 } else { 1336 s = source; 1337 /* avoid reading EOF twice */ 1338 s->str = NULL; 1339 break; 1340 } 1341 } 1342 continue; 1343 1344 case SREREAD: 1345 if (s->start != s->ugbuf) 1346 /* yuck */ 1347 afree(s->u.freeme, ATEMP); 1348 source = s = s->next; 1349 continue; 1350 } 1351 if (s->str == NULL) { 1352 s->type = SEOF; 1353 s->start = s->str = null; 1354 return ('\0'); 1355 } 1356 if (s->flags & SF_ECHO) { 1357 shf_puts(s->str, shl_out); 1358 shf_flush(shl_out); 1359 } 1360 } 1361 return (c); 1362 } 1363 1364 static void 1365 getsc_line(Source *s) 1366 { 1367 char *xp = Xstring(s->xs, xp), *cp; 1368 bool interactive = Flag(FTALKING) && s->type == SSTDIN; 1369 bool have_tty = tobool(interactive && (s->flags & SF_TTY)); 1370 1371 /* Done here to ensure nothing odd happens when a timeout occurs */ 1372 XcheckN(s->xs, xp, LINE); 1373 *xp = '\0'; 1374 s->start = s->str = xp; 1375 1376 if (have_tty && ksh_tmout) { 1377 ksh_tmout_state = TMOUT_READING; 1378 alarm(ksh_tmout); 1379 } 1380 if (interactive) 1381 change_winsz(); 1382 #ifndef MKSH_NO_CMDLINE_EDITING 1383 if (have_tty && ( 1384 #if !MKSH_S_NOVI 1385 Flag(FVI) || 1386 #endif 1387 Flag(FEMACS) || Flag(FGMACS))) { 1388 int nread; 1389 1390 nread = x_read(xp); 1391 if (nread < 0) 1392 /* read error */ 1393 nread = 0; 1394 xp[nread] = '\0'; 1395 xp += nread; 1396 } else 1397 #endif 1398 { 1399 if (interactive) 1400 pprompt(prompt, 0); 1401 else 1402 s->line++; 1403 1404 while (/* CONSTCOND */ 1) { 1405 char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf); 1406 1407 if (!p && shf_error(s->u.shf) && 1408 shf_errno(s->u.shf) == EINTR) { 1409 shf_clearerr(s->u.shf); 1410 if (trap) 1411 runtraps(0); 1412 continue; 1413 } 1414 if (!p || (xp = p, xp[-1] == '\n')) 1415 break; 1416 /* double buffer size */ 1417 /* move past NUL so doubling works... */ 1418 xp++; 1419 XcheckN(s->xs, xp, Xlength(s->xs, xp)); 1420 /* ...and move back again */ 1421 xp--; 1422 } 1423 /* 1424 * flush any unwanted input so other programs/builtins 1425 * can read it. Not very optimal, but less error prone 1426 * than flushing else where, dealing with redirections, 1427 * etc. 1428 * TODO: reduce size of shf buffer (~128?) if SSTDIN 1429 */ 1430 if (s->type == SSTDIN) 1431 shf_flush(s->u.shf); 1432 } 1433 /* 1434 * XXX: temporary kludge to restore source after a 1435 * trap may have been executed. 1436 */ 1437 source = s; 1438 if (have_tty && ksh_tmout) { 1439 ksh_tmout_state = TMOUT_EXECUTING; 1440 alarm(0); 1441 } 1442 cp = Xstring(s->xs, xp); 1443 s->start = s->str = cp; 1444 strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp)); 1445 /* Note: if input is all nulls, this is not eof */ 1446 if (Xlength(s->xs, xp) == 0) { 1447 /* EOF */ 1448 if (s->type == SFILE) 1449 shf_fdclose(s->u.shf); 1450 s->str = NULL; 1451 } else if (interactive && *s->str && 1452 (cur_prompt != PS1 || !ctype(*s->str, C_IFS | C_IFSWS))) { 1453 histsave(&s->line, s->str, true, true); 1454 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY 1455 } else if (interactive && cur_prompt == PS1) { 1456 cp = Xstring(s->xs, xp); 1457 while (*cp && ctype(*cp, C_IFSWS)) 1458 ++cp; 1459 if (!*cp) 1460 histsync(); 1461 #endif 1462 } 1463 if (interactive) 1464 set_prompt(PS2, NULL); 1465 } 1466 1467 void 1468 set_prompt(int to, Source *s) 1469 { 1470 cur_prompt = to; 1471 1472 switch (to) { 1473 /* command */ 1474 case PS1: 1475 /* 1476 * Substitute ! and !! here, before substitutions are done 1477 * so ! in expanded variables are not expanded. 1478 * NOTE: this is not what AT&T ksh does (it does it after 1479 * substitutions, POSIX doesn't say which is to be done. 1480 */ 1481 { 1482 struct shf *shf; 1483 char * volatile ps1; 1484 Area *saved_atemp; 1485 1486 ps1 = str_val(global("PS1")); 1487 shf = shf_sopen(NULL, strlen(ps1) * 2, 1488 SHF_WR | SHF_DYNAMIC, NULL); 1489 while (*ps1) 1490 if (*ps1 != '!' || *++ps1 == '!') 1491 shf_putchar(*ps1++, shf); 1492 else 1493 shf_fprintf(shf, "%d", 1494 s ? s->line + 1 : 0); 1495 ps1 = shf_sclose(shf); 1496 saved_atemp = ATEMP; 1497 newenv(E_ERRH); 1498 if (kshsetjmp(e->jbuf)) { 1499 prompt = safe_prompt; 1500 /* 1501 * Don't print an error - assume it has already 1502 * been printed. Reason is we may have forked 1503 * to run a command and the child may be 1504 * unwinding its stack through this code as it 1505 * exits. 1506 */ 1507 } else { 1508 char *cp = substitute(ps1, 0); 1509 strdupx(prompt, cp, saved_atemp); 1510 } 1511 quitenv(NULL); 1512 } 1513 break; 1514 /* command continuation */ 1515 case PS2: 1516 prompt = str_val(global("PS2")); 1517 break; 1518 } 1519 } 1520 1521 int 1522 pprompt(const char *cp, int ntruncate) 1523 { 1524 int columns = 0, lines = 0; 1525 bool indelimit = false; 1526 char delimiter = 0; 1527 1528 /* 1529 * Undocumented AT&T ksh feature: 1530 * If the second char in the prompt string is \r then the first 1531 * char is taken to be a non-printing delimiter and any chars 1532 * between two instances of the delimiter are not considered to 1533 * be part of the prompt length 1534 */ 1535 if (*cp && cp[1] == '\r') { 1536 delimiter = *cp; 1537 cp += 2; 1538 } 1539 for (; *cp; cp++) { 1540 if (indelimit && *cp != delimiter) 1541 ; 1542 else if (*cp == '\n' || *cp == '\r') { 1543 lines += columns / x_cols + ((*cp == '\n') ? 1 : 0); 1544 columns = 0; 1545 } else if (*cp == '\t') { 1546 columns = (columns | 7) + 1; 1547 } else if (*cp == '\b') { 1548 if (columns > 0) 1549 columns--; 1550 } else if (*cp == delimiter) 1551 indelimit = !indelimit; 1552 else if (UTFMODE && ((unsigned char)*cp > 0x7F)) { 1553 const char *cp2; 1554 columns += utf_widthadj(cp, &cp2); 1555 if (indelimit || 1556 (ntruncate < (x_cols * lines + columns))) 1557 shf_write(cp, cp2 - cp, shl_out); 1558 cp = cp2 - /* loop increment */ 1; 1559 continue; 1560 } else 1561 columns++; 1562 if ((*cp != delimiter) && 1563 (indelimit || (ntruncate < (x_cols * lines + columns)))) 1564 shf_putc(*cp, shl_out); 1565 } 1566 shf_flush(shl_out); 1567 return (x_cols * lines + columns); 1568 } 1569 1570 /* 1571 * Read the variable part of a ${...} expression (i.e. up to but not 1572 * including the :[-+?=#%] or close-brace). 1573 */ 1574 static char * 1575 get_brace_var(XString *wsp, char *wp) 1576 { 1577 char c; 1578 enum parse_state { 1579 PS_INITIAL, PS_SAW_HASH, PS_IDENT, 1580 PS_NUMBER, PS_VAR1 1581 } state = PS_INITIAL; 1582 1583 while (/* CONSTCOND */ 1) { 1584 c = getsc(); 1585 /* State machine to figure out where the variable part ends. */ 1586 switch (state) { 1587 case PS_INITIAL: 1588 if (c == '#' || c == '!' || c == '%') { 1589 state = PS_SAW_HASH; 1590 break; 1591 } 1592 /* FALLTHROUGH */ 1593 case PS_SAW_HASH: 1594 if (ksh_isalphx(c)) 1595 state = PS_IDENT; 1596 else if (ksh_isdigit(c)) 1597 state = PS_NUMBER; 1598 else if (c == '#') { 1599 if (state == PS_SAW_HASH) { 1600 char c2; 1601 1602 c2 = getsc(); 1603 ungetsc(c2); 1604 if (c2 != /*{*/ '}') { 1605 ungetsc(c); 1606 goto out; 1607 } 1608 } 1609 state = PS_VAR1; 1610 } else if (ctype(c, C_VAR1)) 1611 state = PS_VAR1; 1612 else 1613 goto out; 1614 break; 1615 case PS_IDENT: 1616 if (!ksh_isalnux(c)) { 1617 if (c == '[') { 1618 char *tmp, *p; 1619 1620 if (!arraysub(&tmp)) 1621 yyerror("missing ]\n"); 1622 *wp++ = c; 1623 for (p = tmp; *p; ) { 1624 Xcheck(*wsp, wp); 1625 *wp++ = *p++; 1626 } 1627 afree(tmp, ATEMP); 1628 /* the ] */ 1629 c = getsc(); 1630 } 1631 goto out; 1632 } 1633 break; 1634 case PS_NUMBER: 1635 if (!ksh_isdigit(c)) 1636 goto out; 1637 break; 1638 case PS_VAR1: 1639 goto out; 1640 } 1641 Xcheck(*wsp, wp); 1642 *wp++ = c; 1643 } 1644 out: 1645 /* end of variable part */ 1646 *wp++ = '\0'; 1647 ungetsc(c); 1648 return (wp); 1649 } 1650 1651 /* 1652 * Save an array subscript - returns true if matching bracket found, false 1653 * if eof or newline was found. 1654 * (Returned string double null terminated) 1655 */ 1656 static bool 1657 arraysub(char **strp) 1658 { 1659 XString ws; 1660 char *wp, c; 1661 /* we are just past the initial [ */ 1662 unsigned int depth = 1; 1663 1664 Xinit(ws, wp, 32, ATEMP); 1665 1666 do { 1667 c = getsc(); 1668 Xcheck(ws, wp); 1669 *wp++ = c; 1670 if (c == '[') 1671 depth++; 1672 else if (c == ']') 1673 depth--; 1674 } while (depth > 0 && c && c != '\n'); 1675 1676 *wp++ = '\0'; 1677 *strp = Xclose(ws, wp); 1678 1679 return (tobool(depth == 0)); 1680 } 1681 1682 /* Unget a char: handles case when we are already at the start of the buffer */ 1683 static void 1684 ungetsc(int c) 1685 { 1686 struct sretrace_info *rp = retrace_info; 1687 1688 if (backslash_skip) 1689 backslash_skip--; 1690 /* Don't unget EOF... */ 1691 if (source->str == null && c == '\0') 1692 return; 1693 while (rp) { 1694 if (Xlength(rp->xs, rp->xp)) 1695 rp->xp--; 1696 rp = rp->next; 1697 } 1698 ungetsc_i(c); 1699 } 1700 static void 1701 ungetsc_i(int c) 1702 { 1703 if (source->str > source->start) 1704 source->str--; 1705 else { 1706 Source *s; 1707 1708 s = pushs(SREREAD, source->areap); 1709 s->ugbuf[0] = c; s->ugbuf[1] = '\0'; 1710 s->start = s->str = s->ugbuf; 1711 s->next = source; 1712 source = s; 1713 } 1714 } 1715 1716 1717 /* Called to get a char that isn't a \newline sequence. */ 1718 static int 1719 getsc_bn(void) 1720 { 1721 int c, c2; 1722 1723 if (ignore_backslash_newline) 1724 return (o_getsc_u()); 1725 1726 if (backslash_skip == 1) { 1727 backslash_skip = 2; 1728 return (o_getsc_u()); 1729 } 1730 1731 backslash_skip = 0; 1732 1733 while (/* CONSTCOND */ 1) { 1734 c = o_getsc_u(); 1735 if (c == '\\') { 1736 if ((c2 = o_getsc_u()) == '\n') 1737 /* ignore the \newline; get the next char... */ 1738 continue; 1739 ungetsc_i(c2); 1740 backslash_skip = 1; 1741 } 1742 return (c); 1743 } 1744 } 1745 1746 void 1747 yyskiputf8bom(void) 1748 { 1749 int c; 1750 1751 if ((unsigned char)(c = o_getsc_u()) != 0xEF) { 1752 ungetsc_i(c); 1753 return; 1754 } 1755 if ((unsigned char)(c = o_getsc_u()) != 0xBB) { 1756 ungetsc_i(c); 1757 ungetsc_i(0xEF); 1758 return; 1759 } 1760 if ((unsigned char)(c = o_getsc_u()) != 0xBF) { 1761 ungetsc_i(c); 1762 ungetsc_i(0xBB); 1763 ungetsc_i(0xEF); 1764 return; 1765 } 1766 UTFMODE |= 8; 1767 } 1768 1769 static Lex_state * 1770 push_state_i(State_info *si, Lex_state *old_end) 1771 { 1772 Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP); 1773 1774 news[0].ls_base = old_end; 1775 si->base = &news[0]; 1776 si->end = &news[STATE_BSIZE]; 1777 return (&news[1]); 1778 } 1779 1780 static Lex_state * 1781 pop_state_i(State_info *si, Lex_state *old_end) 1782 { 1783 Lex_state *old_base = si->base; 1784 1785 si->base = old_end->ls_base - STATE_BSIZE; 1786 si->end = old_end->ls_base; 1787 1788 afree(old_base, ATEMP); 1789 1790 return (si->base + STATE_BSIZE - 1); 1791 } 1792 1793 static int 1794 s_get(void) 1795 { 1796 return (getsc()); 1797 } 1798 1799 static void 1800 s_put(int c) 1801 { 1802 ungetsc(c); 1803 } 1804