1 /* $OpenBSD: lex.c,v 1.49 2013/12/17 16:37:06 deraadt Exp $ */ 2 3 /*- 4 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 5 * 2011, 2012, 2013, 2014, 2015 6 * Thorsten Glaser <tg (at) mirbsd.org> 7 * 8 * Provided that these terms and disclaimer and all copyright notices 9 * are retained or reproduced in an accompanying document, permission 10 * is granted to deal in this work without restriction, including un- 11 * limited rights to use, publicly perform, distribute, sell, modify, 12 * merge, give away, or sublicence. 13 * 14 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to 15 * the utmost extent permitted by applicable law, neither express nor 16 * implied; without malicious intent or gross negligence. In no event 17 * may a licensor, author or contributor be held liable for indirect, 18 * direct, other damage, loss, or other issues arising in any way out 19 * of dealing in the work, even if advised of the possibility of such 20 * damage or existence of a defect, except proven that it results out 21 * of said person's immediate fault when using the work as intended. 22 */ 23 24 #include "sh.h" 25 26 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.193.2.5 2015/04/19 19:18:19 tg Exp $"); 27 28 /* 29 * states while lexing word 30 */ 31 #define SBASE 0 /* outside any lexical constructs */ 32 #define SWORD 1 /* implicit quoting for substitute() */ 33 #define SLETPAREN 2 /* inside (( )), implicit quoting */ 34 #define SSQUOTE 3 /* inside '' */ 35 #define SDQUOTE 4 /* inside "" */ 36 #define SEQUOTE 5 /* inside $'' */ 37 #define SBRACE 6 /* inside ${} */ 38 #define SQBRACE 7 /* inside "${}" */ 39 #define SBQUOTE 8 /* inside `` */ 40 #define SASPAREN 9 /* inside $(( )) */ 41 #define SHEREDELIM 10 /* parsing <<,<<-,<<< delimiter */ 42 #define SHEREDQUOTE 11 /* parsing " in <<,<<-,<<< delimiter */ 43 #define SPATTERN 12 /* parsing *(...|...) pattern (*+?@!) */ 44 #define SADELIM 13 /* like SBASE, looking for delimiter */ 45 #define STBRACEKORN 14 /* parsing ${...[#%]...} !FSH */ 46 #define STBRACEBOURNE 15 /* parsing ${...[#%]...} FSH */ 47 #define SINVALID 255 /* invalid state */ 48 49 struct sretrace_info { 50 struct sretrace_info *next; 51 XString xs; 52 char *xp; 53 }; 54 55 /* 56 * Structure to keep track of the lexing state and the various pieces of info 57 * needed for each particular state. 58 */ 59 typedef struct lex_state { 60 union { 61 /* point to the next state block */ 62 struct lex_state *base; 63 /* marks start of state output in output string */ 64 int start; 65 /* SBQUOTE: true if in double quotes: "`...`" */ 66 /* SEQUOTE: got NUL, ignore rest of string */ 67 bool abool; 68 /* SADELIM information */ 69 struct { 70 /* character to search for */ 71 unsigned char delimiter; 72 /* max. number of delimiters */ 73 unsigned char num; 74 } adelim; 75 } u; 76 /* count open parentheses */ 77 short nparen; 78 /* type of this state */ 79 uint8_t type; 80 } Lex_state; 81 #define ls_base u.base 82 #define ls_start u.start 83 #define ls_bool u.abool 84 #define ls_adelim u.adelim 85 86 typedef struct { 87 Lex_state *base; 88 Lex_state *end; 89 } State_info; 90 91 static void readhere(struct ioword *); 92 static void ungetsc(int); 93 static void ungetsc_i(int); 94 static int getsc_uu(void); 95 static void getsc_line(Source *); 96 static int getsc_bn(void); 97 static int s_get(void); 98 static void s_put(int); 99 static char *get_brace_var(XString *, char *); 100 static bool arraysub(char **); 101 static void gethere(bool); 102 static Lex_state *push_state_i(State_info *, Lex_state *); 103 static Lex_state *pop_state_i(State_info *, Lex_state *); 104 105 static int backslash_skip; 106 static int ignore_backslash_newline; 107 108 /* optimised getsc_bn() */ 109 #define o_getsc() (*source->str != '\0' && *source->str != '\\' && \ 110 !backslash_skip ? *source->str++ : getsc_bn()) 111 /* optimised getsc_uu() */ 112 #define o_getsc_u() ((*source->str != '\0') ? *source->str++ : getsc_uu()) 113 114 /* retrace helper */ 115 #define o_getsc_r(carg) { \ 116 int cev = (carg); \ 117 struct sretrace_info *rp = retrace_info; \ 118 \ 119 while (rp) { \ 120 Xcheck(rp->xs, rp->xp); \ 121 *rp->xp++ = cev; \ 122 rp = rp->next; \ 123 } \ 124 \ 125 return (cev); \ 126 } 127 128 #if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST) 129 static int getsc(void); 130 131 static int 132 getsc(void) 133 { 134 o_getsc_r(o_getsc()); 135 } 136 #else 137 static int getsc_r(int); 138 139 static int 140 getsc_r(int c) 141 { 142 o_getsc_r(c); 143 } 144 145 #define getsc() getsc_r(o_getsc()) 146 #endif 147 148 #define STATE_BSIZE 8 149 150 #define PUSH_STATE(s) do { \ 151 if (++statep == state_info.end) \ 152 statep = push_state_i(&state_info, statep); \ 153 state = statep->type = (s); \ 154 } while (/* CONSTCOND */ 0) 155 156 #define POP_STATE() do { \ 157 if (--statep == state_info.base) \ 158 statep = pop_state_i(&state_info, statep); \ 159 state = statep->type; \ 160 } while (/* CONSTCOND */ 0) 161 162 #define PUSH_SRETRACE(s) do { \ 163 struct sretrace_info *ri; \ 164 \ 165 PUSH_STATE(s); \ 166 statep->ls_start = Xsavepos(ws, wp); \ 167 ri = alloc(sizeof(struct sretrace_info), ATEMP); \ 168 Xinit(ri->xs, ri->xp, 64, ATEMP); \ 169 ri->next = retrace_info; \ 170 retrace_info = ri; \ 171 } while (/* CONSTCOND */ 0) 172 173 #define POP_SRETRACE() do { \ 174 wp = Xrestpos(ws, wp, statep->ls_start); \ 175 *retrace_info->xp = '\0'; \ 176 sp = Xstring(retrace_info->xs, retrace_info->xp); \ 177 dp = (void *)retrace_info; \ 178 retrace_info = retrace_info->next; \ 179 afree(dp, ATEMP); \ 180 POP_STATE(); \ 181 } while (/* CONSTCOND */ 0) 182 183 /** 184 * Lexical analyser 185 * 186 * tokens are not regular expressions, they are LL(1). 187 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))". 188 * hence the state stack. Note "$(...)" are now parsed recursively. 189 */ 190 191 int 192 yylex(int cf) 193 { 194 Lex_state states[STATE_BSIZE], *statep, *s2, *base; 195 State_info state_info; 196 int c, c2, state; 197 size_t cz; 198 XString ws; /* expandable output word */ 199 char *wp; /* output word pointer */ 200 char *sp, *dp; 201 202 Again: 203 states[0].type = SINVALID; 204 states[0].ls_base = NULL; 205 statep = &states[1]; 206 state_info.base = states; 207 state_info.end = &state_info.base[STATE_BSIZE]; 208 209 Xinit(ws, wp, 64, ATEMP); 210 211 backslash_skip = 0; 212 ignore_backslash_newline = 0; 213 214 if (cf & ONEWORD) 215 state = SWORD; 216 else if (cf & LETEXPR) { 217 /* enclose arguments in (double) quotes */ 218 *wp++ = OQUOTE; 219 state = SLETPAREN; 220 statep->nparen = 0; 221 } else { 222 /* normal lexing */ 223 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE; 224 while ((c = getsc()) == ' ' || c == '\t') 225 ; 226 if (c == '#') { 227 ignore_backslash_newline++; 228 while ((c = getsc()) != '\0' && c != '\n') 229 ; 230 ignore_backslash_newline--; 231 } 232 ungetsc(c); 233 } 234 if (source->flags & SF_ALIAS) { 235 /* trailing ' ' in alias definition */ 236 source->flags &= ~SF_ALIAS; 237 cf |= ALIAS; 238 } 239 240 /* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */ 241 statep->type = state; 242 243 /* check for here string */ 244 if (state == SHEREDELIM) { 245 c = getsc(); 246 if (c == '<') { 247 state = SHEREDELIM; 248 while ((c = getsc()) == ' ' || c == '\t') 249 ; 250 ungetsc(c); 251 c = '<'; 252 goto accept_nonword; 253 } 254 ungetsc(c); 255 } 256 257 /* collect non-special or quoted characters to form word */ 258 while (!((c = getsc()) == 0 || 259 ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) { 260 if (state == SBASE && 261 subshell_nesting_type == /*{*/ '}' && 262 c == /*{*/ '}') 263 /* possibly end ${ :;} */ 264 break; 265 accept_nonword: 266 Xcheck(ws, wp); 267 switch (state) { 268 case SADELIM: 269 if (c == '(') 270 statep->nparen++; 271 else if (c == ')') 272 statep->nparen--; 273 else if (statep->nparen == 0 && (c == /*{*/ '}' || 274 c == (int)statep->ls_adelim.delimiter)) { 275 *wp++ = ADELIM; 276 *wp++ = c; 277 if (c == /*{*/ '}' || --statep->ls_adelim.num == 0) 278 POP_STATE(); 279 if (c == /*{*/ '}') 280 POP_STATE(); 281 break; 282 } 283 /* FALLTHROUGH */ 284 case SBASE: 285 if (c == '[' && (cf & (VARASN|ARRAYVAR))) { 286 /* temporary */ 287 *wp = EOS; 288 if (is_wdvarname(Xstring(ws, wp), false)) { 289 char *p, *tmp; 290 291 if (arraysub(&tmp)) { 292 *wp++ = CHAR; 293 *wp++ = c; 294 for (p = tmp; *p; ) { 295 Xcheck(ws, wp); 296 *wp++ = CHAR; 297 *wp++ = *p++; 298 } 299 afree(tmp, ATEMP); 300 break; 301 } else { 302 Source *s; 303 304 s = pushs(SREREAD, 305 source->areap); 306 s->start = s->str = 307 s->u.freeme = tmp; 308 s->next = source; 309 source = s; 310 } 311 } 312 *wp++ = CHAR; 313 *wp++ = c; 314 break; 315 } 316 /* FALLTHROUGH */ 317 Sbase1: /* includes *(...|...) pattern (*+?@!) */ 318 if (c == '*' || c == '@' || c == '+' || c == '?' || 319 c == '!') { 320 c2 = getsc(); 321 if (c2 == '(' /*)*/ ) { 322 *wp++ = OPAT; 323 *wp++ = c; 324 PUSH_STATE(SPATTERN); 325 break; 326 } 327 ungetsc(c2); 328 } 329 /* FALLTHROUGH */ 330 Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */ 331 switch (c) { 332 case '\\': 333 getsc_qchar: 334 if ((c = getsc())) { 335 /* trailing \ is lost */ 336 *wp++ = QCHAR; 337 *wp++ = c; 338 } 339 break; 340 case '\'': 341 open_ssquote_unless_heredoc: 342 if ((cf & HEREDOC)) 343 goto store_char; 344 *wp++ = OQUOTE; 345 ignore_backslash_newline++; 346 PUSH_STATE(SSQUOTE); 347 break; 348 case '"': 349 open_sdquote: 350 *wp++ = OQUOTE; 351 PUSH_STATE(SDQUOTE); 352 break; 353 case '$': 354 /* 355 * processing of dollar sign belongs into 356 * Subst, except for those which can open 357 * a string: $'' and $"" 358 */ 359 subst_dollar_ex: 360 c = getsc(); 361 switch (c) { 362 case '"': 363 goto open_sdquote; 364 case '\'': 365 goto open_sequote; 366 default: 367 goto SubstS; 368 } 369 default: 370 goto Subst; 371 } 372 break; 373 374 Subst: 375 switch (c) { 376 case '\\': 377 c = getsc(); 378 switch (c) { 379 case '"': 380 if ((cf & HEREDOC)) 381 goto heredocquote; 382 /* FALLTHROUGH */ 383 case '\\': 384 case '$': case '`': 385 store_qchar: 386 *wp++ = QCHAR; 387 *wp++ = c; 388 break; 389 default: 390 heredocquote: 391 Xcheck(ws, wp); 392 if (c) { 393 /* trailing \ is lost */ 394 *wp++ = CHAR; 395 *wp++ = '\\'; 396 *wp++ = CHAR; 397 *wp++ = c; 398 } 399 break; 400 } 401 break; 402 case '$': 403 c = getsc(); 404 SubstS: 405 if (c == '(') /*)*/ { 406 c = getsc(); 407 if (c == '(') /*)*/ { 408 *wp++ = EXPRSUB; 409 PUSH_SRETRACE(SASPAREN); 410 statep->nparen = 2; 411 *retrace_info->xp++ = '('; 412 } else { 413 ungetsc(c); 414 subst_command: 415 c = COMSUB; 416 subst_command2: 417 sp = yyrecursive(c); 418 cz = strlen(sp) + 1; 419 XcheckN(ws, wp, cz); 420 *wp++ = c; 421 memcpy(wp, sp, cz); 422 wp += cz; 423 } 424 } else if (c == '{') /*}*/ { 425 if ((c = getsc()) == '|') { 426 /* 427 * non-subenvironment 428 * value substitution 429 */ 430 c = VALSUB; 431 goto subst_command2; 432 } else if (ctype(c, C_IFSWS)) { 433 /* 434 * non-subenvironment 435 * "command" substitution 436 */ 437 c = FUNSUB; 438 goto subst_command2; 439 } 440 ungetsc(c); 441 *wp++ = OSUBST; 442 *wp++ = '{'; /*}*/ 443 wp = get_brace_var(&ws, wp); 444 c = getsc(); 445 /* allow :# and :% (ksh88 compat) */ 446 if (c == ':') { 447 *wp++ = CHAR; 448 *wp++ = c; 449 c = getsc(); 450 if (c == ':') { 451 *wp++ = CHAR; 452 *wp++ = '0'; 453 *wp++ = ADELIM; 454 *wp++ = ':'; 455 PUSH_STATE(SBRACE); 456 PUSH_STATE(SADELIM); 457 statep->ls_adelim.delimiter = ':'; 458 statep->ls_adelim.num = 1; 459 statep->nparen = 0; 460 break; 461 } else if (ksh_isdigit(c) || 462 c == '('/*)*/ || c == ' ' || 463 /*XXX what else? */ 464 c == '$') { 465 /* substring subst. */ 466 if (c != ' ') { 467 *wp++ = CHAR; 468 *wp++ = ' '; 469 } 470 ungetsc(c); 471 PUSH_STATE(SBRACE); 472 PUSH_STATE(SADELIM); 473 statep->ls_adelim.delimiter = ':'; 474 statep->ls_adelim.num = 2; 475 statep->nparen = 0; 476 break; 477 } 478 } else if (c == '/') { 479 *wp++ = CHAR; 480 *wp++ = c; 481 if ((c = getsc()) == '/') { 482 *wp++ = ADELIM; 483 *wp++ = c; 484 } else 485 ungetsc(c); 486 PUSH_STATE(SBRACE); 487 PUSH_STATE(SADELIM); 488 statep->ls_adelim.delimiter = '/'; 489 statep->ls_adelim.num = 1; 490 statep->nparen = 0; 491 break; 492 } 493 /* 494 * If this is a trim operation, 495 * treat (,|,) specially in STBRACE. 496 */ 497 if (ctype(c, C_SUBOP2)) { 498 ungetsc(c); 499 if (Flag(FSH)) 500 PUSH_STATE(STBRACEBOURNE); 501 else 502 PUSH_STATE(STBRACEKORN); 503 } else { 504 ungetsc(c); 505 if (state == SDQUOTE || 506 state == SQBRACE) 507 PUSH_STATE(SQBRACE); 508 else 509 PUSH_STATE(SBRACE); 510 } 511 } else if (ksh_isalphx(c)) { 512 *wp++ = OSUBST; 513 *wp++ = 'X'; 514 do { 515 Xcheck(ws, wp); 516 *wp++ = c; 517 c = getsc(); 518 } while (ksh_isalnux(c)); 519 *wp++ = '\0'; 520 *wp++ = CSUBST; 521 *wp++ = 'X'; 522 ungetsc(c); 523 } else if (ctype(c, C_VAR1 | C_DIGIT)) { 524 Xcheck(ws, wp); 525 *wp++ = OSUBST; 526 *wp++ = 'X'; 527 *wp++ = c; 528 *wp++ = '\0'; 529 *wp++ = CSUBST; 530 *wp++ = 'X'; 531 } else { 532 *wp++ = CHAR; 533 *wp++ = '$'; 534 ungetsc(c); 535 } 536 break; 537 case '`': 538 subst_gravis: 539 PUSH_STATE(SBQUOTE); 540 *wp++ = COMSUB; 541 /* 542 * Need to know if we are inside double quotes 543 * since sh/AT&T-ksh translate the \" to " in 544 * "`...\"...`". 545 * This is not done in POSIX mode (section 546 * 3.2.3, Double Quotes: "The backquote shall 547 * retain its special meaning introducing the 548 * other form of command substitution (see 549 * 3.6.3). The portion of the quoted string 550 * from the initial backquote and the 551 * characters up to the next backquote that 552 * is not preceded by a backslash (having 553 * escape characters removed) defines that 554 * command whose output replaces `...` when 555 * the word is expanded." 556 * Section 3.6.3, Command Substitution: 557 * "Within the backquoted style of command 558 * substitution, backslash shall retain its 559 * literal meaning, except when followed by 560 * $ ` \."). 561 */ 562 statep->ls_bool = false; 563 s2 = statep; 564 base = state_info.base; 565 while (/* CONSTCOND */ 1) { 566 for (; s2 != base; s2--) { 567 if (s2->type == SDQUOTE) { 568 statep->ls_bool = true; 569 break; 570 } 571 } 572 if (s2 != base) 573 break; 574 if (!(s2 = s2->ls_base)) 575 break; 576 base = s2-- - STATE_BSIZE; 577 } 578 break; 579 case QCHAR: 580 if (cf & LQCHAR) { 581 *wp++ = QCHAR; 582 *wp++ = getsc(); 583 break; 584 } 585 /* FALLTHROUGH */ 586 default: 587 store_char: 588 *wp++ = CHAR; 589 *wp++ = c; 590 } 591 break; 592 593 case SEQUOTE: 594 if (c == '\'') { 595 POP_STATE(); 596 *wp++ = CQUOTE; 597 ignore_backslash_newline--; 598 } else if (c == '\\') { 599 if ((c2 = unbksl(true, s_get, s_put)) == -1) 600 c2 = s_get(); 601 if (c2 == 0) 602 statep->ls_bool = true; 603 if (!statep->ls_bool) { 604 char ts[4]; 605 606 if ((unsigned int)c2 < 0x100) { 607 *wp++ = QCHAR; 608 *wp++ = c2; 609 } else { 610 cz = utf_wctomb(ts, c2 - 0x100); 611 ts[cz] = 0; 612 for (cz = 0; ts[cz]; ++cz) { 613 *wp++ = QCHAR; 614 *wp++ = ts[cz]; 615 } 616 } 617 } 618 } else if (!statep->ls_bool) { 619 *wp++ = QCHAR; 620 *wp++ = c; 621 } 622 break; 623 624 case SSQUOTE: 625 if (c == '\'') { 626 POP_STATE(); 627 if ((cf & HEREDOC) || state == SQBRACE) 628 goto store_char; 629 *wp++ = CQUOTE; 630 ignore_backslash_newline--; 631 } else { 632 *wp++ = QCHAR; 633 *wp++ = c; 634 } 635 break; 636 637 case SDQUOTE: 638 if (c == '"') { 639 POP_STATE(); 640 *wp++ = CQUOTE; 641 } else 642 goto Subst; 643 break; 644 645 /* $(( ... )) */ 646 case SASPAREN: 647 if (c == '(') 648 statep->nparen++; 649 else if (c == ')') { 650 statep->nparen--; 651 if (statep->nparen == 1) { 652 /* end of EXPRSUB */ 653 POP_SRETRACE(); 654 655 if ((c2 = getsc()) == /*(*/ ')') { 656 cz = strlen(sp) - 2; 657 XcheckN(ws, wp, cz); 658 memcpy(wp, sp + 1, cz); 659 wp += cz; 660 afree(sp, ATEMP); 661 *wp++ = '\0'; 662 break; 663 } else { 664 Source *s; 665 666 ungetsc(c2); 667 /* 668 * mismatched parenthesis - 669 * assume we were really 670 * parsing a $(...) expression 671 */ 672 --wp; 673 s = pushs(SREREAD, 674 source->areap); 675 s->start = s->str = 676 s->u.freeme = sp; 677 s->next = source; 678 source = s; 679 goto subst_command; 680 } 681 } 682 } 683 /* reuse existing state machine */ 684 goto Sbase2; 685 686 case SQBRACE: 687 if (c == '\\') { 688 /* 689 * perform POSIX "quote removal" if the back- 690 * slash is "special", i.e. same cases as the 691 * {case '\\':} in Subst: plus closing brace; 692 * in mksh code "quote removal" on '\c' means 693 * write QCHAR+c, otherwise CHAR+\+CHAR+c are 694 * emitted (in heredocquote:) 695 */ 696 if ((c = getsc()) == '"' || c == '\\' || 697 c == '$' || c == '`' || c == /*{*/'}') 698 goto store_qchar; 699 goto heredocquote; 700 } 701 goto common_SQBRACE; 702 703 case SBRACE: 704 if (c == '\'') 705 goto open_ssquote_unless_heredoc; 706 else if (c == '\\') 707 goto getsc_qchar; 708 common_SQBRACE: 709 if (c == '"') 710 goto open_sdquote; 711 else if (c == '$') 712 goto subst_dollar_ex; 713 else if (c == '`') 714 goto subst_gravis; 715 else if (c != /*{*/ '}') 716 goto store_char; 717 POP_STATE(); 718 *wp++ = CSUBST; 719 *wp++ = /*{*/ '}'; 720 break; 721 722 /* Same as SBASE, except (,|,) treated specially */ 723 case STBRACEKORN: 724 if (c == '|') 725 *wp++ = SPAT; 726 else if (c == '(') { 727 *wp++ = OPAT; 728 /* simile for @ */ 729 *wp++ = ' '; 730 PUSH_STATE(SPATTERN); 731 } else /* FALLTHROUGH */ 732 case STBRACEBOURNE: 733 if (c == /*{*/ '}') { 734 POP_STATE(); 735 *wp++ = CSUBST; 736 *wp++ = /*{*/ '}'; 737 } else 738 goto Sbase1; 739 break; 740 741 case SBQUOTE: 742 if (c == '`') { 743 *wp++ = 0; 744 POP_STATE(); 745 } else if (c == '\\') { 746 switch (c = getsc()) { 747 case 0: 748 /* trailing \ is lost */ 749 break; 750 case '\\': 751 case '$': case '`': 752 *wp++ = c; 753 break; 754 case '"': 755 if (statep->ls_bool) { 756 *wp++ = c; 757 break; 758 } 759 /* FALLTHROUGH */ 760 default: 761 *wp++ = '\\'; 762 *wp++ = c; 763 break; 764 } 765 } else 766 *wp++ = c; 767 break; 768 769 /* ONEWORD */ 770 case SWORD: 771 goto Subst; 772 773 /* LETEXPR: (( ... )) */ 774 case SLETPAREN: 775 if (c == /*(*/ ')') { 776 if (statep->nparen > 0) 777 --statep->nparen; 778 else if ((c2 = getsc()) == /*(*/ ')') { 779 c = 0; 780 *wp++ = CQUOTE; 781 goto Done; 782 } else { 783 Source *s; 784 785 ungetsc(c2); 786 /* 787 * mismatched parenthesis - 788 * assume we were really 789 * parsing a (...) expression 790 */ 791 *wp = EOS; 792 sp = Xstring(ws, wp); 793 dp = wdstrip(sp, WDS_KEEPQ); 794 s = pushs(SREREAD, source->areap); 795 s->start = s->str = s->u.freeme = dp; 796 s->next = source; 797 source = s; 798 return ('('/*)*/); 799 } 800 } else if (c == '(') 801 /* 802 * parentheses inside quotes and 803 * backslashes are lost, but AT&T ksh 804 * doesn't count them either 805 */ 806 ++statep->nparen; 807 goto Sbase2; 808 809 /* <<, <<-, <<< delimiter */ 810 case SHEREDELIM: 811 /* 812 * here delimiters need a special case since 813 * $ and `...` are not to be treated specially 814 */ 815 switch (c) { 816 case '\\': 817 if ((c = getsc())) { 818 /* trailing \ is lost */ 819 *wp++ = QCHAR; 820 *wp++ = c; 821 } 822 break; 823 case '\'': 824 goto open_ssquote_unless_heredoc; 825 case '$': 826 if ((c2 = getsc()) == '\'') { 827 open_sequote: 828 *wp++ = OQUOTE; 829 ignore_backslash_newline++; 830 PUSH_STATE(SEQUOTE); 831 statep->ls_bool = false; 832 break; 833 } else if (c2 == '"') { 834 /* FALLTHROUGH */ 835 case '"': 836 PUSH_SRETRACE(SHEREDQUOTE); 837 break; 838 } 839 ungetsc(c2); 840 /* FALLTHROUGH */ 841 default: 842 *wp++ = CHAR; 843 *wp++ = c; 844 } 845 break; 846 847 /* " in <<, <<-, <<< delimiter */ 848 case SHEREDQUOTE: 849 if (c != '"') 850 goto Subst; 851 POP_SRETRACE(); 852 dp = strnul(sp) - 1; 853 /* remove the trailing double quote */ 854 *dp = '\0'; 855 /* store the quoted string */ 856 *wp++ = OQUOTE; 857 XcheckN(ws, wp, (dp - sp) * 2); 858 dp = sp; 859 while ((c = *dp++)) { 860 if (c == '\\') { 861 switch ((c = *dp++)) { 862 case '\\': 863 case '"': 864 case '$': 865 case '`': 866 break; 867 default: 868 *wp++ = CHAR; 869 *wp++ = '\\'; 870 break; 871 } 872 } 873 *wp++ = CHAR; 874 *wp++ = c; 875 } 876 afree(sp, ATEMP); 877 *wp++ = CQUOTE; 878 state = statep->type = SHEREDELIM; 879 break; 880 881 /* in *(...|...) pattern (*+?@!) */ 882 case SPATTERN: 883 if (c == /*(*/ ')') { 884 *wp++ = CPAT; 885 POP_STATE(); 886 } else if (c == '|') { 887 *wp++ = SPAT; 888 } else if (c == '(') { 889 *wp++ = OPAT; 890 /* simile for @ */ 891 *wp++ = ' '; 892 PUSH_STATE(SPATTERN); 893 } else 894 goto Sbase1; 895 break; 896 } 897 } 898 Done: 899 Xcheck(ws, wp); 900 if (statep != &states[1]) 901 /* XXX figure out what is missing */ 902 yyerror("no closing quote\n"); 903 904 /* This done to avoid tests for SHEREDELIM wherever SBASE tested */ 905 if (state == SHEREDELIM) 906 state = SBASE; 907 908 dp = Xstring(ws, wp); 909 if (state == SBASE && ( 910 #ifndef MKSH_LEGACY_MODE 911 (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) || 912 #endif 913 c == '<' || c == '>')) { 914 struct ioword *iop = alloc(sizeof(struct ioword), ATEMP); 915 916 if (Xlength(ws, wp) == 0) 917 iop->unit = c == '<' ? 0 : 1; 918 else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) { 919 if (dp[c2] != CHAR) 920 goto no_iop; 921 if (!ksh_isdigit(dp[c2 + 1])) 922 goto no_iop; 923 iop->unit = (iop->unit * 10) + dp[c2 + 1] - '0'; 924 if (iop->unit >= FDBASE) 925 goto no_iop; 926 } 927 928 if (c == '&') { 929 if ((c2 = getsc()) != '>') { 930 ungetsc(c2); 931 goto no_iop; 932 } 933 c = c2; 934 iop->ioflag = IOBASH; 935 } else 936 iop->ioflag = 0; 937 938 c2 = getsc(); 939 /* <<, >>, <> are ok, >< is not */ 940 if (c == c2 || (c == '<' && c2 == '>')) { 941 iop->ioflag |= c == c2 ? 942 (c == '>' ? IOCAT : IOHERE) : IORDWR; 943 if (iop->ioflag == IOHERE) { 944 if ((c2 = getsc()) == '-') { 945 iop->ioflag |= IOSKIP; 946 c2 = getsc(); 947 } else if (c2 == '<') 948 iop->ioflag |= IOHERESTR; 949 ungetsc(c2); 950 if (c2 == '\n') 951 iop->ioflag |= IONDELIM; 952 } 953 } else if (c2 == '&') 954 iop->ioflag |= IODUP | (c == '<' ? IORDUP : 0); 955 else { 956 iop->ioflag |= c == '>' ? IOWRITE : IOREAD; 957 if (c == '>' && c2 == '|') 958 iop->ioflag |= IOCLOB; 959 else 960 ungetsc(c2); 961 } 962 963 iop->name = NULL; 964 iop->delim = NULL; 965 iop->heredoc = NULL; 966 /* free word */ 967 Xfree(ws, wp); 968 yylval.iop = iop; 969 return (REDIR); 970 no_iop: 971 afree(iop, ATEMP); 972 } 973 974 if (wp == dp && state == SBASE) { 975 /* free word */ 976 Xfree(ws, wp); 977 /* no word, process LEX1 character */ 978 if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) { 979 if ((c2 = getsc()) == c) 980 c = (c == ';') ? BREAK : 981 (c == '|') ? LOGOR : 982 (c == '&') ? LOGAND : 983 /* c == '(' ) */ MDPAREN; 984 else if (c == '|' && c2 == '&') 985 c = COPROC; 986 else if (c == ';' && c2 == '|') 987 c = BRKEV; 988 else if (c == ';' && c2 == '&') 989 c = BRKFT; 990 else 991 ungetsc(c2); 992 #ifndef MKSH_SMALL 993 if (c == BREAK) { 994 if ((c2 = getsc()) == '&') 995 c = BRKEV; 996 else 997 ungetsc(c2); 998 } 999 #endif 1000 } else if (c == '\n') { 1001 gethere(false); 1002 if (cf & CONTIN) 1003 goto Again; 1004 } else if (c == '\0') 1005 /* need here strings at EOF */ 1006 gethere(true); 1007 return (c); 1008 } 1009 1010 /* terminate word */ 1011 *wp++ = EOS; 1012 yylval.cp = Xclose(ws, wp); 1013 if (state == SWORD || state == SLETPAREN 1014 /* XXX ONEWORD? */) 1015 return (LWORD); 1016 1017 /* unget terminator */ 1018 ungetsc(c); 1019 1020 /* 1021 * note: the alias-vs-function code below depends on several 1022 * interna: starting from here, source->str is not modified; 1023 * the way getsc() and ungetsc() operate; etc. 1024 */ 1025 1026 /* copy word to unprefixed string ident */ 1027 sp = yylval.cp; 1028 dp = ident; 1029 if ((cf & HEREDELIM) && (sp[1] == '<')) { 1030 herestringloop: 1031 switch ((c = *sp++)) { 1032 case CHAR: 1033 ++sp; 1034 /* FALLTHROUGH */ 1035 case OQUOTE: 1036 case CQUOTE: 1037 goto herestringloop; 1038 default: 1039 break; 1040 } 1041 /* dummy value */ 1042 *dp++ = 'x'; 1043 } else 1044 while ((dp - ident) < IDENT && (c = *sp++) == CHAR) 1045 *dp++ = *sp++; 1046 if (c != EOS) 1047 /* word is not unquoted */ 1048 dp = ident; 1049 /* make sure the ident array stays NUL padded */ 1050 memset(dp, 0, (ident + IDENT) - dp + 1); 1051 1052 if (!(cf & (KEYWORD | ALIAS))) 1053 return (LWORD); 1054 1055 if (*ident != '\0') { 1056 struct tbl *p; 1057 uint32_t h = hash(ident); 1058 1059 if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) && 1060 (!(cf & ESACONLY) || p->val.i == ESAC || 1061 p->val.i == /*{*/ '}')) { 1062 afree(yylval.cp, ATEMP); 1063 return (p->val.i); 1064 } 1065 if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) && 1066 (p->flag & ISSET)) { 1067 /* 1068 * this still points to the same character as the 1069 * ungetsc'd terminator from above 1070 */ 1071 const char *cp = source->str; 1072 1073 /* prefer POSIX but not Korn functions over aliases */ 1074 while (*cp == ' ' || *cp == '\t') 1075 /* 1076 * this is like getsc() without skipping 1077 * over Source boundaries (including not 1078 * parsing ungetsc'd characters that got 1079 * pushed into an SREREAD) which is what 1080 * we want here anyway: find out whether 1081 * the alias name is followed by a POSIX 1082 * function definition 1083 */ 1084 ++cp; 1085 /* prefer functions over aliases */ 1086 if (cp[0] != '(' || cp[1] != ')') { 1087 Source *s = source; 1088 1089 while (s && (s->flags & SF_HASALIAS)) 1090 if (s->u.tblp == p) 1091 return (LWORD); 1092 else 1093 s = s->next; 1094 /* push alias expansion */ 1095 s = pushs(SALIAS, source->areap); 1096 s->start = s->str = p->val.s; 1097 s->u.tblp = p; 1098 s->flags |= SF_HASALIAS; 1099 s->next = source; 1100 if (source->type == SEOF) { 1101 /* prevent infinite recursion at EOS */ 1102 source->u.tblp = p; 1103 source->flags |= SF_HASALIAS; 1104 } 1105 source = s; 1106 afree(yylval.cp, ATEMP); 1107 goto Again; 1108 } 1109 } 1110 } else if (cf & ALIAS) { 1111 /* retain typeset et al. even when quoted */ 1112 if (assign_command((dp = wdstrip(yylval.cp, 0)))) 1113 strlcpy(ident, dp, sizeof(ident)); 1114 afree(dp, ATEMP); 1115 } 1116 1117 return (LWORD); 1118 } 1119 1120 static void 1121 gethere(bool iseof) 1122 { 1123 struct ioword **p; 1124 1125 for (p = heres; p < herep; p++) 1126 if (iseof && !((*p)->ioflag & IOHERESTR)) 1127 /* only here strings at EOF */ 1128 return; 1129 else 1130 readhere(*p); 1131 herep = heres; 1132 } 1133 1134 /* 1135 * read "<<word" text into temp file 1136 */ 1137 1138 static void 1139 readhere(struct ioword *iop) 1140 { 1141 int c; 1142 const char *eof, *eofp; 1143 XString xs; 1144 char *xp; 1145 int xpos; 1146 1147 if (iop->ioflag & IOHERESTR) { 1148 /* process the here string */ 1149 iop->heredoc = xp = evalstr(iop->delim, DOBLANK); 1150 xpos = strlen(xp) - 1; 1151 memmove(xp, xp + 1, xpos); 1152 xp[xpos] = '\n'; 1153 return; 1154 } 1155 1156 eof = iop->ioflag & IONDELIM ? "<<" : evalstr(iop->delim, 0); 1157 1158 if (!(iop->ioflag & IOEVAL)) 1159 ignore_backslash_newline++; 1160 1161 Xinit(xs, xp, 256, ATEMP); 1162 1163 heredoc_read_line: 1164 /* beginning of line */ 1165 eofp = eof; 1166 xpos = Xsavepos(xs, xp); 1167 if (iop->ioflag & IOSKIP) { 1168 /* skip over leading tabs */ 1169 while ((c = getsc()) == '\t') 1170 ; /* nothing */ 1171 goto heredoc_parse_char; 1172 } 1173 heredoc_read_char: 1174 c = getsc(); 1175 heredoc_parse_char: 1176 /* compare with here document marker */ 1177 if (!*eofp) { 1178 /* end of here document marker, what to do? */ 1179 switch (c) { 1180 case /*(*/ ')': 1181 if (!subshell_nesting_type) 1182 /*- 1183 * not allowed outside $(...) or (...) 1184 * => mismatch 1185 */ 1186 break; 1187 /* allow $(...) or (...) to close here */ 1188 ungetsc(/*(*/ ')'); 1189 /* FALLTHROUGH */ 1190 case 0: 1191 /* 1192 * Allow EOF here to commands without trailing 1193 * newlines (mksh -c '...') will work as well. 1194 */ 1195 case '\n': 1196 /* Newline terminates here document marker */ 1197 goto heredoc_found_terminator; 1198 } 1199 } else if (c == *eofp++) 1200 /* store; then read and compare next character */ 1201 goto heredoc_store_and_loop; 1202 /* nope, mismatch; read until end of line */ 1203 while (c != '\n') { 1204 if (!c) 1205 /* oops, reached EOF */ 1206 yyerror("%s '%s' unclosed\n", "here document", eof); 1207 /* store character */ 1208 Xcheck(xs, xp); 1209 Xput(xs, xp, c); 1210 /* read next character */ 1211 c = getsc(); 1212 } 1213 /* we read a newline as last character */ 1214 heredoc_store_and_loop: 1215 /* store character */ 1216 Xcheck(xs, xp); 1217 Xput(xs, xp, c); 1218 if (c == '\n') 1219 goto heredoc_read_line; 1220 goto heredoc_read_char; 1221 1222 heredoc_found_terminator: 1223 /* jump back to saved beginning of line */ 1224 xp = Xrestpos(xs, xp, xpos); 1225 /* terminate, close and store */ 1226 Xput(xs, xp, '\0'); 1227 iop->heredoc = Xclose(xs, xp); 1228 1229 if (!(iop->ioflag & IOEVAL)) 1230 ignore_backslash_newline--; 1231 } 1232 1233 void 1234 yyerror(const char *fmt, ...) 1235 { 1236 va_list va; 1237 1238 /* pop aliases and re-reads */ 1239 while (source->type == SALIAS || source->type == SREREAD) 1240 source = source->next; 1241 /* zap pending input */ 1242 source->str = null; 1243 1244 error_prefix(true); 1245 va_start(va, fmt); 1246 shf_vfprintf(shl_out, fmt, va); 1247 va_end(va); 1248 errorfz(); 1249 } 1250 1251 /* 1252 * input for yylex with alias expansion 1253 */ 1254 1255 Source * 1256 pushs(int type, Area *areap) 1257 { 1258 Source *s; 1259 1260 s = alloc(sizeof(Source), areap); 1261 memset(s, 0, sizeof(Source)); 1262 s->type = type; 1263 s->str = null; 1264 s->areap = areap; 1265 if (type == SFILE || type == SSTDIN) 1266 XinitN(s->xs, 256, s->areap); 1267 return (s); 1268 } 1269 1270 static int 1271 getsc_uu(void) 1272 { 1273 Source *s = source; 1274 int c; 1275 1276 while ((c = *s->str++) == 0) { 1277 /* return 0 for EOF by default */ 1278 s->str = NULL; 1279 switch (s->type) { 1280 case SEOF: 1281 s->str = null; 1282 return (0); 1283 1284 case SSTDIN: 1285 case SFILE: 1286 getsc_line(s); 1287 break; 1288 1289 case SWSTR: 1290 break; 1291 1292 case SSTRING: 1293 case SSTRINGCMDLINE: 1294 break; 1295 1296 case SWORDS: 1297 s->start = s->str = *s->u.strv++; 1298 s->type = SWORDSEP; 1299 break; 1300 1301 case SWORDSEP: 1302 if (*s->u.strv == NULL) { 1303 s->start = s->str = "\n"; 1304 s->type = SEOF; 1305 } else { 1306 s->start = s->str = " "; 1307 s->type = SWORDS; 1308 } 1309 break; 1310 1311 case SALIAS: 1312 if (s->flags & SF_ALIASEND) { 1313 /* pass on an unused SF_ALIAS flag */ 1314 source = s->next; 1315 source->flags |= s->flags & SF_ALIAS; 1316 s = source; 1317 } else if (*s->u.tblp->val.s && 1318 (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) { 1319 /* pop source stack */ 1320 source = s = s->next; 1321 /* 1322 * Note that this alias ended with a 1323 * space, enabling alias expansion on 1324 * the following word. 1325 */ 1326 s->flags |= SF_ALIAS; 1327 } else { 1328 /* 1329 * At this point, we need to keep the current 1330 * alias in the source list so recursive 1331 * aliases can be detected and we also need to 1332 * return the next character. Do this by 1333 * temporarily popping the alias to get the 1334 * next character and then put it back in the 1335 * source list with the SF_ALIASEND flag set. 1336 */ 1337 /* pop source stack */ 1338 source = s->next; 1339 source->flags |= s->flags & SF_ALIAS; 1340 c = getsc_uu(); 1341 if (c) { 1342 s->flags |= SF_ALIASEND; 1343 s->ugbuf[0] = c; s->ugbuf[1] = '\0'; 1344 s->start = s->str = s->ugbuf; 1345 s->next = source; 1346 source = s; 1347 } else { 1348 s = source; 1349 /* avoid reading EOF twice */ 1350 s->str = NULL; 1351 break; 1352 } 1353 } 1354 continue; 1355 1356 case SREREAD: 1357 if (s->start != s->ugbuf) 1358 /* yuck */ 1359 afree(s->u.freeme, ATEMP); 1360 source = s = s->next; 1361 continue; 1362 } 1363 if (s->str == NULL) { 1364 s->type = SEOF; 1365 s->start = s->str = null; 1366 return ('\0'); 1367 } 1368 if (s->flags & SF_ECHO) { 1369 shf_puts(s->str, shl_out); 1370 shf_flush(shl_out); 1371 } 1372 } 1373 return (c); 1374 } 1375 1376 static void 1377 getsc_line(Source *s) 1378 { 1379 char *xp = Xstring(s->xs, xp), *cp; 1380 bool interactive = Flag(FTALKING) && s->type == SSTDIN; 1381 bool have_tty = tobool(interactive && (s->flags & SF_TTY)); 1382 1383 /* Done here to ensure nothing odd happens when a timeout occurs */ 1384 XcheckN(s->xs, xp, LINE); 1385 *xp = '\0'; 1386 s->start = s->str = xp; 1387 1388 if (have_tty && ksh_tmout) { 1389 ksh_tmout_state = TMOUT_READING; 1390 alarm(ksh_tmout); 1391 } 1392 if (interactive) 1393 change_winsz(); 1394 #ifndef MKSH_NO_CMDLINE_EDITING 1395 if (have_tty && ( 1396 #if !MKSH_S_NOVI 1397 Flag(FVI) || 1398 #endif 1399 Flag(FEMACS) || Flag(FGMACS))) { 1400 int nread; 1401 1402 nread = x_read(xp); 1403 if (nread < 0) 1404 /* read error */ 1405 nread = 0; 1406 xp[nread] = '\0'; 1407 xp += nread; 1408 } else 1409 #endif 1410 { 1411 if (interactive) 1412 pprompt(prompt, 0); 1413 else 1414 s->line++; 1415 1416 while (/* CONSTCOND */ 1) { 1417 char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf); 1418 1419 if (!p && shf_error(s->u.shf) && 1420 shf_errno(s->u.shf) == EINTR) { 1421 shf_clearerr(s->u.shf); 1422 if (trap) 1423 runtraps(0); 1424 continue; 1425 } 1426 if (!p || (xp = p, xp[-1] == '\n')) 1427 break; 1428 /* double buffer size */ 1429 /* move past NUL so doubling works... */ 1430 xp++; 1431 XcheckN(s->xs, xp, Xlength(s->xs, xp)); 1432 /* ...and move back again */ 1433 xp--; 1434 } 1435 /* 1436 * flush any unwanted input so other programs/builtins 1437 * can read it. Not very optimal, but less error prone 1438 * than flushing else where, dealing with redirections, 1439 * etc. 1440 * TODO: reduce size of shf buffer (~128?) if SSTDIN 1441 */ 1442 if (s->type == SSTDIN) 1443 shf_flush(s->u.shf); 1444 } 1445 /* 1446 * XXX: temporary kludge to restore source after a 1447 * trap may have been executed. 1448 */ 1449 source = s; 1450 if (have_tty && ksh_tmout) { 1451 ksh_tmout_state = TMOUT_EXECUTING; 1452 alarm(0); 1453 } 1454 cp = Xstring(s->xs, xp); 1455 rndpush(cp); 1456 s->start = s->str = cp; 1457 strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp)); 1458 /* Note: if input is all nulls, this is not eof */ 1459 if (Xlength(s->xs, xp) == 0) { 1460 /* EOF */ 1461 if (s->type == SFILE) 1462 shf_fdclose(s->u.shf); 1463 s->str = NULL; 1464 } else if (interactive && *s->str && 1465 (cur_prompt != PS1 || !ctype(*s->str, C_IFS | C_IFSWS))) { 1466 histsave(&s->line, s->str, true, true); 1467 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY 1468 } else if (interactive && cur_prompt == PS1) { 1469 cp = Xstring(s->xs, xp); 1470 while (*cp && ctype(*cp, C_IFSWS)) 1471 ++cp; 1472 if (!*cp) 1473 histsync(); 1474 #endif 1475 } 1476 if (interactive) 1477 set_prompt(PS2, NULL); 1478 } 1479 1480 void 1481 set_prompt(int to, Source *s) 1482 { 1483 cur_prompt = (uint8_t)to; 1484 1485 switch (to) { 1486 /* command */ 1487 case PS1: 1488 /* 1489 * Substitute ! and !! here, before substitutions are done 1490 * so ! in expanded variables are not expanded. 1491 * NOTE: this is not what AT&T ksh does (it does it after 1492 * substitutions, POSIX doesn't say which is to be done. 1493 */ 1494 { 1495 struct shf *shf; 1496 char * volatile ps1; 1497 Area *saved_atemp; 1498 1499 ps1 = str_val(global("PS1")); 1500 shf = shf_sopen(NULL, strlen(ps1) * 2, 1501 SHF_WR | SHF_DYNAMIC, NULL); 1502 while (*ps1) 1503 if (*ps1 != '!' || *++ps1 == '!') 1504 shf_putchar(*ps1++, shf); 1505 else 1506 shf_fprintf(shf, "%lu", s ? 1507 (unsigned long)s->line + 1 : 0UL); 1508 ps1 = shf_sclose(shf); 1509 saved_atemp = ATEMP; 1510 newenv(E_ERRH); 1511 if (kshsetjmp(e->jbuf)) { 1512 prompt = safe_prompt; 1513 /* 1514 * Don't print an error - assume it has already 1515 * been printed. Reason is we may have forked 1516 * to run a command and the child may be 1517 * unwinding its stack through this code as it 1518 * exits. 1519 */ 1520 } else { 1521 char *cp = substitute(ps1, 0); 1522 strdupx(prompt, cp, saved_atemp); 1523 } 1524 quitenv(NULL); 1525 } 1526 break; 1527 /* command continuation */ 1528 case PS2: 1529 prompt = str_val(global("PS2")); 1530 break; 1531 } 1532 } 1533 1534 int 1535 pprompt(const char *cp, int ntruncate) 1536 { 1537 char delimiter = 0; 1538 bool doprint = (ntruncate != -1); 1539 bool indelimit = false; 1540 int columns = 0, lines = 0; 1541 1542 /* 1543 * Undocumented AT&T ksh feature: 1544 * If the second char in the prompt string is \r then the first 1545 * char is taken to be a non-printing delimiter and any chars 1546 * between two instances of the delimiter are not considered to 1547 * be part of the prompt length 1548 */ 1549 if (*cp && cp[1] == '\r') { 1550 delimiter = *cp; 1551 cp += 2; 1552 } 1553 for (; *cp; cp++) { 1554 if (indelimit && *cp != delimiter) 1555 ; 1556 else if (*cp == '\n' || *cp == '\r') { 1557 lines += columns / x_cols + ((*cp == '\n') ? 1 : 0); 1558 columns = 0; 1559 } else if (*cp == '\t') { 1560 columns = (columns | 7) + 1; 1561 } else if (*cp == '\b') { 1562 if (columns > 0) 1563 columns--; 1564 } else if (*cp == delimiter) 1565 indelimit = !indelimit; 1566 else if (UTFMODE && ((unsigned char)*cp > 0x7F)) { 1567 const char *cp2; 1568 columns += utf_widthadj(cp, &cp2); 1569 if (doprint && (indelimit || 1570 (ntruncate < (x_cols * lines + columns)))) 1571 shf_write(cp, cp2 - cp, shl_out); 1572 cp = cp2 - /* loop increment */ 1; 1573 continue; 1574 } else 1575 columns++; 1576 if (doprint && (*cp != delimiter) && 1577 (indelimit || (ntruncate < (x_cols * lines + columns)))) 1578 shf_putc(*cp, shl_out); 1579 } 1580 if (doprint) 1581 shf_flush(shl_out); 1582 return (x_cols * lines + columns); 1583 } 1584 1585 /* 1586 * Read the variable part of a ${...} expression (i.e. up to but not 1587 * including the :[-+?=#%] or close-brace). 1588 */ 1589 static char * 1590 get_brace_var(XString *wsp, char *wp) 1591 { 1592 char c; 1593 enum parse_state { 1594 PS_INITIAL, PS_SAW_HASH, PS_IDENT, 1595 PS_NUMBER, PS_VAR1 1596 } state = PS_INITIAL; 1597 1598 while (/* CONSTCOND */ 1) { 1599 c = getsc(); 1600 /* State machine to figure out where the variable part ends. */ 1601 switch (state) { 1602 case PS_INITIAL: 1603 if (c == '#' || c == '!' || c == '%') { 1604 state = PS_SAW_HASH; 1605 break; 1606 } 1607 /* FALLTHROUGH */ 1608 case PS_SAW_HASH: 1609 if (ksh_isalphx(c)) 1610 state = PS_IDENT; 1611 else if (ksh_isdigit(c)) 1612 state = PS_NUMBER; 1613 else if (c == '#') { 1614 if (state == PS_SAW_HASH) { 1615 char c2; 1616 1617 c2 = getsc(); 1618 ungetsc(c2); 1619 if (c2 != /*{*/ '}') { 1620 ungetsc(c); 1621 goto out; 1622 } 1623 } 1624 state = PS_VAR1; 1625 } else if (ctype(c, C_VAR1)) 1626 state = PS_VAR1; 1627 else 1628 goto out; 1629 break; 1630 case PS_IDENT: 1631 if (!ksh_isalnux(c)) { 1632 if (c == '[') { 1633 char *tmp, *p; 1634 1635 if (!arraysub(&tmp)) 1636 yyerror("missing ]\n"); 1637 *wp++ = c; 1638 for (p = tmp; *p; ) { 1639 Xcheck(*wsp, wp); 1640 *wp++ = *p++; 1641 } 1642 afree(tmp, ATEMP); 1643 /* the ] */ 1644 c = getsc(); 1645 } 1646 goto out; 1647 } 1648 break; 1649 case PS_NUMBER: 1650 if (!ksh_isdigit(c)) 1651 goto out; 1652 break; 1653 case PS_VAR1: 1654 goto out; 1655 } 1656 Xcheck(*wsp, wp); 1657 *wp++ = c; 1658 } 1659 out: 1660 /* end of variable part */ 1661 *wp++ = '\0'; 1662 ungetsc(c); 1663 return (wp); 1664 } 1665 1666 /* 1667 * Save an array subscript - returns true if matching bracket found, false 1668 * if eof or newline was found. 1669 * (Returned string double null terminated) 1670 */ 1671 static bool 1672 arraysub(char **strp) 1673 { 1674 XString ws; 1675 char *wp, c; 1676 /* we are just past the initial [ */ 1677 unsigned int depth = 1; 1678 1679 Xinit(ws, wp, 32, ATEMP); 1680 1681 do { 1682 c = getsc(); 1683 Xcheck(ws, wp); 1684 *wp++ = c; 1685 if (c == '[') 1686 depth++; 1687 else if (c == ']') 1688 depth--; 1689 } while (depth > 0 && c && c != '\n'); 1690 1691 *wp++ = '\0'; 1692 *strp = Xclose(ws, wp); 1693 1694 return (tobool(depth == 0)); 1695 } 1696 1697 /* Unget a char: handles case when we are already at the start of the buffer */ 1698 static void 1699 ungetsc(int c) 1700 { 1701 struct sretrace_info *rp = retrace_info; 1702 1703 if (backslash_skip) 1704 backslash_skip--; 1705 /* Don't unget EOF... */ 1706 if (source->str == null && c == '\0') 1707 return; 1708 while (rp) { 1709 if (Xlength(rp->xs, rp->xp)) 1710 rp->xp--; 1711 rp = rp->next; 1712 } 1713 ungetsc_i(c); 1714 } 1715 static void 1716 ungetsc_i(int c) 1717 { 1718 if (source->str > source->start) 1719 source->str--; 1720 else { 1721 Source *s; 1722 1723 s = pushs(SREREAD, source->areap); 1724 s->ugbuf[0] = c; s->ugbuf[1] = '\0'; 1725 s->start = s->str = s->ugbuf; 1726 s->next = source; 1727 source = s; 1728 } 1729 } 1730 1731 1732 /* Called to get a char that isn't a \newline sequence. */ 1733 static int 1734 getsc_bn(void) 1735 { 1736 int c, c2; 1737 1738 if (ignore_backslash_newline) 1739 return (o_getsc_u()); 1740 1741 if (backslash_skip == 1) { 1742 backslash_skip = 2; 1743 return (o_getsc_u()); 1744 } 1745 1746 backslash_skip = 0; 1747 1748 while (/* CONSTCOND */ 1) { 1749 c = o_getsc_u(); 1750 if (c == '\\') { 1751 if ((c2 = o_getsc_u()) == '\n') 1752 /* ignore the \newline; get the next char... */ 1753 continue; 1754 ungetsc_i(c2); 1755 backslash_skip = 1; 1756 } 1757 return (c); 1758 } 1759 } 1760 1761 void 1762 yyskiputf8bom(void) 1763 { 1764 int c; 1765 1766 if ((unsigned char)(c = o_getsc_u()) != 0xEF) { 1767 ungetsc_i(c); 1768 return; 1769 } 1770 if ((unsigned char)(c = o_getsc_u()) != 0xBB) { 1771 ungetsc_i(c); 1772 ungetsc_i(0xEF); 1773 return; 1774 } 1775 if ((unsigned char)(c = o_getsc_u()) != 0xBF) { 1776 ungetsc_i(c); 1777 ungetsc_i(0xBB); 1778 ungetsc_i(0xEF); 1779 return; 1780 } 1781 UTFMODE |= 8; 1782 } 1783 1784 static Lex_state * 1785 push_state_i(State_info *si, Lex_state *old_end) 1786 { 1787 Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP); 1788 1789 news[0].ls_base = old_end; 1790 si->base = &news[0]; 1791 si->end = &news[STATE_BSIZE]; 1792 return (&news[1]); 1793 } 1794 1795 static Lex_state * 1796 pop_state_i(State_info *si, Lex_state *old_end) 1797 { 1798 Lex_state *old_base = si->base; 1799 1800 si->base = old_end->ls_base - STATE_BSIZE; 1801 si->end = old_end->ls_base; 1802 1803 afree(old_base, ATEMP); 1804 1805 return (si->base + STATE_BSIZE - 1); 1806 } 1807 1808 static int 1809 s_get(void) 1810 { 1811 return (getsc()); 1812 } 1813 1814 static void 1815 s_put(int c) 1816 { 1817 ungetsc(c); 1818 } 1819