1 /* $OpenBSD: lex.c,v 1.45 2011/03/09 09:30:39 okan Exp $ */ 2 3 /*- 4 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 5 * Thorsten Glaser <tg (at) mirbsd.org> 6 * 7 * Provided that these terms and disclaimer and all copyright notices 8 * are retained or reproduced in an accompanying document, permission 9 * is granted to deal in this work without restriction, including un- 10 * limited rights to use, publicly perform, distribute, sell, modify, 11 * merge, give away, or sublicence. 12 * 13 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to 14 * the utmost extent permitted by applicable law, neither express nor 15 * implied; without malicious intent or gross negligence. In no event 16 * may a licensor, author or contributor be held liable for indirect, 17 * direct, other damage, loss, or other issues arising in any way out 18 * of dealing in the work, even if advised of the possibility of such 19 * damage or existence of a defect, except proven that it results out 20 * of said person's immediate fault when using the work as intended. 21 */ 22 23 #include "sh.h" 24 25 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.156 2011/09/07 15:24:16 tg Exp $"); 26 27 /* 28 * states while lexing word 29 */ 30 #define SBASE 0 /* outside any lexical constructs */ 31 #define SWORD 1 /* implicit quoting for substitute() */ 32 #define SLETPAREN 2 /* inside (( )), implicit quoting */ 33 #define SSQUOTE 3 /* inside '' */ 34 #define SDQUOTE 4 /* inside "" */ 35 #define SEQUOTE 5 /* inside $'' */ 36 #define SBRACE 6 /* inside ${} */ 37 #define SQBRACE 7 /* inside "${}" */ 38 #define SBQUOTE 8 /* inside `` */ 39 #define SASPAREN 9 /* inside $(( )) */ 40 #define SHEREDELIM 10 /* parsing <<,<<- delimiter */ 41 #define SHEREDQUOTE 11 /* parsing " in <<,<<- delimiter */ 42 #define SPATTERN 12 /* parsing *(...|...) pattern (*+?@!) 
*/ 43 #define SADELIM 13 /* like SBASE, looking for delimiter */ 44 #define SHERESTRING 14 /* parsing <<< string */ 45 #define STBRACEKORN 15 /* parsing ${...[#%]...} !FSH */ 46 #define STBRACEBOURNE 16 /* parsing ${...[#%]...} FSH */ 47 #define SINVALID 255 /* invalid state */ 48 49 struct sretrace_info { 50 struct sretrace_info *next; 51 XString xs; 52 char *xp; 53 }; 54 55 /* 56 * Structure to keep track of the lexing state and the various pieces of info 57 * needed for each particular state. 58 */ 59 typedef struct lex_state { 60 union { 61 /* point to the next state block */ 62 struct lex_state *base; 63 /* marks start of state output in output string */ 64 int start; 65 /* SBQUOTE: true if in double quotes: "`...`" */ 66 /* SEQUOTE: got NUL, ignore rest of string */ 67 bool abool; 68 /* SADELIM information */ 69 struct { 70 /* character to search for */ 71 unsigned char delimiter; 72 /* max. number of delimiters */ 73 unsigned char num; 74 } adelim; 75 } u; 76 /* count open parentheses */ 77 short nparen; 78 /* type of this state */ 79 uint8_t type; 80 } Lex_state; 81 #define ls_base u.base 82 #define ls_start u.start 83 #define ls_bool u.abool 84 #define ls_adelim u.adelim 85 86 typedef struct { 87 Lex_state *base; 88 Lex_state *end; 89 } State_info; 90 91 static void readhere(struct ioword *); 92 static void ungetsc(int); 93 static void ungetsc_(int); 94 static int getsc_uu(void); 95 static void getsc_line(Source *); 96 static int getsc_bn(void); 97 static int s_get(void); 98 static void s_put(int); 99 static char *get_brace_var(XString *, char *); 100 static bool arraysub(char **); 101 static void gethere(bool); 102 static Lex_state *push_state_(State_info *, Lex_state *); 103 static Lex_state *pop_state_(State_info *, Lex_state *); 104 105 static int dopprompt(const char *, int, bool); 106 void yyskiputf8bom(void); 107 108 static int backslash_skip; 109 static int ignore_backslash_newline; 110 static struct sretrace_info *retrace_info; 111 short 
subshell_nesting_level = 0; 112 113 /* optimised getsc_bn() */ 114 #define o_getsc() (*source->str != '\0' && *source->str != '\\' && \ 115 !backslash_skip ? *source->str++ : getsc_bn()) 116 /* optimised getsc_uu() */ 117 #define o_getsc_u() ((*source->str != '\0') ? *source->str++ : getsc_uu()) 118 119 /* retrace helper */ 120 #define o_getsc_r(carg) { \ 121 int cev = (carg); \ 122 struct sretrace_info *rp = retrace_info; \ 123 \ 124 while (rp) { \ 125 Xcheck(rp->xs, rp->xp); \ 126 *rp->xp++ = cev; \ 127 rp = rp->next; \ 128 } \ 129 \ 130 return (cev); \ 131 } 132 133 #ifdef MKSH_SMALL 134 static int getsc(void); 135 136 static int 137 getsc(void) 138 { 139 o_getsc_r(o_getsc()); 140 } 141 #else 142 static int getsc_r(int); 143 144 static int 145 getsc_r(int c) 146 { 147 o_getsc_r(c); 148 } 149 150 #define getsc() getsc_r(o_getsc()) 151 #endif 152 153 #define STATE_BSIZE 8 154 155 #define PUSH_STATE(s) do { \ 156 if (++statep == state_info.end) \ 157 statep = push_state_(&state_info, statep); \ 158 state = statep->type = (s); \ 159 } while (/* CONSTCOND */ 0) 160 161 #define POP_STATE() do { \ 162 if (--statep == state_info.base) \ 163 statep = pop_state_(&state_info, statep); \ 164 state = statep->type; \ 165 } while (/* CONSTCOND */ 0) 166 167 #define PUSH_SRETRACE() do { \ 168 struct sretrace_info *ri; \ 169 \ 170 statep->ls_start = Xsavepos(ws, wp); \ 171 ri = alloc(sizeof(struct sretrace_info), ATEMP); \ 172 Xinit(ri->xs, ri->xp, 64, ATEMP); \ 173 ri->next = retrace_info; \ 174 retrace_info = ri; \ 175 } while (/* CONSTCOND */ 0) 176 177 #define POP_SRETRACE() do { \ 178 wp = Xrestpos(ws, wp, statep->ls_start); \ 179 *retrace_info->xp = '\0'; \ 180 sp = Xstring(retrace_info->xs, retrace_info->xp); \ 181 dp = (void *)retrace_info; \ 182 retrace_info = retrace_info->next; \ 183 afree(dp, ATEMP); \ 184 } while (/* CONSTCOND */ 0) 185 186 /** 187 * Lexical analyser 188 * 189 * tokens are not regular expressions, they are LL(1). 
190 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))". 191 * hence the state stack. Note "$(...)" are now parsed recursively. 192 */ 193 194 int 195 yylex(int cf) 196 { 197 Lex_state states[STATE_BSIZE], *statep, *s2, *base; 198 State_info state_info; 199 int c, c2, state; 200 size_t cz; 201 XString ws; /* expandable output word */ 202 char *wp; /* output word pointer */ 203 char *sp, *dp; 204 205 Again: 206 states[0].type = SINVALID; 207 states[0].ls_base = NULL; 208 statep = &states[1]; 209 state_info.base = states; 210 state_info.end = &state_info.base[STATE_BSIZE]; 211 212 Xinit(ws, wp, 64, ATEMP); 213 214 backslash_skip = 0; 215 ignore_backslash_newline = 0; 216 217 if (cf & ONEWORD) 218 state = SWORD; 219 else if (cf & LETEXPR) { 220 /* enclose arguments in (double) quotes */ 221 *wp++ = OQUOTE; 222 state = SLETPAREN; 223 statep->nparen = 0; 224 } else { 225 /* normal lexing */ 226 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE; 227 while ((c = getsc()) == ' ' || c == '\t') 228 ; 229 if (c == '#') { 230 ignore_backslash_newline++; 231 while ((c = getsc()) != '\0' && c != '\n') 232 ; 233 ignore_backslash_newline--; 234 } 235 ungetsc(c); 236 } 237 if (source->flags & SF_ALIAS) { 238 /* trailing ' ' in alias definition */ 239 source->flags &= ~SF_ALIAS; 240 cf |= ALIAS; 241 } 242 243 /* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */ 244 statep->type = state; 245 246 /* check for here string */ 247 if (state == SHEREDELIM) { 248 c = getsc(); 249 if (c == '<') { 250 state = SHERESTRING; 251 while ((c = getsc()) == ' ' || c == '\t') 252 ; 253 ungetsc(c); 254 c = '<'; 255 goto accept_nonword; 256 } 257 ungetsc(c); 258 } 259 260 /* collect non-special or quoted characters to form word */ 261 while (!((c = getsc()) == 0 || 262 ((state == SBASE || state == SHEREDELIM || state == SHERESTRING) && 263 ctype(c, C_LEX1)))) { 264 accept_nonword: 265 Xcheck(ws, wp); 266 switch (state) { 267 case SADELIM: 268 if (c == '(') 269 statep->nparen++; 270 else if 
(c == ')') 271 statep->nparen--; 272 else if (statep->nparen == 0 && 273 (c == /*{*/ '}' || c == statep->ls_adelim.delimiter)) { 274 *wp++ = ADELIM; 275 *wp++ = c; 276 if (c == /*{*/ '}' || --statep->ls_adelim.num == 0) 277 POP_STATE(); 278 if (c == /*{*/ '}') 279 POP_STATE(); 280 break; 281 } 282 /* FALLTHROUGH */ 283 case SBASE: 284 if (c == '[' && (cf & (VARASN|ARRAYVAR))) { 285 /* temporary */ 286 *wp = EOS; 287 if (is_wdvarname(Xstring(ws, wp), false)) { 288 char *p, *tmp; 289 290 if (arraysub(&tmp)) { 291 *wp++ = CHAR; 292 *wp++ = c; 293 for (p = tmp; *p; ) { 294 Xcheck(ws, wp); 295 *wp++ = CHAR; 296 *wp++ = *p++; 297 } 298 afree(tmp, ATEMP); 299 break; 300 } else { 301 Source *s; 302 303 s = pushs(SREREAD, 304 source->areap); 305 s->start = s->str = 306 s->u.freeme = tmp; 307 s->next = source; 308 source = s; 309 } 310 } 311 *wp++ = CHAR; 312 *wp++ = c; 313 break; 314 } 315 /* FALLTHROUGH */ 316 Sbase1: /* includes *(...|...) pattern (*+?@!) */ 317 if (c == '*' || c == '@' || c == '+' || c == '?' || 318 c == '!') { 319 c2 = getsc(); 320 if (c2 == '(' /*)*/ ) { 321 *wp++ = OPAT; 322 *wp++ = c; 323 PUSH_STATE(SPATTERN); 324 break; 325 } 326 ungetsc(c2); 327 } 328 /* FALLTHROUGH */ 329 Sbase2: /* doesn't include *(...|...) pattern (*+?@!) 
*/ 330 switch (c) { 331 case '\\': 332 getsc_qchar: 333 if ((c = getsc())) { 334 /* trailing \ is lost */ 335 *wp++ = QCHAR; 336 *wp++ = c; 337 } 338 break; 339 case '\'': 340 open_ssquote: 341 *wp++ = OQUOTE; 342 ignore_backslash_newline++; 343 PUSH_STATE(SSQUOTE); 344 break; 345 case '"': 346 open_sdquote: 347 *wp++ = OQUOTE; 348 PUSH_STATE(SDQUOTE); 349 break; 350 default: 351 goto Subst; 352 } 353 break; 354 355 Subst: 356 switch (c) { 357 case '\\': 358 c = getsc(); 359 switch (c) { 360 case '"': 361 if ((cf & HEREDOC)) 362 goto heredocquote; 363 /* FALLTHROUGH */ 364 case '\\': 365 case '$': case '`': 366 store_qchar: 367 *wp++ = QCHAR; 368 *wp++ = c; 369 break; 370 default: 371 heredocquote: 372 Xcheck(ws, wp); 373 if (c) { 374 /* trailing \ is lost */ 375 *wp++ = CHAR; 376 *wp++ = '\\'; 377 *wp++ = CHAR; 378 *wp++ = c; 379 } 380 break; 381 } 382 break; 383 case '$': 384 subst_dollar: 385 c = getsc(); 386 if (c == '(') /*)*/ { 387 c = getsc(); 388 if (c == '(') /*)*/ { 389 *wp++ = EXPRSUB; 390 PUSH_STATE(SASPAREN); 391 statep->nparen = 2; 392 PUSH_SRETRACE(); 393 *retrace_info->xp++ = '('; 394 } else { 395 ungetsc(c); 396 subst_command: 397 sp = yyrecursive(); 398 cz = strlen(sp) + 1; 399 XcheckN(ws, wp, cz); 400 *wp++ = COMSUB; 401 memcpy(wp, sp, cz); 402 wp += cz; 403 } 404 } else if (c == '{') /*}*/ { 405 *wp++ = OSUBST; 406 *wp++ = '{'; /*}*/ 407 wp = get_brace_var(&ws, wp); 408 c = getsc(); 409 /* allow :# and :% (ksh88 compat) */ 410 if (c == ':') { 411 *wp++ = CHAR; 412 *wp++ = c; 413 c = getsc(); 414 if (c == ':') { 415 *wp++ = CHAR; 416 *wp++ = '0'; 417 *wp++ = ADELIM; 418 *wp++ = ':'; 419 PUSH_STATE(SBRACE); 420 PUSH_STATE(SADELIM); 421 statep->ls_adelim.delimiter = ':'; 422 statep->ls_adelim.num = 1; 423 statep->nparen = 0; 424 break; 425 } else if (ksh_isdigit(c) || 426 c == '('/*)*/ || c == ' ' || 427 /*XXX what else? */ 428 c == '$') { 429 /* substring subst. 
*/ 430 if (c != ' ') { 431 *wp++ = CHAR; 432 *wp++ = ' '; 433 } 434 ungetsc(c); 435 PUSH_STATE(SBRACE); 436 PUSH_STATE(SADELIM); 437 statep->ls_adelim.delimiter = ':'; 438 statep->ls_adelim.num = 2; 439 statep->nparen = 0; 440 break; 441 } 442 } else if (c == '/') { 443 *wp++ = CHAR; 444 *wp++ = c; 445 if ((c = getsc()) == '/') { 446 *wp++ = ADELIM; 447 *wp++ = c; 448 } else 449 ungetsc(c); 450 PUSH_STATE(SBRACE); 451 PUSH_STATE(SADELIM); 452 statep->ls_adelim.delimiter = '/'; 453 statep->ls_adelim.num = 1; 454 statep->nparen = 0; 455 break; 456 } 457 /* 458 * If this is a trim operation, 459 * treat (,|,) specially in STBRACE. 460 */ 461 if (ctype(c, C_SUBOP2)) { 462 ungetsc(c); 463 if (Flag(FSH)) 464 PUSH_STATE(STBRACEBOURNE); 465 else 466 PUSH_STATE(STBRACEKORN); 467 } else { 468 ungetsc(c); 469 if (state == SDQUOTE) 470 PUSH_STATE(SQBRACE); 471 else 472 PUSH_STATE(SBRACE); 473 } 474 } else if (ksh_isalphx(c)) { 475 *wp++ = OSUBST; 476 *wp++ = 'X'; 477 do { 478 Xcheck(ws, wp); 479 *wp++ = c; 480 c = getsc(); 481 } while (ksh_isalnux(c)); 482 *wp++ = '\0'; 483 *wp++ = CSUBST; 484 *wp++ = 'X'; 485 ungetsc(c); 486 } else if (ctype(c, C_VAR1 | C_DIGIT)) { 487 Xcheck(ws, wp); 488 *wp++ = OSUBST; 489 *wp++ = 'X'; 490 *wp++ = c; 491 *wp++ = '\0'; 492 *wp++ = CSUBST; 493 *wp++ = 'X'; 494 } else if (c == '\'' && (state == SBASE)) { 495 /* XXX which other states are valid? */ 496 *wp++ = OQUOTE; 497 ignore_backslash_newline++; 498 PUSH_STATE(SEQUOTE); 499 statep->ls_bool = false; 500 break; 501 } else if (c == '"' && (state == SBASE)) { 502 /* XXX which other states are valid? */ 503 goto DEQUOTE; 504 } else { 505 *wp++ = CHAR; 506 *wp++ = '$'; 507 DEQUOTE: 508 ungetsc(c); 509 } 510 break; 511 case '`': 512 subst_gravis: 513 PUSH_STATE(SBQUOTE); 514 *wp++ = COMSUB; 515 /* 516 * Need to know if we are inside double quotes 517 * since sh/AT&T-ksh translate the \" to " in 518 * "`...\"...`". 
519 * This is not done in POSIX mode (section 520 * 3.2.3, Double Quotes: "The backquote shall 521 * retain its special meaning introducing the 522 * other form of command substitution (see 523 * 3.6.3). The portion of the quoted string 524 * from the initial backquote and the 525 * characters up to the next backquote that 526 * is not preceded by a backslash (having 527 * escape characters removed) defines that 528 * command whose output replaces `...` when 529 * the word is expanded." 530 * Section 3.6.3, Command Substitution: 531 * "Within the backquoted style of command 532 * substitution, backslash shall retain its 533 * literal meaning, except when followed by 534 * $ ` \."). 535 */ 536 statep->ls_bool = false; 537 s2 = statep; 538 base = state_info.base; 539 while (/* CONSTCOND */ 1) { 540 for (; s2 != base; s2--) { 541 if (s2->type == SDQUOTE) { 542 statep->ls_bool = true; 543 break; 544 } 545 } 546 if (s2 != base) 547 break; 548 if (!(s2 = s2->ls_base)) 549 break; 550 base = s2-- - STATE_BSIZE; 551 } 552 break; 553 case QCHAR: 554 if (cf & LQCHAR) { 555 *wp++ = QCHAR; 556 *wp++ = getsc(); 557 break; 558 } 559 /* FALLTHROUGH */ 560 default: 561 store_char: 562 *wp++ = CHAR; 563 *wp++ = c; 564 } 565 break; 566 567 case SEQUOTE: 568 if (c == '\'') { 569 POP_STATE(); 570 *wp++ = CQUOTE; 571 ignore_backslash_newline--; 572 } else if (c == '\\') { 573 if ((c2 = unbksl(true, s_get, s_put)) == -1) 574 c2 = s_get(); 575 if (c2 == 0) 576 statep->ls_bool = true; 577 if (!statep->ls_bool) { 578 char ts[4]; 579 580 if ((unsigned int)c2 < 0x100) { 581 *wp++ = QCHAR; 582 *wp++ = c2; 583 } else { 584 cz = utf_wctomb(ts, c2 - 0x100); 585 ts[cz] = 0; 586 for (cz = 0; ts[cz]; ++cz) { 587 *wp++ = QCHAR; 588 *wp++ = ts[cz]; 589 } 590 } 591 } 592 } else if (!statep->ls_bool) { 593 *wp++ = QCHAR; 594 *wp++ = c; 595 } 596 break; 597 598 case SSQUOTE: 599 if (c == '\'') { 600 POP_STATE(); 601 *wp++ = CQUOTE; 602 ignore_backslash_newline--; 603 } else { 604 *wp++ = QCHAR; 605 *wp++ 
= c; 606 } 607 break; 608 609 case SDQUOTE: 610 if (c == '"') { 611 POP_STATE(); 612 *wp++ = CQUOTE; 613 } else 614 goto Subst; 615 break; 616 617 /* $(( ... )) */ 618 case SASPAREN: 619 if (c == '(') 620 statep->nparen++; 621 else if (c == ')') { 622 statep->nparen--; 623 if (statep->nparen == 1) { 624 /* end of EXPRSUB */ 625 POP_SRETRACE(); 626 POP_STATE(); 627 628 if ((c2 = getsc()) == /*(*/ ')') { 629 cz = strlen(sp) - 2; 630 XcheckN(ws, wp, cz); 631 memcpy(wp, sp + 1, cz); 632 wp += cz; 633 afree(sp, ATEMP); 634 *wp++ = '\0'; 635 break; 636 } else { 637 Source *s; 638 639 ungetsc(c2); 640 /* 641 * mismatched parenthesis - 642 * assume we were really 643 * parsing a $(...) expression 644 */ 645 --wp; 646 s = pushs(SREREAD, 647 source->areap); 648 s->start = s->str = 649 s->u.freeme = sp; 650 s->next = source; 651 source = s; 652 goto subst_command; 653 } 654 } 655 } 656 /* reuse existing state machine */ 657 goto Sbase2; 658 659 case SQBRACE: 660 if (c == '\\') { 661 /* 662 * perform POSIX "quote removal" if the back- 663 * slash is "special", i.e. 
same cases as the 664 * {case '\\':} in Subst: plus closing brace; 665 * in mksh code "quote removal" on '\c' means 666 * write QCHAR+c, otherwise CHAR+\+CHAR+c are 667 * emitted (in heredocquote:) 668 */ 669 if ((c = getsc()) == '"' || c == '\\' || 670 c == '$' || c == '`' || c == /*{*/'}') 671 goto store_qchar; 672 goto heredocquote; 673 } 674 goto common_SQBRACE; 675 676 case SBRACE: 677 if (c == '\'') 678 goto open_ssquote; 679 else if (c == '\\') 680 goto getsc_qchar; 681 common_SQBRACE: 682 if (c == '"') 683 goto open_sdquote; 684 else if (c == '$') 685 goto subst_dollar; 686 else if (c == '`') 687 goto subst_gravis; 688 else if (c != /*{*/ '}') 689 goto store_char; 690 POP_STATE(); 691 *wp++ = CSUBST; 692 *wp++ = /*{*/ '}'; 693 break; 694 695 /* Same as SBASE, except (,|,) treated specially */ 696 case STBRACEKORN: 697 if (c == '|') 698 *wp++ = SPAT; 699 else if (c == '(') { 700 *wp++ = OPAT; 701 /* simile for @ */ 702 *wp++ = ' '; 703 PUSH_STATE(SPATTERN); 704 } else /* FALLTHROUGH */ 705 case STBRACEBOURNE: 706 if (c == /*{*/ '}') { 707 POP_STATE(); 708 *wp++ = CSUBST; 709 *wp++ = /*{*/ '}'; 710 } else 711 goto Sbase1; 712 break; 713 714 case SBQUOTE: 715 if (c == '`') { 716 *wp++ = 0; 717 POP_STATE(); 718 } else if (c == '\\') { 719 switch (c = getsc()) { 720 case 0: 721 /* trailing \ is lost */ 722 break; 723 case '\\': 724 case '$': case '`': 725 *wp++ = c; 726 break; 727 case '"': 728 if (statep->ls_bool) { 729 *wp++ = c; 730 break; 731 } 732 /* FALLTHROUGH */ 733 default: 734 *wp++ = '\\'; 735 *wp++ = c; 736 break; 737 } 738 } else 739 *wp++ = c; 740 break; 741 742 /* ONEWORD */ 743 case SWORD: 744 goto Subst; 745 746 /* LETEXPR: (( ... 
)) */ 747 case SLETPAREN: 748 if (c == /*(*/ ')') { 749 if (statep->nparen > 0) 750 --statep->nparen; 751 else if ((c2 = getsc()) == /*(*/ ')') { 752 c = 0; 753 *wp++ = CQUOTE; 754 goto Done; 755 } else { 756 Source *s; 757 758 ungetsc(c2); 759 /* 760 * mismatched parenthesis - 761 * assume we were really 762 * parsing a (...) expression 763 */ 764 *wp = EOS; 765 sp = Xstring(ws, wp); 766 dp = wdstrip(sp, WDS_KEEPQ); 767 s = pushs(SREREAD, source->areap); 768 s->start = s->str = s->u.freeme = dp; 769 s->next = source; 770 source = s; 771 return ('('/*)*/); 772 } 773 } else if (c == '(') 774 /* 775 * parentheses inside quotes and 776 * backslashes are lost, but AT&T ksh 777 * doesn't count them either 778 */ 779 ++statep->nparen; 780 goto Sbase2; 781 782 /* <<< delimiter */ 783 case SHERESTRING: 784 if (c == '\\') { 785 c = getsc(); 786 if (c) { 787 /* trailing \ is lost */ 788 *wp++ = QCHAR; 789 *wp++ = c; 790 } 791 } else if (c == '$') { 792 if ((c2 = getsc()) == '\'') { 793 PUSH_STATE(SEQUOTE); 794 statep->ls_bool = false; 795 goto sherestring_quoted; 796 } else if (c2 == '"') 797 goto sherestring_dquoted; 798 ungetsc(c2); 799 goto sherestring_regular; 800 } else if (c == '\'') { 801 PUSH_STATE(SSQUOTE); 802 sherestring_quoted: 803 *wp++ = OQUOTE; 804 ignore_backslash_newline++; 805 } else if (c == '"') { 806 sherestring_dquoted: 807 state = statep->type = SHEREDQUOTE; 808 *wp++ = OQUOTE; 809 /* just don't IFS split; no quoting mode */ 810 } else { 811 sherestring_regular: 812 *wp++ = CHAR; 813 *wp++ = c; 814 } 815 break; 816 817 /* <<,<<- delimiter */ 818 case SHEREDELIM: 819 /* 820 * XXX chuck this state (and the next) - use 821 * the existing states ($ and \`...` should be 822 * stripped of their specialness after the 823 * fact). 
824 */ 825 /* 826 * here delimiters need a special case since 827 * $ and `...` are not to be treated specially 828 */ 829 if (c == '\\') { 830 c = getsc(); 831 if (c) { 832 /* trailing \ is lost */ 833 *wp++ = QCHAR; 834 *wp++ = c; 835 } 836 } else if (c == '$') { 837 if ((c2 = getsc()) == '\'') { 838 PUSH_STATE(SEQUOTE); 839 statep->ls_bool = false; 840 goto sheredelim_quoted; 841 } else if (c2 == '"') 842 goto sheredelim_dquoted; 843 ungetsc(c2); 844 goto sheredelim_regular; 845 } else if (c == '\'') { 846 PUSH_STATE(SSQUOTE); 847 sheredelim_quoted: 848 *wp++ = OQUOTE; 849 ignore_backslash_newline++; 850 } else if (c == '"') { 851 sheredelim_dquoted: 852 state = statep->type = SHEREDQUOTE; 853 *wp++ = OQUOTE; 854 } else { 855 sheredelim_regular: 856 *wp++ = CHAR; 857 *wp++ = c; 858 } 859 break; 860 861 /* " in <<,<<- delimiter */ 862 case SHEREDQUOTE: 863 if (c == '"') { 864 *wp++ = CQUOTE; 865 state = statep->type = 866 /* dp[1] == '<' means here string */ 867 Xstring(ws, wp)[1] == '<' ? 868 SHERESTRING : SHEREDELIM; 869 } else { 870 if (c == '\\') { 871 switch (c = getsc()) { 872 case 0: 873 /* trailing \ is lost */ 874 case '\\': 875 case '"': 876 case '$': 877 case '`': 878 break; 879 default: 880 *wp++ = CHAR; 881 *wp++ = '\\'; 882 break; 883 } 884 } 885 *wp++ = CHAR; 886 *wp++ = c; 887 } 888 break; 889 890 /* in *(...|...) pattern (*+?@!) 
*/ 891 case SPATTERN: 892 if (c == /*(*/ ')') { 893 *wp++ = CPAT; 894 POP_STATE(); 895 } else if (c == '|') { 896 *wp++ = SPAT; 897 } else if (c == '(') { 898 *wp++ = OPAT; 899 /* simile for @ */ 900 *wp++ = ' '; 901 PUSH_STATE(SPATTERN); 902 } else 903 goto Sbase1; 904 break; 905 } 906 } 907 Done: 908 Xcheck(ws, wp); 909 if (statep != &states[1]) 910 /* XXX figure out what is missing */ 911 yyerror("no closing quote\n"); 912 913 /* This done to avoid tests for SHEREDELIM wherever SBASE tested */ 914 if (state == SHEREDELIM || state == SHERESTRING) 915 state = SBASE; 916 917 dp = Xstring(ws, wp); 918 if ((c == '<' || c == '>' || c == '&') && state == SBASE) { 919 struct ioword *iop = alloc(sizeof(struct ioword), ATEMP); 920 921 if (Xlength(ws, wp) == 0) 922 iop->unit = c == '<' ? 0 : 1; 923 else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) { 924 if (dp[c2] != CHAR) 925 goto no_iop; 926 if (!ksh_isdigit(dp[c2 + 1])) 927 goto no_iop; 928 iop->unit = (iop->unit * 10) + dp[c2 + 1] - '0'; 929 } 930 931 if (iop->unit >= FDBASE) 932 goto no_iop; 933 934 if (c == '&') { 935 if ((c2 = getsc()) != '>') { 936 ungetsc(c2); 937 goto no_iop; 938 } 939 c = c2; 940 iop->flag = IOBASH; 941 } else 942 iop->flag = 0; 943 944 c2 = getsc(); 945 /* <<, >>, <> are ok, >< is not */ 946 if (c == c2 || (c == '<' && c2 == '>')) { 947 iop->flag |= c == c2 ? 948 (c == '>' ? IOCAT : IOHERE) : IORDWR; 949 if (iop->flag == IOHERE) { 950 if ((c2 = getsc()) == '-') { 951 iop->flag |= IOSKIP; 952 c2 = getsc(); 953 } else if (c2 == '<') 954 iop->flag |= IOHERESTR; 955 ungetsc(c2); 956 if (c2 == '\n') 957 iop->flag |= IONDELIM; 958 } 959 } else if (c2 == '&') 960 iop->flag |= IODUP | (c == '<' ? IORDUP : 0); 961 else { 962 iop->flag |= c == '>' ? 
IOWRITE : IOREAD; 963 if (c == '>' && c2 == '|') 964 iop->flag |= IOCLOB; 965 else 966 ungetsc(c2); 967 } 968 969 iop->name = NULL; 970 iop->delim = NULL; 971 iop->heredoc = NULL; 972 /* free word */ 973 Xfree(ws, wp); 974 yylval.iop = iop; 975 return (REDIR); 976 no_iop: 977 afree(iop, ATEMP); 978 } 979 980 if (wp == dp && state == SBASE) { 981 /* free word */ 982 Xfree(ws, wp); 983 /* no word, process LEX1 character */ 984 if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) { 985 if ((c2 = getsc()) == c) 986 c = (c == ';') ? BREAK : 987 (c == '|') ? LOGOR : 988 (c == '&') ? LOGAND : 989 /* c == '(' ) */ MDPAREN; 990 else if (c == '|' && c2 == '&') 991 c = COPROC; 992 else if (c == ';' && c2 == '|') 993 c = BRKEV; 994 else if (c == ';' && c2 == '&') 995 c = BRKFT; 996 else 997 ungetsc(c2); 998 #ifndef MKSH_SMALL 999 if (c == BREAK) { 1000 if ((c2 = getsc()) == '&') 1001 c = BRKEV; 1002 else 1003 ungetsc(c2); 1004 } 1005 #endif 1006 } else if (c == '\n') { 1007 gethere(false); 1008 if (cf & CONTIN) 1009 goto Again; 1010 } else if (c == '\0') 1011 /* need here strings at EOF */ 1012 gethere(true); 1013 return (c); 1014 } 1015 1016 /* terminate word */ 1017 *wp++ = EOS; 1018 yylval.cp = Xclose(ws, wp); 1019 if (state == SWORD || state == SLETPAREN 1020 /* XXX ONEWORD? */) 1021 return (LWORD); 1022 1023 /* unget terminator */ 1024 ungetsc(c); 1025 1026 /* 1027 * note: the alias-vs-function code below depends on several 1028 * interna: starting from here, source->str is not modified; 1029 * the way getsc() and ungetsc() operate; etc. 
1030 */ 1031 1032 /* copy word to unprefixed string ident */ 1033 sp = yylval.cp; 1034 dp = ident; 1035 if ((cf & HEREDELIM) && (sp[1] == '<')) 1036 while (dp < ident+IDENT) { 1037 if ((c = *sp++) == CHAR) 1038 *dp++ = *sp++; 1039 else if ((c != OQUOTE) && (c != CQUOTE)) 1040 break; 1041 } 1042 else 1043 while (dp < ident+IDENT && (c = *sp++) == CHAR) 1044 *dp++ = *sp++; 1045 /* Make sure the ident array stays '\0' padded */ 1046 memset(dp, 0, (ident+IDENT) - dp + 1); 1047 if (c != EOS) 1048 /* word is not unquoted */ 1049 *ident = '\0'; 1050 1051 if (*ident != '\0' && (cf & (KEYWORD | ALIAS))) { 1052 struct tbl *p; 1053 uint32_t h = hash(ident); 1054 1055 if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) && 1056 (!(cf & ESACONLY) || p->val.i == ESAC || 1057 p->val.i == /*{*/ '}')) { 1058 afree(yylval.cp, ATEMP); 1059 return (p->val.i); 1060 } 1061 if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) && 1062 (p->flag & ISSET)) { 1063 /* 1064 * this still points to the same character as the 1065 * ungetsc'd terminator from above 1066 */ 1067 const char *cp = source->str; 1068 1069 /* prefer POSIX but not Korn functions over aliases */ 1070 while (*cp == ' ' || *cp == '\t') 1071 /* 1072 * this is like getsc() without skipping 1073 * over Source boundaries (including not 1074 * parsing ungetsc'd characters that got 1075 * pushed into an SREREAD) which is what 1076 * we want here anyway: find out whether 1077 * the alias name is followed by a POSIX 1078 * function definition (only the opening 1079 * parenthesis is checked though) 1080 */ 1081 ++cp; 1082 /* prefer functions over aliases */ 1083 if (cp[0] != '(' || cp[1] != ')') { 1084 Source *s = source; 1085 1086 while (s && (s->flags & SF_HASALIAS)) 1087 if (s->u.tblp == p) 1088 return (LWORD); 1089 else 1090 s = s->next; 1091 /* push alias expansion */ 1092 s = pushs(SALIAS, source->areap); 1093 s->start = s->str = p->val.s; 1094 s->u.tblp = p; 1095 s->flags |= SF_HASALIAS; 1096 s->next = source; 1097 if 
(source->type == SEOF) { 1098 /* prevent infinite recursion at EOS */ 1099 source->u.tblp = p; 1100 source->flags |= SF_HASALIAS; 1101 } 1102 source = s; 1103 afree(yylval.cp, ATEMP); 1104 goto Again; 1105 } 1106 } 1107 } 1108 1109 return (LWORD); 1110 } 1111 1112 static void 1113 gethere(bool iseof) 1114 { 1115 struct ioword **p; 1116 1117 for (p = heres; p < herep; p++) 1118 if (iseof && !((*p)->flag & IOHERESTR)) 1119 /* only here strings at EOF */ 1120 return; 1121 else 1122 readhere(*p); 1123 herep = heres; 1124 } 1125 1126 /* 1127 * read "<<word" text into temp file 1128 */ 1129 1130 static void 1131 readhere(struct ioword *iop) 1132 { 1133 int c; 1134 const char *eof, *eofp; 1135 XString xs; 1136 char *xp; 1137 int xpos; 1138 1139 if (iop->flag & IOHERESTR) { 1140 /* process the here string */ 1141 iop->heredoc = xp = evalstr(iop->delim, DOBLANK); 1142 xpos = strlen(xp) - 1; 1143 memmove(xp, xp + 1, xpos); 1144 xp[xpos] = '\n'; 1145 return; 1146 } 1147 1148 eof = iop->flag & IONDELIM ? "<<" : evalstr(iop->delim, 0); 1149 1150 if (!(iop->flag & IOEVAL)) 1151 ignore_backslash_newline++; 1152 1153 Xinit(xs, xp, 256, ATEMP); 1154 1155 heredoc_read_line: 1156 /* beginning of line */ 1157 eofp = eof; 1158 xpos = Xsavepos(xs, xp); 1159 if (iop->flag & IOSKIP) { 1160 /* skip over leading tabs */ 1161 while ((c = getsc()) == '\t') 1162 /* nothing */; 1163 goto heredoc_parse_char; 1164 } 1165 heredoc_read_char: 1166 c = getsc(); 1167 heredoc_parse_char: 1168 /* compare with here document marker */ 1169 if (!*eofp) { 1170 /* end of here document marker, what to do? */ 1171 switch (c) { 1172 case /*(*/ ')': 1173 if (!subshell_nesting_level) 1174 /*- 1175 * not allowed outside $(...) or (...) 1176 * => mismatch 1177 */ 1178 break; 1179 /* allow $(...) or (...) to close here */ 1180 ungetsc(/*(*/ ')'); 1181 /* FALLTHROUGH */ 1182 case 0: 1183 /* 1184 * Allow EOF here to commands without trailing 1185 * newlines (mksh -c '...') will work as well. 
1186 */ 1187 case '\n': 1188 /* Newline terminates here document marker */ 1189 goto heredoc_found_terminator; 1190 } 1191 } else if (c == *eofp++) 1192 /* store; then read and compare next character */ 1193 goto heredoc_store_and_loop; 1194 /* nope, mismatch; read until end of line */ 1195 while (c != '\n') { 1196 if (!c) 1197 /* oops, reached EOF */ 1198 yyerror("%s '%s' unclosed\n", "here document", eof); 1199 /* store character */ 1200 Xcheck(xs, xp); 1201 Xput(xs, xp, c); 1202 /* read next character */ 1203 c = getsc(); 1204 } 1205 /* we read a newline as last character */ 1206 heredoc_store_and_loop: 1207 /* store character */ 1208 Xcheck(xs, xp); 1209 Xput(xs, xp, c); 1210 if (c == '\n') 1211 goto heredoc_read_line; 1212 goto heredoc_read_char; 1213 1214 heredoc_found_terminator: 1215 /* jump back to saved beginning of line */ 1216 xp = Xrestpos(xs, xp, xpos); 1217 /* terminate, close and store */ 1218 Xput(xs, xp, '\0'); 1219 iop->heredoc = Xclose(xs, xp); 1220 1221 if (!(iop->flag & IOEVAL)) 1222 ignore_backslash_newline--; 1223 } 1224 1225 void 1226 yyerror(const char *fmt, ...) 
{
	va_list va;

	/* pop aliases and re-reads */
	while (source->type == SALIAS || source->type == SREREAD)
		source = source->next;
	/* zap pending input */
	source->str = null;

	error_prefix(true);
	va_start(va, fmt);
	shf_vfprintf(shl_out, fmt, va);
	va_end(va);
	errorfz();
}

/*
 * input for yylex with alias expansion
 */

/*
 * Allocate a new Source of the given type from areap.
 *
 * The structure is zero-filled, its str pointer is set to `null` and the
 * area is remembered in s->areap. For SFILE and SSTDIN sources the line
 * buffer s->xs is additionally set up through XinitN with a size argument
 * of 256. The source is returned unlinked: neither s->next nor the global
 * `source` is touched here, so the caller has to push it itself.
 */
Source *
pushs(int type, Area *areap)
{
	Source *s;

	s = alloc(sizeof(Source), areap);
	memset(s, 0, sizeof(Source));
	s->type = type;
	s->str = null;
	s->areap = areap;
	if (type == SFILE || type == SSTDIN)
		XinitN(s->xs, 256, s->areap);
	return (s);
}

/*
 * Fetch the next character from the current input source.
 *
 * As long as the character read is NUL the current buffer is considered
 * exhausted and, depending on the source type, it is refilled, replaced
 * by a follow-up buffer, or the source is popped off the source stack.
 * Returns the character read, or 0 once no more input is available (the
 * source is then switched to SEOF).
 */
static int
getsc_uu(void)
{
	Source *s = source;
	int c;

	while ((c = *s->str++) == 0) {
		/* return 0 for EOF by default */
		s->str = NULL;
		switch (s->type) {
		case SEOF:
			/* stay at EOF; keep str pointing at a valid string */
			s->str = null;
			return (0);

		case SSTDIN:
		case SFILE:
			/* refill; getsc_line leaves str NULL on EOF */
			getsc_line(s);
			break;

		case SWSTR:
			/* nothing to refill: str stays NULL, EOF below */
			break;

		case SSTRING:
			/* nothing to refill: str stays NULL, EOF below */
			break;

		case SWORDS:
			/* hand out the next word; a separator comes next */
			s->start = s->str = *s->u.strv++;
			s->type = SWORDSEP;
			break;

		case SWORDSEP:
			if (*s->u.strv == NULL) {
				/* no words left: final newline, then EOF */
				s->start = s->str = "\n";
				s->type = SEOF;
			} else {
				/* blank between this word and the next */
				s->start = s->str = " ";
				s->type = SWORDS;
			}
			break;

		case SALIAS:
			if (s->flags & SF_ALIASEND) {
				/* pass on an unused SF_ALIAS flag */
				source = s->next;
				source->flags |= s->flags & SF_ALIAS;
				s = source;
			} else if (*s->u.tblp->val.s &&
			    (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) {
				/* pop source stack */
				source = s = s->next;
				/*
				 * Note that this alias ended with a
				 * space, enabling alias expansion on
				 * the following word.
				 */
				s->flags |= SF_ALIAS;
			} else {
				/*
				 * At this point, we need to keep the current
				 * alias in the source list so recursive
				 * aliases can be detected and we also need to
				 * return the next character. Do this by
				 * temporarily popping the alias to get the
				 * next character and then put it back in the
				 * source list with the SF_ALIASEND flag set.
				 */
				/* pop source stack */
				source = s->next;
				source->flags |= s->flags & SF_ALIAS;
				c = getsc_uu();
				if (c) {
					/*
					 * re-push the alias source; its buffer
					 * is now the one-character lookahead
					 */
					s->flags |= SF_ALIASEND;
					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
					s->start = s->str = s->ugbuf;
					s->next = source;
					source = s;
				} else {
					s = source;
					/* avoid reading EOF twice */
					s->str = NULL;
					/*
					 * leaves the switch only; the NULL
					 * check below then reports EOF
					 */
					break;
				}
			}
			/* read again from whatever source is current now */
			continue;

		case SREREAD:
			if (s->start != s->ugbuf)
				/* yuck: buffer is not the built-in ugbuf */
				afree(s->u.freeme, ATEMP);
			/* pop the pushed-back input and resume below it */
			source = s = s->next;
			continue;
		}
		if (s->str == NULL) {
			/* nothing could be provided: latch EOF */
			s->type = SEOF;
			s->start = s->str = null;
			return ('\0');
		}
		if (s->flags & SF_ECHO) {
			/* echo the freshly obtained buffer to shl_out */
			shf_puts(s->str, shl_out);
			shf_flush(shl_out);
		}
	}
	return (c);
}

/*
 * Read the next line of input for the SSTDIN or SFILE source s into its
 * buffer s->xs and point s->start / s->str at it; s->str is set to NULL
 * when nothing was read (EOF).
 *
 * On a terminal with one of the edit mode flags set the line comes from
 * x_read(); otherwise the prompt is printed (interactive) or the line
 * counter is bumped (non-interactive) and the line is read with
 * shf_getse(), growing the buffer as needed. Also arms and disarms the
 * ksh_tmout alarm, rewrites a leading '!' typed at PS1, passes the line
 * to strip_nuls(), saves it to the history and finally switches an
 * interactive shell to the PS2 prompt.
 */
static void
getsc_line(Source *s)
{
	char *xp = Xstring(s->xs, xp), *cp;
	bool interactive = Flag(FTALKING) && s->type == SSTDIN;
	int have_tty = interactive && (s->flags & SF_TTY);

	/* Done here to ensure nothing odd happens when a timeout occurs */
	XcheckN(s->xs, xp, LINE);
	*xp = '\0';
	s->start = s->str = xp;

	if (have_tty && ksh_tmout) {
		ksh_tmout_state = TMOUT_READING;
		alarm(ksh_tmout);
	}
	if (interactive)
		change_winsz();
	if (have_tty && (
#if !MKSH_S_NOVI
	    Flag(FVI) ||
#endif
	    Flag(FEMACS) || Flag(FGMACS))) {
		int nread;

		/* NOTE(review): x_read is presumably the line editor - confirm */
		nread = x_read(xp, LINE);
		if (nread < 0)
			/* read error */
			nread = 0;
		xp[nread] = '\0';
		xp += nread;
	} else {
		if (interactive)
			pprompt(prompt, 0);
		else
			s->line++;

		while (/* CONSTCOND */ 1) {
			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);

			/* interrupted read: run pending traps and retry */
			if (!p && shf_error(s->u.shf) &&
			    shf_errno(s->u.shf) == EINTR) {
				shf_clearerr(s->u.shf);
				if (trap)
					runtraps(0);
				continue;
			}
			/*
			 * stop when nothing was returned or the data now
			 * ends in a newline; a non-NULL p becomes the new
			 * end of data
			 */
			if (!p || (xp = p, xp[-1] == '\n'))
				break;
			/* double buffer size */
			/* move past NUL so doubling works... */
			xp++;
			XcheckN(s->xs, xp, Xlength(s->xs, xp));
			/* ...and move back again */
			xp--;
		}
		/*
		 * flush any unwanted input so other programs/builtins
		 * can read it. Not very optimal, but less error prone
		 * than flushing elsewhere, dealing with redirections,
		 * etc.
		 * TODO: reduce size of shf buffer (~128?) if SSTDIN
		 */
		if (s->type == SSTDIN)
			shf_flush(s->u.shf);
	}
	/*
	 * XXX: temporary kludge to restore source after a
	 * trap may have been executed.
	 */
	source = s;
	if (have_tty && ksh_tmout) {
		ksh_tmout_state = TMOUT_EXECUTING;
		alarm(0);
	}
	cp = Xstring(s->xs, xp);
#ifndef MKSH_SMALL
	if (interactive && *cp == '!' && cur_prompt == PS1) {
		int linelen;

		linelen = Xlength(s->xs, xp);
		XcheckN(s->xs, xp, Zfc_e_dash + /* NUL */ 1);
		/* reload after potential realloc */
		cp = Xstring(s->xs, xp);
		/* change initial '!' into space */
		*cp = ' ';
		/* NUL terminate the current string */
		*xp = '\0';
		/* move the actual string forward */
		memmove(cp + Zfc_e_dash, cp, linelen + /* NUL */ 1);
		xp += Zfc_e_dash;
		/* prepend it with "fc -e -" */
		memcpy(cp, Tfc_e_dash, Zfc_e_dash);
	}
#endif
	s->start = s->str = cp;
	/* NOTE(review): presumably drops embedded NUL bytes - confirm */
	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
	/* Note: if input is all nulls, this is not eof */
	if (Xlength(s->xs, xp) == 0) {
		/* EOF */
		if (s->type == SFILE)
			shf_fdclose(s->u.shf);
		s->str = NULL;
	} else if (interactive && *s->str &&
	    (cur_prompt != PS1 || !ctype(*s->str, C_IFS | C_IFSWS))) {
		/*
		 * save non-empty interactive lines; at PS1 a line whose
		 * first character matches the C_IFS | C_IFSWS class is
		 * left out
		 */
		histsave(&s->line, s->str, true, true);
#if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
	} else if (interactive && cur_prompt == PS1) {
		/* line made up of C_IFSWS characters only: sync history */
		cp = Xstring(s->xs, xp);
		while (*cp && ctype(*cp, C_IFSWS))
			++cp;
		if (!*cp)
			histsync();
#endif
	}
	/* anything read after this line continues the command */
	if (interactive)
		set_prompt(PS2, NULL);
}

/*
 * Select the prompt kind `to` (PS1 or PS2), record it in cur_prompt and
 * recompute the global `prompt` string from the matching parameter.
 *
 * s is only used for PS1: its line number plus one replaces a single
 * '!' in the prompt text; with s == NULL the number 0 is used instead.
 */
void
set_prompt(int to, Source *s)
{
	cur_prompt = to;

	switch (to) {
	/* command */
	case PS1:
		/*
		 * Substitute ! and !! here, before substitutions are done
		 * so ! in expanded variables are not expanded.
		 * NOTE: this is not what AT&T ksh does (it does it after
		 * substitutions); POSIX doesn't say which is to be done.
		 */
		{
			struct shf *shf;
			char * volatile ps1;
			Area *saved_atemp;

			ps1 = str_val(global("PS1"));
			shf = shf_sopen(NULL, strlen(ps1) * 2,
			    SHF_WR | SHF_DYNAMIC, NULL);
			/*
			 * copy PS1: "!!" yields one '!', a lone '!' yields
			 * the number; in the latter case ps1 has already
			 * been advanced past the '!' by the test itself
			 */
			while (*ps1)
				if (*ps1 != '!' || *++ps1 == '!')
					shf_putchar(*ps1++, shf);
				else
					shf_fprintf(shf, "%d",
						s ? s->line + 1 : 0);
			ps1 = shf_sclose(shf);
			/* remember the area in use before newenv() */
			saved_atemp = ATEMP;
			newenv(E_ERRH);
			if (sigsetjmp(e->jbuf, 0)) {
				prompt = safe_prompt;
				/*
				 * Don't print an error - assume it has already
				 * been printed. Reason is we may have forked
				 * to run a command and the child may be
				 * unwinding its stack through this code as it
				 * exits.
				 */
			} else {
				char *cp = substitute(ps1, 0);
				/*
				 * copy into the saved area; presumably so
				 * the prompt outlives quitenv() - confirm
				 */
				strdupx(prompt, cp, saved_atemp);
			}
			quitenv(NULL);
		}
		break;
	/* command continuation */
	case PS2:
		prompt = str_val(global("PS2"));
		break;
	}
}

/*
 * Walk the prompt string cp while tracking the resulting cursor position.
 *
 * With doprint set, characters are written to shl_out once the position
 * reached exceeds ntruncate (characters between a delimiter pair are
 * always written), and shl_out is flushed at the end. Without doprint
 * nothing is written. Returns x_cols * lines + columns, i.e. the final
 * position counted in screen cells.
 */
static int
dopprompt(const char *cp, int ntruncate, bool doprint)
{
	int columns = 0, lines = 0, indelimit = 0;
	char delimiter = 0;

	/*
	 * Undocumented AT&T ksh feature:
	 * If the second char in the prompt string is \r then the first
	 * char is taken to be a non-printing delimiter and any chars
	 * between two instances of the delimiter are not considered to
	 * be part of the prompt length
	 */
	if (*cp && cp[1] == '\r') {
		delimiter = *cp;
		cp += 2;
	}
	for (; *cp; cp++) {
		if (indelimit && *cp != delimiter)
			/* inside a delimiter pair: takes up no width */
			;
		else if (*cp == '\n' || *cp == '\r') {
			/* account for wrapped lines, plus one for a newline */
			lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
			columns = 0;
		} else if (*cp == '\t') {
			/* advance to the next multiple of eight */
			columns = (columns | 7) + 1;
		} else if (*cp == '\b') {
			if (columns > 0)
				columns--;
		} else if (*cp == delimiter)
			indelimit = !indelimit;
		else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
			const char *cp2;
			/*
			 * NOTE(review): utf_widthadj presumably returns the
			 * display width and sets cp2 past the character
			 */
			columns += utf_widthadj(cp, &cp2);
			if (doprint && (indelimit ||
			    (ntruncate < (x_cols * lines + columns))))
				shf_write(cp, cp2 - cp, shl_out);
			cp = cp2 - /* loop increment */ 1;
			/* already written above; skip the putc below */
			continue;
		} else
			columns++;
		/* the delimiter character itself is never written */
		if (doprint && (*cp != delimiter) &&
		    (indelimit || (ntruncate < (x_cols * lines + columns))))
			shf_putc(*cp, shl_out);
	}
	if (doprint)
		shf_flush(shl_out);
	return (x_cols * lines + columns);
}


/*
 * Print the prompt string cp to shl_out; see dopprompt() for the effect
 * of ntruncate. The computed length is discarded.
 */
void
pprompt(const char *cp, int ntruncate)
{
	dopprompt(cp, ntruncate, true);
}

/*
 * Return the length of the prompt string cp as computed by dopprompt()
 * with ntruncate 0, without printing anything.
 */
int
promptlen(const char *cp)
{
	return (dopprompt(cp, 0, false));
}

/*
 * Read the variable part of a ${...} expression (i.e. up to but not
 * including the :[-+?=#%] or close-brace).
 *
 * Characters are appended at wp inside the XString *wsp, followed by a
 * NUL. The character that ended the variable part is pushed back onto
 * the input. Returns the write pointer just past the stored NUL.
 */
static char *
get_brace_var(XString *wsp, char *wp)
{
	char c;
	enum parse_state {
		PS_INITIAL, PS_SAW_HASH, PS_IDENT,
		PS_NUMBER, PS_VAR1
	} state = PS_INITIAL;

	while (/* CONSTCOND */ 1) {
		c = getsc();
		/* State machine to figure out where the variable part ends. */
		switch (state) {
		case PS_INITIAL:
			/* leading '#', '!' or '%': store it and look again */
			if (c == '#' || c == '!' || c == '%') {
				state = PS_SAW_HASH;
				break;
			}
			/* FALLTHROUGH */
		case PS_SAW_HASH:
			if (ksh_isalphx(c))
				state = PS_IDENT;
			else if (ksh_isdigit(c))
				state = PS_NUMBER;
			else if (c == '#') {
				if (state == PS_SAW_HASH) {
					char c2;

					/* peek at the character after this '#' */
					c2 = getsc();
					ungetsc(c2);
					if (c2 != '}') {
						/*
						 * NOTE(review): c is pushed
						 * back here and once more at
						 * out: - confirm the double
						 * unget is intended
						 */
						ungetsc(c);
						goto out;
					}
				}
				state = PS_VAR1;
			} else if (ctype(c, C_VAR1))
				state = PS_VAR1;
			else
				goto out;
			break;
		case PS_IDENT:
			if (!ksh_isalnux(c)) {
				if (c == '[') {
					char *tmp, *p;

					if (!arraysub(&tmp))
						yyerror("missing ]\n");
					/* store the '[' and the saved subscript */
					*wp++ = c;
					for (p = tmp; *p; ) {
						Xcheck(*wsp, wp);
						*wp++ = *p++;
					}
					afree(tmp, ATEMP);
					/* the ] */
					c = getsc();
				}
				goto out;
			}
			break;
		case PS_NUMBER:
			if (!ksh_isdigit(c))
				goto out;
			break;
		case PS_VAR1:
			/* a one-character name is complete already */
			goto out;
		}
		/* c belongs to the variable part: store it */
		Xcheck(*wsp, wp);
		*wp++ = c;
	}
 out:
	/* end of variable part */
	*wp++ = '\0';
	ungetsc(c);
	return (wp);
}

/*
 * Save an array subscript - returns true if matching bracket found, false
 * if eof or newline was found.
 *
 * The text is collected in a new string allocated from ATEMP and handed
 * back through *strp. The character that ended the scan (the matching
 * ']', the newline or the EOF NUL) is stored too, followed by a NUL; on
 * EOF the string therefore ends in two NULs.
 */
static bool
arraysub(char **strp)
{
	XString ws;
	char *wp, c;
	/* we are just past the initial [ */
	int depth = 1;

	Xinit(ws, wp, 32, ATEMP);

	do {
		c = getsc();
		Xcheck(ws, wp);
		*wp++ = c;
		/* track nested brackets */
		if (c == '[')
			depth++;
		else if (c == ']')
			depth--;
	} while (depth > 0 && c && c != '\n');

	*wp++ = '\0';
	*strp = Xclose(ws, wp);

	return (tobool(depth == 0));
}

/*
 * Unget a char: handles case when we are already at the start of the buffer
 *
 * Besides pushing c back via ungetsc_(), this decrements a non-zero
 * backslash_skip and steps every non-empty buffer on the retrace_info
 * list back by one character. An EOF (c == 0 while the source string is
 * `null`) is not pushed back.
 */
static void
ungetsc(int c)
{
	struct sretrace_info *rp = retrace_info;

	if (backslash_skip)
		backslash_skip--;
	/* Don't unget EOF... */
	if (source->str == null && c == '\0')
		return;
	while (rp) {
		if (Xlength(rp->xs, rp->xp))
			rp->xp--;
		rp = rp->next;
	}
	ungetsc_(c);
}
/*
 * Low-level unget: step the read pointer of the current source back by
 * one if it is not at the start of its buffer; otherwise push a new
 * SREREAD source holding just c in its ugbuf on top of the source stack.
 */
static void
ungetsc_(int c)
{
	if (source->str > source->start)
		source->str--;
	else {
		Source *s;

		s = pushs(SREREAD, source->areap);
		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
		s->start = s->str = s->ugbuf;
		s->next = source;
		source = s;
	}
}


/*
 * Called to get a char that isn't a \newline sequence.
 *
 * backslash_skip is set to 1 when a backslash not followed by a newline
 * is returned; the next call then hands out the character after it
 * unfiltered and sets the counter to 2. Nothing is filtered at all while
 * ignore_backslash_newline is set.
 */
static int
getsc_bn(void)
{
	int c, c2;

	if (ignore_backslash_newline)
		return (o_getsc_u());

	if (backslash_skip == 1) {
		backslash_skip = 2;
		return (o_getsc_u());
	}

	backslash_skip = 0;

	while (/* CONSTCOND */ 1) {
		c = o_getsc_u();
		if (c == '\\') {
			if ((c2 = o_getsc_u()) == '\n')
				/* ignore the \newline; get the next char... */
				continue;
			/* not a continuation: keep c2 for the next call */
			ungetsc_(c2);
			backslash_skip = 1;
		}
		return (c);
	}
}

/*
 * Skip a UTF-8 byte order mark (bytes EF BB BF) at the current input
 * position. If all three bytes are found they are consumed and 8 is
 * OR'ed into UTFMODE; otherwise everything read is pushed back, last
 * byte first, so that the input is left unchanged.
 */
void
yyskiputf8bom(void)
{
	int c;

	if ((unsigned char)(c = o_getsc_u()) != 0xEF) {
		ungetsc_(c);
		return;
	}
	if ((unsigned char)(c = o_getsc_u()) != 0xBB) {
		ungetsc_(c);
		ungetsc_(0xEF);
		return;
	}
	if ((unsigned char)(c = o_getsc_u()) != 0xBF) {
		ungetsc_(c);
		ungetsc_(0xBB);
		ungetsc_(0xEF);
		return;
	}
	UTFMODE |= 8;
}

/*
 * Allocate a fresh block of STATE_BSIZE Lex_state entries from ATEMP and
 * make it the current block in *si. Entry 0 of the new block only stores
 * old_end as back link; the returned pointer is entry 1, the first one
 * available for use.
 */
static Lex_state *
push_state_(State_info *si, Lex_state *old_end)
{
	Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);

	news[0].ls_base = old_end;
	si->base = &news[0];
	si->end = &news[STATE_BSIZE];
	return (&news[1]);
}

/*
 * Undo push_state_(): make the block whose end pointer is stored in
 * old_end->ls_base current again, free the block *si pointed to so far
 * and return the last entry of the restored block.
 * NOTE(review): old_end is presumably the link entry written by
 * push_state_() - confirm against the callers.
 */
static Lex_state *
pop_state_(State_info *si, Lex_state *old_end)
{
	Lex_state *old_base = si->base;

	si->base = old_end->ls_base - STATE_BSIZE;
	si->end = old_end->ls_base;

	afree(old_base, ATEMP);

	return (si->base + STATE_BSIZE - 1);
}

/* Plain function wrapper around getsc(): returns the next character. */
static int
s_get(void)
{
	return (getsc());
}

/* Plain function wrapper around ungetsc(): pushes c back onto the input. */
static void
s_put(int c)
{
	ungetsc(c);
}