1 /* $OpenBSD: lex.c,v 1.44 2008/07/03 17:52:08 otto Exp $ */ 2 3 /*- 4 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 5 * Thorsten Glaser <tg (at) mirbsd.org> 6 * 7 * Provided that these terms and disclaimer and all copyright notices 8 * are retained or reproduced in an accompanying document, permission 9 * is granted to deal in this work without restriction, including un- 10 * limited rights to use, publicly perform, distribute, sell, modify, 11 * merge, give away, or sublicence. 12 * 13 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to 14 * the utmost extent permitted by applicable law, neither express nor 15 * implied; without malicious intent or gross negligence. In no event 16 * may a licensor, author or contributor be held liable for indirect, 17 * direct, other damage, loss, or other issues arising in any way out 18 * of dealing in the work, even if advised of the possibility of such 19 * damage or existence of a defect, except proven that it results out 20 * of said person's immediate fault when using the work as intended. 21 */ 22 23 #include "sh.h" 24 25 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.118 2010/07/25 11:35:41 tg Exp $"); 26 27 /* 28 * states while lexing word 29 */ 30 #define SBASE 0 /* outside any lexical constructs */ 31 #define SWORD 1 /* implicit quoting for substitute() */ 32 #define SLETPAREN 2 /* inside (( )), implicit quoting */ 33 #define SSQUOTE 3 /* inside '' */ 34 #define SDQUOTE 4 /* inside "" */ 35 #define SEQUOTE 5 /* inside $'' */ 36 #define SBRACE 6 /* inside ${} */ 37 #define SQBRACE 7 /* inside "${}" */ 38 #define SCSPAREN 8 /* inside $() */ 39 #define SBQUOTE 9 /* inside `` */ 40 #define SASPAREN 10 /* inside $(( )) */ 41 #define SHEREDELIM 11 /* parsing <<,<<- delimiter */ 42 #define SHEREDQUOTE 12 /* parsing " in <<,<<- delimiter */ 43 #define SPATTERN 13 /* parsing *(...|...) pattern (*+?@!) */ 44 #define STBRACE 14 /* parsing ${...[#%]...} */ 45 #define SLETARRAY 15 /* inside =( ), just copy */ 46 #define SADELIM 16 /* like SBASE, looking for delimiter */ 47 #define SHERESTRING 17 /* parsing <<< string */ 48 49 /* Structure to keep track of the lexing state and the various pieces of info 50 * needed for each particular state. */ 51 typedef struct lex_state Lex_state; 52 struct lex_state { 53 int ls_state; 54 union { 55 /* $(...) */ 56 struct scsparen_info { 57 int nparen; /* count open parenthesis */ 58 int csstate; /* XXX remove */ 59 #define ls_scsparen ls_info.u_scsparen 60 } u_scsparen; 61 62 /* $((...)) */ 63 struct sasparen_info { 64 int nparen; /* count open parenthesis */ 65 int start; /* marks start of $(( in output str */ 66 #define ls_sasparen ls_info.u_sasparen 67 } u_sasparen; 68 69 /* ((...)) */ 70 struct sletparen_info { 71 int nparen; /* count open parenthesis */ 72 #define ls_sletparen ls_info.u_sletparen 73 } u_sletparen; 74 75 /* `...` */ 76 struct sbquote_info { 77 int indquotes; /* true if in double quotes: "`...`" */ 78 #define ls_sbquote ls_info.u_sbquote 79 } u_sbquote; 80 81 #ifndef MKSH_SMALL 82 /* =(...) */ 83 struct sletarray_info { 84 int nparen; /* count open parentheses */ 85 #define ls_sletarray ls_info.u_sletarray 86 } u_sletarray; 87 #endif 88 89 /* ADELIM */ 90 struct sadelim_info { 91 unsigned char nparen; /* count open parentheses */ 92 #define SADELIM_BASH 0 93 #define SADELIM_MAKE 1 94 unsigned char style; 95 unsigned char delimiter; 96 unsigned char num; 97 unsigned char flags; /* ofs. into sadelim_flags[] */ 98 #define ls_sadelim ls_info.u_sadelim 99 } u_sadelim; 100 101 /* $'...' */ 102 struct sequote_info { 103 bool got_NUL; /* ignore rest of string */ 104 #define ls_sequote ls_info.u_sequote 105 } u_sequote; 106 107 Lex_state *base; /* used to point to next state block */ 108 } ls_info; 109 }; 110 111 typedef struct { 112 Lex_state *base; 113 Lex_state *end; 114 } State_info; 115 116 static void readhere(struct ioword *); 117 static int getsc__(void); 118 static void getsc_line(Source *); 119 static int getsc_bn(void); 120 static int s_get(void); 121 static void s_put(int); 122 static char *get_brace_var(XString *, char *); 123 static int arraysub(char **); 124 static const char *ungetsc(int); 125 static void gethere(bool); 126 static Lex_state *push_state_(State_info *, Lex_state *); 127 static Lex_state *pop_state_(State_info *, Lex_state *); 128 129 static int dopprompt(const char *, int, bool); 130 131 static int backslash_skip; 132 static int ignore_backslash_newline; 133 134 /* optimised getsc_bn() */ 135 #define _getsc() (*source->str != '\0' && *source->str != '\\' \ 136 && !backslash_skip && !(source->flags & SF_FIRST) \ 137 ? *source->str++ : getsc_bn()) 138 /* optimised getsc__() */ 139 #define _getsc_() ((*source->str != '\0') && !(source->flags & SF_FIRST) \ 140 ? *source->str++ : getsc__()) 141 142 #ifdef MKSH_SMALL 143 static int getsc(void); 144 static int getsc_(void); 145 146 static int 147 getsc(void) 148 { 149 return (_getsc()); 150 } 151 152 static int 153 getsc_(void) 154 { 155 return (_getsc_()); 156 } 157 #else 158 /* !MKSH_SMALL: use them inline */ 159 #define getsc() _getsc() 160 #define getsc_() _getsc_() 161 #endif 162 163 #define STATE_BSIZE 32 164 165 #define PUSH_STATE(s) do { \ 166 if (++statep == state_info.end) \ 167 statep = push_state_(&state_info, statep); \ 168 state = statep->ls_state = (s); \ 169 } while (0) 170 171 #define POP_STATE() do { \ 172 if (--statep == state_info.base) \ 173 statep = pop_state_(&state_info, statep); \ 174 state = statep->ls_state; \ 175 } while (0) 176 177 /** 178 * Lexical analyser 179 * 180 * tokens are not regular expressions, they are LL(1). 181 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))". 182 * hence the state stack. 183 */ 184 185 int 186 yylex(int cf) 187 { 188 Lex_state states[STATE_BSIZE], *statep, *s2, *base; 189 State_info state_info; 190 int c, c2, state; 191 XString ws; /* expandable output word */ 192 char *wp; /* output word pointer */ 193 char *sp, *dp; 194 195 Again: 196 states[0].ls_state = -1; 197 states[0].ls_info.base = NULL; 198 statep = &states[1]; 199 state_info.base = states; 200 state_info.end = &state_info.base[STATE_BSIZE]; 201 202 Xinit(ws, wp, 64, ATEMP); 203 204 backslash_skip = 0; 205 ignore_backslash_newline = 0; 206 207 if (cf&ONEWORD) 208 state = SWORD; 209 else if (cf&LETEXPR) { 210 /* enclose arguments in (double) quotes */ 211 *wp++ = OQUOTE; 212 state = SLETPAREN; 213 statep->ls_sletparen.nparen = 0; 214 #ifndef MKSH_SMALL 215 } else if (cf&LETARRAY) { 216 state = SLETARRAY; 217 statep->ls_sletarray.nparen = 0; 218 #endif 219 } else { /* normal lexing */ 220 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE; 221 while ((c = getsc()) == ' ' || c == '\t') 222 ; 223 if (c == '#') { 224 ignore_backslash_newline++; 225 while ((c = getsc()) != '\0' && c != '\n') 226 ; 227 ignore_backslash_newline--; 228 } 229 ungetsc(c); 230 } 231 if (source->flags & SF_ALIAS) { /* trailing ' ' in alias definition */ 232 source->flags &= ~SF_ALIAS; 233 cf |= ALIAS; 234 } 235 236 /* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */ 237 statep->ls_state = state; 238 239 /* check for here string */ 240 if (state == SHEREDELIM) { 241 c = getsc(); 242 if (c == '<') { 243 state = SHERESTRING; 244 while ((c = getsc()) == ' ' || c == '\t') 245 ; 246 ungetsc(c); 247 c = '<'; 248 goto accept_nonword; 249 } 250 ungetsc(c); 251 } 252 253 /* collect non-special or quoted characters to form word */ 254 while (!((c = getsc()) == 0 || 255 ((state == SBASE || state == SHEREDELIM || state == SHERESTRING) && 256 ctype(c, C_LEX1)))) { 257 accept_nonword: 258 Xcheck(ws, wp); 259 switch (state) { 260 case SADELIM: 261 if (c == '(') 262 statep->ls_sadelim.nparen++; 263 else if (c == ')') 264 statep->ls_sadelim.nparen--; 265 else if (statep->ls_sadelim.nparen == 0 && 266 (c == /*{*/ '}' || c == statep->ls_sadelim.delimiter)) { 267 *wp++ = ADELIM; 268 *wp++ = c; 269 if (c == /*{*/ '}' || --statep->ls_sadelim.num == 0) 270 POP_STATE(); 271 if (c == /*{*/ '}') 272 POP_STATE(); 273 break; 274 } 275 /* FALLTHROUGH */ 276 case SBASE: 277 if (c == '[' && (cf & (VARASN|ARRAYVAR))) { 278 *wp = EOS; /* temporary */ 279 if (is_wdvarname(Xstring(ws, wp), false)) { 280 char *p, *tmp; 281 282 if (arraysub(&tmp)) { 283 *wp++ = CHAR; 284 *wp++ = c; 285 for (p = tmp; *p; ) { 286 Xcheck(ws, wp); 287 *wp++ = CHAR; 288 *wp++ = *p++; 289 } 290 afree(tmp, ATEMP); 291 break; 292 } else { 293 Source *s; 294 295 s = pushs(SREREAD, 296 source->areap); 297 s->start = s->str = 298 s->u.freeme = tmp; 299 s->next = source; 300 source = s; 301 } 302 } 303 *wp++ = CHAR; 304 *wp++ = c; 305 break; 306 } 307 /* FALLTHROUGH */ 308 Sbase1: /* includes *(...|...) pattern (*+?@!) */ 309 if (c == '*' || c == '@' || c == '+' || c == '?' || 310 c == '!') { 311 c2 = getsc(); 312 if (c2 == '(' /*)*/ ) { 313 *wp++ = OPAT; 314 *wp++ = c; 315 PUSH_STATE(SPATTERN); 316 break; 317 } 318 ungetsc(c2); 319 } 320 /* FALLTHROUGH */ 321 Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */ 322 switch (c) { 323 case '\\': 324 getsc_qchar: 325 if ((c = getsc())) { 326 /* trailing \ is lost */ 327 *wp++ = QCHAR; 328 *wp++ = c; 329 } 330 break; 331 case '\'': 332 open_ssquote: 333 *wp++ = OQUOTE; 334 ignore_backslash_newline++; 335 PUSH_STATE(SSQUOTE); 336 break; 337 case '"': 338 open_sdquote: 339 *wp++ = OQUOTE; 340 PUSH_STATE(SDQUOTE); 341 break; 342 default: 343 goto Subst; 344 } 345 break; 346 347 Subst: 348 switch (c) { 349 case '\\': 350 c = getsc(); 351 switch (c) { 352 case '"': 353 if ((cf & HEREDOC)) 354 goto heredocquote; 355 /* FALLTHROUGH */ 356 case '\\': 357 case '$': case '`': 358 store_qchar: 359 *wp++ = QCHAR; 360 *wp++ = c; 361 break; 362 default: 363 heredocquote: 364 Xcheck(ws, wp); 365 if (c) { 366 /* trailing \ is lost */ 367 *wp++ = CHAR; 368 *wp++ = '\\'; 369 *wp++ = CHAR; 370 *wp++ = c; 371 } 372 break; 373 } 374 break; 375 case '$': 376 subst_dollar: 377 c = getsc(); 378 if (c == '(') /*)*/ { 379 c = getsc(); 380 if (c == '(') /*)*/ { 381 PUSH_STATE(SASPAREN); 382 statep->ls_sasparen.nparen = 2; 383 statep->ls_sasparen.start = 384 Xsavepos(ws, wp); 385 *wp++ = EXPRSUB; 386 } else { 387 ungetsc(c); 388 PUSH_STATE(SCSPAREN); 389 statep->ls_scsparen.nparen = 1; 390 statep->ls_scsparen.csstate = 0; 391 *wp++ = COMSUB; 392 } 393 } else if (c == '{') /*}*/ { 394 *wp++ = OSUBST; 395 *wp++ = '{'; /*}*/ 396 wp = get_brace_var(&ws, wp); 397 c = getsc(); 398 /* allow :# and :% (ksh88 compat) */ 399 if (c == ':') { 400 *wp++ = CHAR; 401 *wp++ = c; 402 c = getsc(); 403 if (c == ':') { 404 *wp++ = CHAR; 405 *wp++ = '0'; 406 *wp++ = ADELIM; 407 *wp++ = ':'; 408 PUSH_STATE(SBRACE); 409 PUSH_STATE(SADELIM); 410 statep->ls_sadelim.style = SADELIM_BASH; 411 statep->ls_sadelim.delimiter = ':'; 412 statep->ls_sadelim.num = 1; 413 statep->ls_sadelim.nparen = 0; 414 break; 415 } else if (ksh_isdigit(c) || 416 c == '('/*)*/ || c == ' ' || 417 c == '$' /* XXX what else? */) { 418 /* substring subst. */ 419 if (c != ' ') { 420 *wp++ = CHAR; 421 *wp++ = ' '; 422 } 423 ungetsc(c); 424 PUSH_STATE(SBRACE); 425 PUSH_STATE(SADELIM); 426 statep->ls_sadelim.style = SADELIM_BASH; 427 statep->ls_sadelim.delimiter = ':'; 428 statep->ls_sadelim.num = 2; 429 statep->ls_sadelim.nparen = 0; 430 break; 431 } 432 } else if (c == '/') { 433 *wp++ = CHAR; 434 *wp++ = c; 435 if ((c = getsc()) == '/') { 436 *wp++ = ADELIM; 437 *wp++ = c; 438 } else 439 ungetsc(c); 440 PUSH_STATE(SBRACE); 441 PUSH_STATE(SADELIM); 442 statep->ls_sadelim.style = SADELIM_BASH; 443 statep->ls_sadelim.delimiter = '/'; 444 statep->ls_sadelim.num = 1; 445 statep->ls_sadelim.nparen = 0; 446 break; 447 } 448 /* If this is a trim operation, 449 * treat (,|,) specially in STBRACE. 450 */ 451 if (ctype(c, C_SUBOP2)) { 452 ungetsc(c); 453 PUSH_STATE(STBRACE); 454 } else { 455 ungetsc(c); 456 if (state == SDQUOTE) 457 PUSH_STATE(SQBRACE); 458 else 459 PUSH_STATE(SBRACE); 460 } 461 } else if (ksh_isalphx(c)) { 462 *wp++ = OSUBST; 463 *wp++ = 'X'; 464 do { 465 Xcheck(ws, wp); 466 *wp++ = c; 467 c = getsc(); 468 } while (ksh_isalnux(c)); 469 *wp++ = '\0'; 470 *wp++ = CSUBST; 471 *wp++ = 'X'; 472 ungetsc(c); 473 } else if (ctype(c, C_VAR1 | C_DIGIT)) { 474 Xcheck(ws, wp); 475 *wp++ = OSUBST; 476 *wp++ = 'X'; 477 *wp++ = c; 478 *wp++ = '\0'; 479 *wp++ = CSUBST; 480 *wp++ = 'X'; 481 } else if (c == '\'' && (state == SBASE)) { 482 /* XXX which other states are valid? */ 483 *wp++ = OQUOTE; 484 ignore_backslash_newline++; 485 PUSH_STATE(SEQUOTE); 486 statep->ls_sequote.got_NUL = false; 487 break; 488 } else { 489 *wp++ = CHAR; 490 *wp++ = '$'; 491 ungetsc(c); 492 } 493 break; 494 case '`': 495 subst_gravis: 496 PUSH_STATE(SBQUOTE); 497 *wp++ = COMSUB; 498 /* Need to know if we are inside double quotes 499 * since sh/AT&T-ksh translate the \" to " in 500 * "`...\"...`". 501 * This is not done in POSIX mode (section 502 * 3.2.3, Double Quotes: "The backquote shall 503 * retain its special meaning introducing the 504 * other form of command substitution (see 505 * 3.6.3). The portion of the quoted string 506 * from the initial backquote and the 507 * characters up to the next backquote that 508 * is not preceded by a backslash (having 509 * escape characters removed) defines that 510 * command whose output replaces `...` when 511 * the word is expanded." 512 * Section 3.6.3, Command Substitution: 513 * "Within the backquoted style of command 514 * substitution, backslash shall retain its 515 * literal meaning, except when followed by 516 * $ ` \."). 517 */ 518 statep->ls_sbquote.indquotes = 0; 519 s2 = statep; 520 base = state_info.base; 521 while (1) { 522 for (; s2 != base; s2--) { 523 if (s2->ls_state == SDQUOTE) { 524 statep->ls_sbquote.indquotes = 1; 525 break; 526 } 527 } 528 if (s2 != base) 529 break; 530 if (!(s2 = s2->ls_info.base)) 531 break; 532 base = s2-- - STATE_BSIZE; 533 } 534 break; 535 case QCHAR: 536 if (cf & LQCHAR) { 537 *wp++ = QCHAR; 538 *wp++ = getsc(); 539 break; 540 } 541 /* FALLTHROUGH */ 542 default: 543 store_char: 544 *wp++ = CHAR; 545 *wp++ = c; 546 } 547 break; 548 549 case SEQUOTE: 550 if (c == '\'') { 551 POP_STATE(); 552 *wp++ = CQUOTE; 553 ignore_backslash_newline--; 554 } else if (c == '\\') { 555 if ((c2 = unbksl(true, s_get, s_put)) == -1) 556 c2 = s_get(); 557 if (c2 == 0) 558 statep->ls_sequote.got_NUL = true; 559 if (!statep->ls_sequote.got_NUL) { 560 char ts[4]; 561 562 if ((unsigned int)c2 < 0x100) { 563 *wp++ = QCHAR; 564 *wp++ = c2; 565 } else { 566 c = utf_wctomb(ts, c2 - 0x100); 567 ts[c] = 0; 568 for (c = 0; ts[c]; ++c) { 569 *wp++ = QCHAR; 570 *wp++ = ts[c]; 571 } 572 } 573 } 574 } else if (!statep->ls_sequote.got_NUL) { 575 *wp++ = QCHAR; 576 *wp++ = c; 577 } 578 break; 579 580 case SSQUOTE: 581 if (c == '\'') { 582 POP_STATE(); 583 *wp++ = CQUOTE; 584 ignore_backslash_newline--; 585 } else { 586 *wp++ = QCHAR; 587 *wp++ = c; 588 } 589 break; 590 591 case SDQUOTE: 592 if (c == '"') { 593 POP_STATE(); 594 *wp++ = CQUOTE; 595 } else 596 goto Subst; 597 break; 598 599 case SCSPAREN: /* $( ... ) */ 600 /* todo: deal with $(...) quoting properly 601 * kludge to partly fake quoting inside $(...): doesn't 602 * really work because nested $(...) or ${...} inside 603 * double quotes aren't dealt with. 604 */ 605 switch (statep->ls_scsparen.csstate) { 606 case 0: /* normal */ 607 switch (c) { 608 case '(': 609 statep->ls_scsparen.nparen++; 610 break; 611 case ')': 612 statep->ls_scsparen.nparen--; 613 break; 614 case '\\': 615 statep->ls_scsparen.csstate = 1; 616 break; 617 case '"': 618 statep->ls_scsparen.csstate = 2; 619 break; 620 case '\'': 621 statep->ls_scsparen.csstate = 4; 622 ignore_backslash_newline++; 623 break; 624 } 625 break; 626 627 case 1: /* backslash in normal mode */ 628 case 3: /* backslash in double quotes */ 629 --statep->ls_scsparen.csstate; 630 break; 631 632 case 2: /* double quotes */ 633 if (c == '"') 634 statep->ls_scsparen.csstate = 0; 635 else if (c == '\\') 636 statep->ls_scsparen.csstate = 3; 637 break; 638 639 case 4: /* single quotes */ 640 if (c == '\'') { 641 statep->ls_scsparen.csstate = 0; 642 ignore_backslash_newline--; 643 } 644 break; 645 } 646 if (statep->ls_scsparen.nparen == 0) { 647 POP_STATE(); 648 *wp++ = 0; /* end of COMSUB */ 649 } else 650 *wp++ = c; 651 break; 652 653 case SASPAREN: /* $(( ... )) */ 654 /* XXX should nest using existing state machine 655 * (embed "...", $(...), etc.) */ 656 if (c == '(') 657 statep->ls_sasparen.nparen++; 658 else if (c == ')') { 659 statep->ls_sasparen.nparen--; 660 if (statep->ls_sasparen.nparen == 1) { 661 /*(*/ 662 if ((c2 = getsc()) == ')') { 663 POP_STATE(); 664 /* end of EXPRSUB */ 665 *wp++ = 0; 666 break; 667 } else { 668 char *s; 669 670 ungetsc(c2); 671 /* mismatched parenthesis - 672 * assume we were really 673 * parsing a $(...) expression 674 */ 675 s = Xrestpos(ws, wp, 676 statep->ls_sasparen.start); 677 memmove(s + 1, s, wp - s); 678 *s++ = COMSUB; 679 *s = '('; /*)*/ 680 wp++; 681 statep->ls_scsparen.nparen = 1; 682 statep->ls_scsparen.csstate = 0; 683 state = statep->ls_state = 684 SCSPAREN; 685 } 686 } 687 } 688 *wp++ = c; 689 break; 690 691 case SQBRACE: 692 if (c == '\\') { 693 /* 694 * perform POSIX "quote removal" if the back- 695 * slash is "special", i.e. same cases as the 696 * {case '\\':} in Subst: plus closing brace; 697 * in mksh code "quote removal" on '\c' means 698 * write QCHAR+c, otherwise CHAR+\+CHAR+c are 699 * emitted (in heredocquote:) 700 */ 701 if ((c = getsc()) == '"' || c == '\\' || 702 c == '$' || c == '`' || c == /*{*/'}') 703 goto store_qchar; 704 goto heredocquote; 705 } 706 goto common_SQBRACE; 707 708 case SBRACE: 709 if (c == '\'') 710 goto open_ssquote; 711 else if (c == '\\') 712 goto getsc_qchar; 713 common_SQBRACE: 714 if (c == '"') 715 goto open_sdquote; 716 else if (c == '$') 717 goto subst_dollar; 718 else if (c == '`') 719 goto subst_gravis; 720 else if (c != /*{*/ '}') 721 goto store_char; 722 POP_STATE(); 723 *wp++ = CSUBST; 724 *wp++ = /*{*/ '}'; 725 break; 726 727 case STBRACE: 728 /* Same as SBASE, except (,|,) treated specially */ 729 if (c == /*{*/ '}') { 730 POP_STATE(); 731 *wp++ = CSUBST; 732 *wp++ = /*{*/ '}'; 733 } else if (c == '|') { 734 *wp++ = SPAT; 735 } else if (c == '(') { 736 *wp++ = OPAT; 737 *wp++ = ' '; /* simile for @ */ 738 PUSH_STATE(SPATTERN); 739 } else 740 goto Sbase1; 741 break; 742 743 case SBQUOTE: 744 if (c == '`') { 745 *wp++ = 0; 746 POP_STATE(); 747 } else if (c == '\\') { 748 switch (c = getsc()) { 749 case '\\': 750 case '$': case '`': 751 *wp++ = c; 752 break; 753 case '"': 754 if (statep->ls_sbquote.indquotes) { 755 *wp++ = c; 756 break; 757 } 758 /* FALLTHROUGH */ 759 default: 760 if (c) { 761 /* trailing \ is lost */ 762 *wp++ = '\\'; 763 *wp++ = c; 764 } 765 break; 766 } 767 } else 768 *wp++ = c; 769 break; 770 771 case SWORD: /* ONEWORD */ 772 goto Subst; 773 774 case SLETPAREN: /* LETEXPR: (( ... )) */ 775 /*(*/ 776 if (c == ')') { 777 if (statep->ls_sletparen.nparen > 0) 778 --statep->ls_sletparen.nparen; 779 else if ((c2 = getsc()) == /*(*/ ')') { 780 c = 0; 781 *wp++ = CQUOTE; 782 goto Done; 783 } else { 784 Source *s; 785 786 ungetsc(c2); 787 /* mismatched parenthesis - 788 * assume we were really 789 * parsing a $(...) expression 790 */ 791 *wp = EOS; 792 sp = Xstring(ws, wp); 793 dp = wdstrip(sp, true, false); 794 s = pushs(SREREAD, source->areap); 795 s->start = s->str = s->u.freeme = dp; 796 s->next = source; 797 source = s; 798 return ('('/*)*/); 799 } 800 } else if (c == '(') 801 /* parenthesis inside quotes and backslashes 802 * are lost, but AT&T ksh doesn't count them 803 * either 804 */ 805 ++statep->ls_sletparen.nparen; 806 goto Sbase2; 807 808 #ifndef MKSH_SMALL 809 case SLETARRAY: /* LETARRAY: =( ... ) */ 810 if (c == '('/*)*/) 811 ++statep->ls_sletarray.nparen; 812 else if (c == /*(*/')') 813 if (statep->ls_sletarray.nparen-- == 0) { 814 c = 0; 815 goto Done; 816 } 817 *wp++ = CHAR; 818 *wp++ = c; 819 break; 820 #endif 821 822 case SHERESTRING: /* <<< delimiter */ 823 if (c == '\\') { 824 c = getsc(); 825 if (c) { 826 /* trailing \ is lost */ 827 *wp++ = QCHAR; 828 *wp++ = c; 829 } 830 /* invoke quoting mode */ 831 Xstring(ws, wp)[0] = QCHAR; 832 } else if (c == '$') { 833 if ((c2 = getsc()) == '\'') { 834 PUSH_STATE(SEQUOTE); 835 statep->ls_sequote.got_NUL = false; 836 goto sherestring_quoted; 837 } 838 ungetsc(c2); 839 goto sherestring_regular; 840 } else if (c == '\'') { 841 PUSH_STATE(SSQUOTE); 842 sherestring_quoted: 843 *wp++ = OQUOTE; 844 ignore_backslash_newline++; 845 /* invoke quoting mode */ 846 Xstring(ws, wp)[0] = QCHAR; 847 } else if (c == '"') { 848 state = statep->ls_state = SHEREDQUOTE; 849 *wp++ = OQUOTE; 850 /* just don't IFS split; no quoting mode */ 851 } else { 852 sherestring_regular: 853 *wp++ = CHAR; 854 *wp++ = c; 855 } 856 break; 857 858 case SHEREDELIM: /* <<,<<- delimiter */ 859 /* XXX chuck this state (and the next) - use 860 * the existing states ($ and \`...` should be 861 * stripped of their specialness after the 862 * fact). 863 */ 864 /* here delimiters need a special case since 865 * $ and `...` are not to be treated specially 866 */ 867 if (c == '\\') { 868 c = getsc(); 869 if (c) { 870 /* trailing \ is lost */ 871 *wp++ = QCHAR; 872 *wp++ = c; 873 } 874 } else if (c == '$') { 875 if ((c2 = getsc()) == '\'') { 876 PUSH_STATE(SEQUOTE); 877 statep->ls_sequote.got_NUL = false; 878 goto sheredelim_quoted; 879 } 880 ungetsc(c2); 881 goto sheredelim_regular; 882 } else if (c == '\'') { 883 PUSH_STATE(SSQUOTE); 884 sheredelim_quoted: 885 *wp++ = OQUOTE; 886 ignore_backslash_newline++; 887 } else if (c == '"') { 888 state = statep->ls_state = SHEREDQUOTE; 889 *wp++ = OQUOTE; 890 } else { 891 sheredelim_regular: 892 *wp++ = CHAR; 893 *wp++ = c; 894 } 895 break; 896 897 case SHEREDQUOTE: /* " in <<,<<- delimiter */ 898 if (c == '"') { 899 *wp++ = CQUOTE; 900 state = statep->ls_state = 901 /* dp[1] == '<' means here string */ 902 Xstring(ws, wp)[1] == '<' ? 903 SHERESTRING : SHEREDELIM; 904 } else { 905 if (c == '\\') { 906 switch (c = getsc()) { 907 case '\\': case '"': 908 case '$': case '`': 909 break; 910 default: 911 if (c) { 912 /* trailing \ lost */ 913 *wp++ = CHAR; 914 *wp++ = '\\'; 915 } 916 break; 917 } 918 } 919 *wp++ = CHAR; 920 *wp++ = c; 921 } 922 break; 923 924 case SPATTERN: /* in *(...|...) pattern (*+?@!) */ 925 if ( /*(*/ c == ')') { 926 *wp++ = CPAT; 927 POP_STATE(); 928 } else if (c == '|') { 929 *wp++ = SPAT; 930 } else if (c == '(') { 931 *wp++ = OPAT; 932 *wp++ = ' '; /* simile for @ */ 933 PUSH_STATE(SPATTERN); 934 } else 935 goto Sbase1; 936 break; 937 } 938 } 939 Done: 940 Xcheck(ws, wp); 941 if (statep != &states[1]) 942 /* XXX figure out what is missing */ 943 yyerror("no closing quote\n"); 944 945 #ifndef MKSH_SMALL 946 if (state == SLETARRAY && statep->ls_sletarray.nparen != -1) 947 yyerror("%s: ')' missing\n", T_synerr); 948 #endif 949 950 /* This done to avoid tests for SHEREDELIM wherever SBASE tested */ 951 if (state == SHEREDELIM || state == SHERESTRING) 952 state = SBASE; 953 954 dp = Xstring(ws, wp); 955 if ((c == '<' || c == '>' || c == '&') && state == SBASE) { 956 struct ioword *iop = alloc(sizeof(struct ioword), ATEMP); 957 958 if (Xlength(ws, wp) == 0) 959 iop->unit = c == '<' ? 0 : 1; 960 else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) { 961 if (dp[c2] != CHAR) 962 goto no_iop; 963 if (!ksh_isdigit(dp[c2 + 1])) 964 goto no_iop; 965 iop->unit = (iop->unit * 10) + dp[c2 + 1] - '0'; 966 } 967 968 if (iop->unit >= FDBASE) 969 goto no_iop; 970 971 if (c == '&') { 972 if ((c2 = getsc()) != '>') { 973 ungetsc(c2); 974 goto no_iop; 975 } 976 c = c2; 977 iop->flag = IOBASH; 978 } else 979 iop->flag = 0; 980 981 c2 = getsc(); 982 /* <<, >>, <> are ok, >< is not */ 983 if (c == c2 || (c == '<' && c2 == '>')) { 984 iop->flag |= c == c2 ? 985 (c == '>' ? IOCAT : IOHERE) : IORDWR; 986 if (iop->flag == IOHERE) { 987 if ((c2 = getsc()) == '-') 988 iop->flag |= IOSKIP; 989 else 990 ungetsc(c2); 991 } 992 } else if (c2 == '&') 993 iop->flag |= IODUP | (c == '<' ? IORDUP : 0); 994 else { 995 iop->flag |= c == '>' ? IOWRITE : IOREAD; 996 if (c == '>' && c2 == '|') 997 iop->flag |= IOCLOB; 998 else 999 ungetsc(c2); 1000 } 1001 1002 iop->name = NULL; 1003 iop->delim = NULL; 1004 iop->heredoc = NULL; 1005 Xfree(ws, wp); /* free word */ 1006 yylval.iop = iop; 1007 return (REDIR); 1008 no_iop: 1009 ; 1010 } 1011 1012 if (wp == dp && state == SBASE) { 1013 Xfree(ws, wp); /* free word */ 1014 /* no word, process LEX1 character */ 1015 if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) { 1016 if ((c2 = getsc()) == c) 1017 c = (c == ';') ? BREAK : 1018 (c == '|') ? LOGOR : 1019 (c == '&') ? LOGAND : 1020 /* c == '(' ) */ MDPAREN; 1021 else if (c == '|' && c2 == '&') 1022 c = COPROC; 1023 else 1024 ungetsc(c2); 1025 } else if (c == '\n') { 1026 gethere(false); 1027 if (cf & CONTIN) 1028 goto Again; 1029 } else if (c == '\0') 1030 /* need here strings at EOF */ 1031 gethere(true); 1032 return (c); 1033 } 1034 1035 *wp++ = EOS; /* terminate word */ 1036 yylval.cp = Xclose(ws, wp); 1037 if (state == SWORD || state == SLETPAREN 1038 /* XXX ONEWORD? */ 1039 #ifndef MKSH_SMALL 1040 || state == SLETARRAY 1041 #endif 1042 ) 1043 return (LWORD); 1044 1045 /* unget terminator */ 1046 ungetsc(c); 1047 1048 /* 1049 * note: the alias-vs-function code below depends on several 1050 * interna: starting from here, source->str is not modified; 1051 * the way getsc() and ungetsc() operate; etc. 1052 */ 1053 1054 /* copy word to unprefixed string ident */ 1055 sp = yylval.cp; 1056 dp = ident; 1057 if ((cf & HEREDELIM) && (sp[1] == '<')) 1058 while (dp < ident+IDENT) { 1059 if ((c = *sp++) == CHAR) 1060 *dp++ = *sp++; 1061 else if ((c != OQUOTE) && (c != CQUOTE)) 1062 break; 1063 } 1064 else 1065 while (dp < ident+IDENT && (c = *sp++) == CHAR) 1066 *dp++ = *sp++; 1067 /* Make sure the ident array stays '\0' padded */ 1068 memset(dp, 0, (ident+IDENT) - dp + 1); 1069 if (c != EOS) 1070 *ident = '\0'; /* word is not unquoted */ 1071 1072 if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) { 1073 struct tbl *p; 1074 uint32_t h = hash(ident); 1075 1076 /* { */ 1077 if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) && 1078 (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}')) { 1079 afree(yylval.cp, ATEMP); 1080 return (p->val.i); 1081 } 1082 if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) && 1083 (p->flag & ISSET)) { 1084 /* 1085 * this still points to the same character as the 1086 * ungetsc'd terminator from above 1087 */ 1088 const char *cp = source->str; 1089 1090 /* prefer POSIX but not Korn functions over aliases */ 1091 while (*cp == ' ' || *cp == '\t') 1092 /* 1093 * this is like getsc() without skipping 1094 * over Source boundaries (including not 1095 * parsing ungetsc'd characters that got 1096 * pushed into an SREREAD) which is what 1097 * we want here anyway: find out whether 1098 * the alias name is followed by a POSIX 1099 * function definition (only the opening 1100 * parenthesis is checked though) 1101 */ 1102 ++cp; 1103 /* prefer functions over aliases */ 1104 if (*cp == '(' /*)*/) 1105 /* 1106 * delete alias upon encountering function 1107 * definition 1108 */ 1109 ktdelete(p); 1110 else { 1111 Source *s = source; 1112 1113 while (s && (s->flags & SF_HASALIAS)) 1114 if (s->u.tblp == p) 1115 return (LWORD); 1116 else 1117 s = s->next; 1118 /* push alias expansion */ 1119 s = pushs(SALIAS, source->areap); 1120 s->start = s->str = p->val.s; 1121 s->u.tblp = p; 1122 s->flags |= SF_HASALIAS; 1123 s->next = source; 1124 if (source->type == SEOF) { 1125 /* prevent infinite recursion at EOS */ 1126 source->u.tblp = p; 1127 source->flags |= SF_HASALIAS; 1128 } 1129 source = s; 1130 afree(yylval.cp, ATEMP); 1131 goto Again; 1132 } 1133 } 1134 } 1135 1136 return (LWORD); 1137 } 1138 1139 static void 1140 gethere(bool iseof) 1141 { 1142 struct ioword **p; 1143 1144 for (p = heres; p < herep; p++) 1145 if (iseof && (*p)->delim[1] != '<') 1146 /* only here strings at EOF */ 1147 return; 1148 else 1149 readhere(*p); 1150 herep = heres; 1151 } 1152 1153 /* 1154 * read "<<word" text into temp file 1155 */ 1156 1157 static void 1158 readhere(struct ioword *iop) 1159 { 1160 int c; 1161 char *volatile eof; 1162 char *eofp; 1163 int skiptabs; 1164 XString xs; 1165 char *xp; 1166 int xpos; 1167 1168 if (iop->delim[1] == '<') { 1169 /* process the here string */ 1170 xp = iop->heredoc = evalstr(iop->delim, DOBLANK); 1171 c = strlen(xp) - 1; 1172 memmove(xp, xp + 1, c); 1173 xp[c] = '\n'; 1174 return; 1175 } 1176 1177 eof = evalstr(iop->delim, 0); 1178 1179 if (!(iop->flag & IOEVAL)) 1180 ignore_backslash_newline++; 1181 1182 Xinit(xs, xp, 256, ATEMP); 1183 1184 for (;;) { 1185 eofp = eof; 1186 skiptabs = iop->flag & IOSKIP; 1187 xpos = Xsavepos(xs, xp); 1188 while ((c = getsc()) != 0) { 1189 if (skiptabs) { 1190 if (c == '\t') 1191 continue; 1192 skiptabs = 0; 1193 } 1194 if (c != *eofp) 1195 break; 1196 Xcheck(xs, xp); 1197 Xput(xs, xp, c); 1198 eofp++; 1199 } 1200 /* Allow EOF here so commands with out trailing newlines 1201 * will work (eg, ksh -c '...', $(...), etc). 1202 */ 1203 if (*eofp == '\0' && (c == 0 || c == '\n')) { 1204 xp = Xrestpos(xs, xp, xpos); 1205 break; 1206 } 1207 ungetsc(c); 1208 while ((c = getsc()) != '\n') { 1209 if (c == 0) 1210 yyerror("here document '%s' unclosed\n", eof); 1211 Xcheck(xs, xp); 1212 Xput(xs, xp, c); 1213 } 1214 Xcheck(xs, xp); 1215 Xput(xs, xp, c); 1216 } 1217 Xput(xs, xp, '\0'); 1218 iop->heredoc = Xclose(xs, xp); 1219 1220 if (!(iop->flag & IOEVAL)) 1221 ignore_backslash_newline--; 1222 } 1223 1224 void 1225 yyerror(const char *fmt, ...) 1226 { 1227 va_list va; 1228 1229 /* pop aliases and re-reads */ 1230 while (source->type == SALIAS || source->type == SREREAD) 1231 source = source->next; 1232 source->str = null; /* zap pending input */ 1233 1234 error_prefix(true); 1235 va_start(va, fmt); 1236 shf_vfprintf(shl_out, fmt, va); 1237 va_end(va); 1238 errorfz(); 1239 } 1240 1241 /* 1242 * input for yylex with alias expansion 1243 */ 1244 1245 Source * 1246 pushs(int type, Area *areap) 1247 { 1248 Source *s; 1249 1250 s = alloc(sizeof(Source), areap); 1251 memset(s, 0, sizeof(Source)); 1252 s->type = type; 1253 s->str = null; 1254 s->areap = areap; 1255 if (type == SFILE || type == SSTDIN) 1256 XinitN(s->xs, 256, s->areap); 1257 return (s); 1258 } 1259 1260 static int 1261 getsc__(void) 1262 { 1263 Source *s = source; 1264 int c; 1265 1266 getsc_again: 1267 while ((c = *s->str++) == 0) { 1268 s->str = NULL; /* return 0 for EOF by default */ 1269 switch (s->type) { 1270 case SEOF: 1271 s->str = null; 1272 return (0); 1273 1274 case SSTDIN: 1275 case SFILE: 1276 getsc_line(s); 1277 break; 1278 1279 case SWSTR: 1280 break; 1281 1282 case SSTRING: 1283 break; 1284 1285 case SWORDS: 1286 s->start = s->str = *s->u.strv++; 1287 s->type = SWORDSEP; 1288 break; 1289 1290 case SWORDSEP: 1291 if (*s->u.strv == NULL) { 1292 s->start = s->str = "\n"; 1293 s->type = SEOF; 1294 } else { 1295 s->start = s->str = " "; 1296 s->type = SWORDS; 1297 } 1298 break; 1299 1300 case SALIAS: 1301 if (s->flags & SF_ALIASEND) { 1302 /* pass on an unused SF_ALIAS flag */ 1303 source = s->next; 1304 source->flags |= s->flags & SF_ALIAS; 1305 s = source; 1306 } else if (*s->u.tblp->val.s && 1307 (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) { 1308 source = s = s->next; /* pop source stack */ 1309 /* Note that this alias ended with a space, 1310 * enabling alias expansion on the following 1311 * word. 1312 */ 1313 s->flags |= SF_ALIAS; 1314 } else { 1315 /* At this point, we need to keep the current 1316 * alias in the source list so recursive 1317 * aliases can be detected and we also need 1318 * to return the next character. Do this 1319 * by temporarily popping the alias to get 1320 * the next character and then put it back 1321 * in the source list with the SF_ALIASEND 1322 * flag set. 1323 */ 1324 source = s->next; /* pop source stack */ 1325 source->flags |= s->flags & SF_ALIAS; 1326 c = getsc__(); 1327 if (c) { 1328 s->flags |= SF_ALIASEND; 1329 s->ugbuf[0] = c; s->ugbuf[1] = '\0'; 1330 s->start = s->str = s->ugbuf; 1331 s->next = source; 1332 source = s; 1333 } else { 1334 s = source; 1335 /* avoid reading eof twice */ 1336 s->str = NULL; 1337 break; 1338 } 1339 } 1340 continue; 1341 1342 case SREREAD: 1343 if (s->start != s->ugbuf) /* yuck */ 1344 afree(s->u.freeme, ATEMP); 1345 source = s = s->next; 1346 continue; 1347 } 1348 if (s->str == NULL) { 1349 s->type = SEOF; 1350 s->start = s->str = null; 1351 return ('\0'); 1352 } 1353 if (s->flags & SF_ECHO) { 1354 shf_puts(s->str, shl_out); 1355 shf_flush(shl_out); 1356 } 1357 } 1358 /* check for UTF-8 byte order mark */ 1359 if (s->flags & SF_FIRST) { 1360 s->flags &= ~SF_FIRST; 1361 if (((unsigned char)c == 0xEF) && 1362 (((const unsigned char *)(s->str))[0] == 0xBB) && 1363 (((const unsigned char *)(s->str))[1] == 0xBF)) { 1364 s->str += 2; 1365 UTFMODE = 1; 1366 goto getsc_again; 1367 } 1368 } 1369 return (c); 1370 } 1371 1372 static void 1373 getsc_line(Source *s) 1374 { 1375 char *xp = Xstring(s->xs, xp), *cp; 1376 bool interactive = Flag(FTALKING) && s->type == SSTDIN; 1377 int have_tty = interactive && (s->flags & SF_TTY); 1378 1379 /* Done here to ensure nothing odd happens when a timeout occurs */ 1380 XcheckN(s->xs, xp, LINE); 1381 *xp = '\0'; 1382 s->start = s->str = xp; 1383 1384 if (have_tty && ksh_tmout) { 1385 ksh_tmout_state = TMOUT_READING; 1386 alarm(ksh_tmout); 1387 } 1388 if (interactive) 1389 change_winsz(); 1390 if (have_tty && ( 1391 #if !MKSH_S_NOVI 1392 Flag(FVI) || 1393 #endif 1394 Flag(FEMACS) || Flag(FGMACS))) { 1395 int nread; 1396 1397 nread = x_read(xp, LINE); 1398 if (nread < 0) /* read error */ 1399 nread = 0; 1400 xp[nread] = '\0'; 1401 xp += nread; 1402 } else { 1403 if (interactive) 1404 pprompt(prompt, 0); 1405 else 1406 s->line++; 1407 1408 while (1) { 1409 char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf); 1410 1411 if (!p && shf_error(s->u.shf) && 1412 shf_errno(s->u.shf) == EINTR) { 1413 shf_clearerr(s->u.shf); 1414 if (trap) 1415 runtraps(0); 1416 continue; 1417 } 1418 if (!p || (xp = p, xp[-1] == '\n')) 1419 break; 1420 /* double buffer size */ 1421 xp++; /* move past NUL so doubling works... */ 1422 XcheckN(s->xs, xp, Xlength(s->xs, xp)); 1423 xp--; /* ...and move back again */ 1424 } 1425 /* flush any unwanted input so other programs/builtins 1426 * can read it. Not very optimal, but less error prone 1427 * than flushing else where, dealing with redirections, 1428 * etc. 1429 * todo: reduce size of shf buffer (~128?) if SSTDIN 1430 */ 1431 if (s->type == SSTDIN) 1432 shf_flush(s->u.shf); 1433 } 1434 /* XXX: temporary kludge to restore source after a 1435 * trap may have been executed. 1436 */ 1437 source = s; 1438 if (have_tty && ksh_tmout) { 1439 ksh_tmout_state = TMOUT_EXECUTING; 1440 alarm(0); 1441 } 1442 cp = Xstring(s->xs, xp); 1443 #ifndef MKSH_SMALL 1444 if (interactive && *cp == '!' && cur_prompt == PS1) { 1445 int linelen; 1446 1447 linelen = Xlength(s->xs, xp); 1448 XcheckN(s->xs, xp, fc_e_n + /* NUL */ 1); 1449 /* reload after potential realloc */ 1450 cp = Xstring(s->xs, xp); 1451 /* change initial '!' into space */ 1452 *cp = ' '; 1453 /* NUL terminate the current string */ 1454 *xp = '\0'; 1455 /* move the actual string forward */ 1456 memmove(cp + fc_e_n, cp, linelen + /* NUL */ 1); 1457 xp += fc_e_n; 1458 /* prepend it with "fc -e -" */ 1459 memcpy(cp, fc_e_, fc_e_n); 1460 } 1461 #endif 1462 s->start = s->str = cp; 1463 strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp)); 1464 /* Note: if input is all nulls, this is not eof */ 1465 if (Xlength(s->xs, xp) == 0) { 1466 /* EOF */ 1467 if (s->type == SFILE) 1468 shf_fdclose(s->u.shf); 1469 s->str = NULL; 1470 } else if (interactive && *s->str && 1471 (cur_prompt != PS1 || !ctype(*s->str, C_IFS | C_IFSWS))) { 1472 histsave(&s->line, s->str, true, true); 1473 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY 1474 } else if (interactive && cur_prompt == PS1) { 1475 cp = Xstring(s->xs, xp); 1476 while (*cp && ctype(*cp, C_IFSWS)) 1477 ++cp; 1478 if (!*cp) 1479 histsync(); 1480 #endif 1481 } 1482 if (interactive) 1483 set_prompt(PS2, NULL); 1484 } 1485 1486 void 1487 set_prompt(int to, Source *s) 1488 { 1489 cur_prompt = to; 1490 1491 switch (to) { 1492 case PS1: /* command */ 1493 /* Substitute ! and !! here, before substitutions are done 1494 * so ! in expanded variables are not expanded. 1495 * NOTE: this is not what AT&T ksh does (it does it after 1496 * substitutions, POSIX doesn't say which is to be done. 1497 */ 1498 { 1499 struct shf *shf; 1500 char * volatile ps1; 1501 Area *saved_atemp; 1502 1503 ps1 = str_val(global("PS1")); 1504 shf = shf_sopen(NULL, strlen(ps1) * 2, 1505 SHF_WR | SHF_DYNAMIC, NULL); 1506 while (*ps1) 1507 if (*ps1 != '!' || *++ps1 == '!') 1508 shf_putchar(*ps1++, shf); 1509 else 1510 shf_fprintf(shf, "%d", 1511 s ? s->line + 1 : 0); 1512 ps1 = shf_sclose(shf); 1513 saved_atemp = ATEMP; 1514 newenv(E_ERRH); 1515 if (sigsetjmp(e->jbuf, 0)) { 1516 prompt = safe_prompt; 1517 /* Don't print an error - assume it has already 1518 * been printed. Reason is we may have forked 1519 * to run a command and the child may be 1520 * unwinding its stack through this code as it 1521 * exits. 1522 */ 1523 } else { 1524 char *cp = substitute(ps1, 0); 1525 strdupx(prompt, cp, saved_atemp); 1526 } 1527 quitenv(NULL); 1528 } 1529 break; 1530 case PS2: /* command continuation */ 1531 prompt = str_val(global("PS2")); 1532 break; 1533 } 1534 } 1535 1536 static int 1537 dopprompt(const char *cp, int ntruncate, bool doprint) 1538 { 1539 int columns = 0, lines = 0, indelimit = 0; 1540 char delimiter = 0; 1541 1542 /* Undocumented AT&T ksh feature: 1543 * If the second char in the prompt string is \r then the first char 1544 * is taken to be a non-printing delimiter and any chars between two 1545 * instances of the delimiter are not considered to be part of the 1546 * prompt length 1547 */ 1548 if (*cp && cp[1] == '\r') { 1549 delimiter = *cp; 1550 cp += 2; 1551 } 1552 for (; *cp; cp++) { 1553 if (indelimit && *cp != delimiter) 1554 ; 1555 else if (*cp == '\n' || *cp == '\r') { 1556 lines += columns / x_cols + ((*cp == '\n') ? 1 : 0); 1557 columns = 0; 1558 } else if (*cp == '\t') { 1559 columns = (columns | 7) + 1; 1560 } else if (*cp == '\b') { 1561 if (columns > 0) 1562 columns--; 1563 } else if (*cp == delimiter) 1564 indelimit = !indelimit; 1565 else if (UTFMODE && ((unsigned char)*cp > 0x7F)) { 1566 const char *cp2; 1567 columns += utf_widthadj(cp, &cp2); 1568 if (doprint && (indelimit || 1569 (ntruncate < (x_cols * lines + columns)))) 1570 shf_write(cp, cp2 - cp, shl_out); 1571 cp = cp2 - /* loop increment */ 1; 1572 continue; 1573 } else 1574 columns++; 1575 if (doprint && (*cp != delimiter) && 1576 (indelimit || (ntruncate < (x_cols * lines + columns)))) 1577 shf_putc(*cp, shl_out); 1578 } 1579 if (doprint) 1580 shf_flush(shl_out); 1581 return (x_cols * lines + columns); 1582 } 1583 1584 1585 void 1586 pprompt(const char *cp, int ntruncate) 1587 { 1588 dopprompt(cp, ntruncate, true); 1589 } 1590 1591 int 1592 promptlen(const char *cp) 1593 { 1594 return (dopprompt(cp, 0, false)); 1595 } 1596 1597 /* Read the variable part of a ${...} expression (ie, up to but not including 1598 * the :[-+?=#%] or close-brace. 1599 */ 1600 static char * 1601 get_brace_var(XString *wsp, char *wp) 1602 { 1603 enum parse_state { 1604 PS_INITIAL, PS_SAW_HASH, PS_IDENT, 1605 PS_NUMBER, PS_VAR1 1606 } state; 1607 char c; 1608 1609 state = PS_INITIAL; 1610 while (1) { 1611 c = getsc(); 1612 /* State machine to figure out where the variable part ends. */ 1613 switch (state) { 1614 case PS_INITIAL: 1615 if (c == '#' || c == '!' || c == '%') { 1616 state = PS_SAW_HASH; 1617 break; 1618 } 1619 /* FALLTHROUGH */ 1620 case PS_SAW_HASH: 1621 if (ksh_isalphx(c)) 1622 state = PS_IDENT; 1623 else if (ksh_isdigit(c)) 1624 state = PS_NUMBER; 1625 else if (ctype(c, C_VAR1)) 1626 state = PS_VAR1; 1627 else 1628 goto out; 1629 break; 1630 case PS_IDENT: 1631 if (!ksh_isalnux(c)) { 1632 if (c == '[') { 1633 char *tmp, *p; 1634 1635 if (!arraysub(&tmp)) 1636 yyerror("missing ]\n"); 1637 *wp++ = c; 1638 for (p = tmp; *p; ) { 1639 Xcheck(*wsp, wp); 1640 *wp++ = *p++; 1641 } 1642 afree(tmp, ATEMP); 1643 c = getsc(); /* the ] */ 1644 } 1645 goto out; 1646 } 1647 break; 1648 case PS_NUMBER: 1649 if (!ksh_isdigit(c)) 1650 goto out; 1651 break; 1652 case PS_VAR1: 1653 goto out; 1654 } 1655 Xcheck(*wsp, wp); 1656 *wp++ = c; 1657 } 1658 out: 1659 *wp++ = '\0'; /* end of variable part */ 1660 ungetsc(c); 1661 return (wp); 1662 } 1663 1664 /* 1665 * Save an array subscript - returns true if matching bracket found, false 1666 * if eof or newline was found. 1667 * (Returned string double null terminated) 1668 */ 1669 static int 1670 arraysub(char **strp) 1671 { 1672 XString ws; 1673 char *wp; 1674 char c; 1675 int depth = 1; /* we are just past the initial [ */ 1676 1677 Xinit(ws, wp, 32, ATEMP); 1678 1679 do { 1680 c = getsc(); 1681 Xcheck(ws, wp); 1682 *wp++ = c; 1683 if (c == '[') 1684 depth++; 1685 else if (c == ']') 1686 depth--; 1687 } while (depth > 0 && c && c != '\n'); 1688 1689 *wp++ = '\0'; 1690 *strp = Xclose(ws, wp); 1691 1692 return (depth == 0 ? 1 : 0); 1693 } 1694 1695 /* Unget a char: handles case when we are already at the start of the buffer */ 1696 static const char * 1697 ungetsc(int c) 1698 { 1699 if (backslash_skip) 1700 backslash_skip--; 1701 /* Don't unget eof... */ 1702 if (source->str == null && c == '\0') 1703 return (source->str); 1704 if (source->str > source->start) 1705 source->str--; 1706 else { 1707 Source *s; 1708 1709 s = pushs(SREREAD, source->areap); 1710 s->ugbuf[0] = c; s->ugbuf[1] = '\0'; 1711 s->start = s->str = s->ugbuf; 1712 s->next = source; 1713 source = s; 1714 } 1715 return (source->str); 1716 } 1717 1718 1719 /* Called to get a char that isn't a \newline sequence. */ 1720 static int 1721 getsc_bn(void) 1722 { 1723 int c, c2; 1724 1725 if (ignore_backslash_newline) 1726 return (getsc_()); 1727 1728 if (backslash_skip == 1) { 1729 backslash_skip = 2; 1730 return (getsc_()); 1731 } 1732 1733 backslash_skip = 0; 1734 1735 while (1) { 1736 c = getsc_(); 1737 if (c == '\\') { 1738 if ((c2 = getsc_()) == '\n') 1739 /* ignore the \newline; get the next char... */ 1740 continue; 1741 ungetsc(c2); 1742 backslash_skip = 1; 1743 } 1744 return (c); 1745 } 1746 } 1747 1748 static Lex_state * 1749 push_state_(State_info *si, Lex_state *old_end) 1750 { 1751 Lex_state *news = alloc(STATE_BSIZE * sizeof(Lex_state), ATEMP); 1752 1753 news[0].ls_info.base = old_end; 1754 si->base = &news[0]; 1755 si->end = &news[STATE_BSIZE]; 1756 return (&news[1]); 1757 } 1758 1759 static Lex_state * 1760 pop_state_(State_info *si, Lex_state *old_end) 1761 { 1762 Lex_state *old_base = si->base; 1763 1764 si->base = old_end->ls_info.base - STATE_BSIZE; 1765 si->end = old_end->ls_info.base; 1766 1767 afree(old_base, ATEMP); 1768 1769 return (si->base + STATE_BSIZE - 1); 1770 } 1771 1772 static int 1773 s_get(void) 1774 { 1775 return (getsc()); 1776 } 1777 1778 static void 1779 s_put(int c) 1780 { 1781 ungetsc(c); 1782 } 1783