1 /* GNU SED, a batch stream editor. 2 Copyright (C) 1989,90,91,92,93,94,95,98,99,2002,2003,2004,2005,2006,2008 3 Free Software Foundation, Inc. 4 5 This program is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3, or (at your option) 8 any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program; if not, write to the Free Software 17 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 18 19 /* compile.c: translate sed source into internal form */ 20 21 #include "sed.h" 22 #include <stdio.h> 23 #include <ctype.h> 24 25 #ifdef HAVE_STRINGS_H 26 # include <strings.h> 27 # ifdef HAVE_MEMORY_H 28 # include <memory.h> 29 # endif 30 #else 31 # include <string.h> 32 #endif /* HAVE_STRINGS_H */ 33 34 #ifdef HAVE_STDLIB_H 35 # include <stdlib.h> 36 #endif 37 #ifndef EXIT_FAILURE 38 # define EXIT_FAILURE 1 39 #endif 40 41 #ifdef HAVE_SYS_TYPES_H 42 # include <sys/types.h> 43 #endif 44 45 #include <obstack.h> 46 47 48 #define YMAP_LENGTH 256 /*XXX shouldn't this be (UCHAR_MAX+1)?*/ 50 #define VECTOR_ALLOC_INCREMENT 40 51 52 /* let's not confuse text editors that have only dumb bracket-matching... */ 53 #define OPEN_BRACKET '[' 54 #define CLOSE_BRACKET ']' 55 #define OPEN_BRACE '{' 56 #define CLOSE_BRACE '}' 57 58 struct prog_info { 59 /* When we're reading a script command from a string, `prog.base' 60 points to the first character in the string, 'prog.cur' points 61 to the current character in the string, and 'prog.end' points 62 to the end of the string. This allows us to compile script 63 strings that contain nulls. */ 64 const unsigned char *base; 65 const unsigned char *cur; 66 const unsigned char *end; 67 68 /* This is the current script file. If it is NULL, we are reading 69 from a string stored at `prog.cur' instead. If both `prog.file' 70 and `prog.cur' are NULL, we're in trouble! */ 71 FILE *file; 72 }; 73 74 /* Information used to give out useful and informative error messages. */ 75 struct error_info { 76 /* This is the name of the current script file. */ 77 const char *name; 78 79 /* This is the number of the current script line that we're compiling. */ 80 countT line; 81 82 /* This is the index of the "-e" expressions on the command line. */ 83 countT string_expr_count; 84 }; 85 86 87 /* Label structure used to resolve GOTO's, labels, and block beginnings. */ 88 struct sed_label { 89 countT v_index; /* index of vector element being referenced */ 90 char *name; /* NUL-terminated name of the label */ 91 struct error_info err_info; /* track where `{}' blocks start */ 92 struct sed_label *next; /* linked list (stack) */ 93 }; 94 95 struct special_files { 96 struct output outf; 97 FILE **pfp; 98 }; 99 100 FILE *my_stdin, *my_stdout, *my_stderr; 101 struct special_files special_files[] = { 102 { { "/dev/stdin", false, NULL, NULL }, &my_stdin }, 103 { { "/dev/stdout", false, NULL, NULL }, &my_stdout }, 104 { { "/dev/stderr", false, NULL, NULL }, &my_stderr }, 105 { { NULL, false, NULL, NULL }, NULL } 106 }; 107 108 109 /* Where we are in the processing of the input. */ 111 static struct prog_info prog; 112 static struct error_info cur_input; 113 114 /* Information about labels and jumps-to-labels. This is used to do 115 the required backpatching after we have compiled all the scripts. */ 116 static struct sed_label *jumps = NULL; 117 static struct sed_label *labels = NULL; 118 119 /* We wish to detect #n magic only in the first input argument; 120 this flag tracks when we have consumed the first file of input. */ 121 static bool first_script = true; 122 123 /* Allow for scripts like "sed -e 'i\' -e foo": */ 124 static struct buffer *pending_text = NULL; 125 static struct text_buf *old_text_buf = NULL; 126 127 /* Information about block start positions. This is used to backpatch 128 block end positions. */ 129 static struct sed_label *blocks = NULL; 130 131 /* Use an obstack for compilation. */ 132 static struct obstack obs; 133 134 /* Various error messages we may want to print */ 135 static const char errors[] = 136 "multiple `!'s\0" 137 "unexpected `,'\0" 138 "invalid usage of +N or ~N as first address\0" 139 "unmatched `{'\0" 140 "unexpected `}'\0" 141 "extra characters after command\0" 142 "expected \\ after `a', `c' or `i'\0" 143 "`}' doesn't want any addresses\0" 144 ": doesn't want any addresses\0" 145 "comments don't accept any addresses\0" 146 "missing command\0" 147 "command only uses one address\0" 148 "unterminated address regex\0" 149 "unterminated `s' command\0" 150 "unterminated `y' command\0" 151 "unknown option to `s'\0" 152 "multiple `p' options to `s' command\0" 153 "multiple `g' options to `s' command\0" 154 "multiple number options to `s' command\0" 155 "number option to `s' command may not be zero\0" 156 "strings for `y' command are different lengths\0" 157 "delimiter character is not a single-byte character\0" 158 "expected newer version of sed\0" 159 "invalid usage of line address 0\0" 160 "unknown command: `%c'"; 161 162 #define BAD_BANG (errors) 163 #define BAD_COMMA (BAD_BANG + sizeof(N_("multiple `!'s"))) 164 #define BAD_STEP (BAD_COMMA + sizeof(N_("unexpected `,'"))) 165 #define EXCESS_OPEN_BRACE (BAD_STEP + sizeof(N_("invalid usage of +N or ~N as first address"))) 166 #define EXCESS_CLOSE_BRACE (EXCESS_OPEN_BRACE + sizeof(N_("unmatched `{'"))) 167 #define EXCESS_JUNK (EXCESS_CLOSE_BRACE + sizeof(N_("unexpected `}'"))) 168 #define EXPECTED_SLASH (EXCESS_JUNK + sizeof(N_("extra characters after command"))) 169 #define NO_CLOSE_BRACE_ADDR (EXPECTED_SLASH + sizeof(N_("expected \\ after `a', `c' or `i'"))) 170 #define NO_COLON_ADDR (NO_CLOSE_BRACE_ADDR + sizeof(N_("`}' doesn't want any addresses"))) 171 #define NO_SHARP_ADDR (NO_COLON_ADDR + sizeof(N_(": doesn't want any addresses"))) 172 #define NO_COMMAND (NO_SHARP_ADDR + sizeof(N_("comments don't accept any addresses"))) 173 #define ONE_ADDR (NO_COMMAND + sizeof(N_("missing command"))) 174 #define UNTERM_ADDR_RE (ONE_ADDR + sizeof(N_("command only uses one address"))) 175 #define UNTERM_S_CMD (UNTERM_ADDR_RE + sizeof(N_("unterminated address regex"))) 176 #define UNTERM_Y_CMD (UNTERM_S_CMD + sizeof(N_("unterminated `s' command"))) 177 #define UNKNOWN_S_OPT (UNTERM_Y_CMD + sizeof(N_("unterminated `y' command"))) 178 #define EXCESS_P_OPT (UNKNOWN_S_OPT + sizeof(N_("unknown option to `s'"))) 179 #define EXCESS_G_OPT (EXCESS_P_OPT + sizeof(N_("multiple `p' options to `s' command"))) 180 #define EXCESS_N_OPT (EXCESS_G_OPT + sizeof(N_("multiple `g' options to `s' command"))) 181 #define ZERO_N_OPT (EXCESS_N_OPT + sizeof(N_("multiple number options to `s' command"))) 182 #define Y_CMD_LEN (ZERO_N_OPT + sizeof(N_("number option to `s' command may not be zero"))) 183 #define BAD_DELIM (Y_CMD_LEN + sizeof(N_("strings for `y' command are different lengths"))) 184 #define ANCIENT_VERSION (BAD_DELIM + sizeof(N_("delimiter character is not a single-byte character"))) 185 #define INVALID_LINE_0 (ANCIENT_VERSION + sizeof(N_("expected newer version of sed"))) 186 #define UNKNOWN_CMD (INVALID_LINE_0 + sizeof(N_("invalid usage of line address 0"))) 187 #define END_ERRORS (UNKNOWN_CMD + sizeof(N_("unknown command: `%c'"))) 188 189 static struct output *file_read = NULL; 190 static struct output *file_write = NULL; 191 192 193 /* Complain about an unknown command and exit. */ 195 void 196 bad_command(ch) 197 char ch; 198 { 199 const char *msg = _(UNKNOWN_CMD); 200 char *unknown_cmd = xmalloc(strlen(msg)); 201 sprintf(unknown_cmd, msg, ch); 202 bad_prog(unknown_cmd); 203 } 204 205 /* Complain about a programming error and exit. */ 206 void 207 bad_prog(why) 208 const char *why; 209 { 210 if (cur_input.name) 211 fprintf(stderr, _("%s: file %s line %lu: %s\n"), 212 myname, cur_input.name, CAST(unsigned long)cur_input.line, why); 213 else 214 fprintf(stderr, _("%s: -e expression #%lu, char %lu: %s\n"), 215 myname, 216 CAST(unsigned long)cur_input.string_expr_count, 217 CAST(unsigned long)(prog.cur-prog.base), 218 why); 219 exit(EXIT_FAILURE); 220 } 221 222 223 /* Read the next character from the program. Return EOF if there isn't 225 anything to read. Keep cur_input.line up to date, so error messages 226 can be meaningful. */ 227 static int inchar P_((void)); 228 static int 229 inchar() 230 { 231 int ch = EOF; 232 233 if (prog.cur) 234 { 235 if (prog.cur < prog.end) 236 ch = *prog.cur++; 237 } 238 else if (prog.file) 239 { 240 if (!feof(prog.file)) 241 ch = getc(prog.file); 242 } 243 if (ch == '\n') 244 ++cur_input.line; 245 return ch; 246 } 247 248 /* unget `ch' so the next call to inchar will return it. */ 249 static void savchar P_((int ch)); 250 static void 251 savchar(ch) 252 int ch; 253 { 254 if (ch == EOF) 255 return; 256 if (ch == '\n' && cur_input.line > 0) 257 --cur_input.line; 258 if (prog.cur) 259 { 260 if (prog.cur <= prog.base || *--prog.cur != ch) 261 panic("Called savchar() with unexpected pushback (%x)", 262 CAST(unsigned char)ch); 263 } 264 else 265 ungetc(ch, prog.file); 266 } 267 268 /* Read the next non-blank character from the program. */ 269 static int in_nonblank P_((void)); 270 static int 271 in_nonblank() 272 { 273 int ch; 274 do 275 ch = inchar(); 276 while (ISBLANK(ch)); 277 return ch; 278 } 279 280 /* Read an integer value from the program. */ 281 static countT in_integer P_((int ch)); 282 static countT 283 in_integer(ch) 284 int ch; 285 { 286 countT num = 0; 287 288 while (ISDIGIT(ch)) 289 { 290 num = num * 10 + ch - '0'; 291 ch = inchar(); 292 } 293 savchar(ch); 294 return num; 295 } 296 297 static int add_then_next P_((struct buffer *b, int ch)); 298 static int 299 add_then_next(b, ch) 300 struct buffer *b; 301 int ch; 302 { 303 add1_buffer(b, ch); 304 return inchar(); 305 } 306 307 static char * convert_number P_((char *, char *, const char *, int, int, int)); 308 static char * 309 convert_number(result, buf, bufend, base, maxdigits, default_char) 310 char *result; 311 char *buf; 312 const char *bufend; 313 int base; 314 int maxdigits; 315 int default_char; 316 { 317 int n = 0; 318 char *p; 319 320 for (p=buf; p < bufend && maxdigits-- > 0; ++p) 321 { 322 int d = -1; 323 switch (*p) 324 { 325 case '0': d = 0x0; break; 326 case '1': d = 0x1; break; 327 case '2': d = 0x2; break; 328 case '3': d = 0x3; break; 329 case '4': d = 0x4; break; 330 case '5': d = 0x5; break; 331 case '6': d = 0x6; break; 332 case '7': d = 0x7; break; 333 case '8': d = 0x8; break; 334 case '9': d = 0x9; break; 335 case 'A': case 'a': d = 0xa; break; 336 case 'B': case 'b': d = 0xb; break; 337 case 'C': case 'c': d = 0xc; break; 338 case 'D': case 'd': d = 0xd; break; 339 case 'E': case 'e': d = 0xe; break; 340 case 'F': case 'f': d = 0xf; break; 341 } 342 if (d < 0 || base <= d) 343 break; 344 n = n * base + d; 345 } 346 if (p == buf) 347 *result = default_char; 348 else 349 *result = n; 350 return p; 351 } 352 353 354 /* Read in a filename for a `r', `w', or `s///w' command. */ 356 static struct buffer *read_filename P_((void)); 357 static struct buffer * 358 read_filename() 359 { 360 struct buffer *b; 361 int ch; 362 363 b = init_buffer(); 364 ch = in_nonblank(); 365 while (ch != EOF && ch != '\n') 366 { 367 #if 0 /*XXX ZZZ 1998-09-12 kpp: added, then had second thoughts*/ 368 if (posixicity == POSIXLY_EXTENDED) 369 if (ch == ';' || ch == '#') 370 { 371 savchar(ch); 372 break; 373 } 374 #endif 375 ch = add_then_next(b, ch); 376 } 377 add1_buffer(b, '\0'); 378 return b; 379 } 380 381 static struct output *get_openfile P_((struct output **file_ptrs, const char *mode, int fail)); 382 static struct output * 383 get_openfile(file_ptrs, mode, fail) 384 struct output **file_ptrs; 385 const char *mode; 386 int fail; 387 { 388 struct buffer *b; 389 char *file_name; 390 struct output *p; 391 392 b = read_filename(); 393 file_name = get_buffer(b); 394 for (p=*file_ptrs; p; p=p->link) 395 if (strcmp(p->name, file_name) == 0) 396 break; 397 398 if (posixicity == POSIXLY_EXTENDED) 399 { 400 /* Check whether it is a special file (stdin, stdout or stderr) */ 401 struct special_files *special = special_files; 402 403 /* std* sometimes are not constants, so they 404 cannot be used in the initializer for special_files */ 405 my_stdin = stdin; my_stdout = stdout; my_stderr = stderr; 406 for (special = special_files; special->outf.name; special++) 407 if (strcmp(special->outf.name, file_name) == 0) 408 { 409 special->outf.fp = *special->pfp; 410 free_buffer (b); 411 return &special->outf; 412 } 413 } 414 415 if (!p) 416 { 417 p = OB_MALLOC(&obs, 1, struct output); 418 p->name = ck_strdup(file_name); 419 p->fp = ck_fopen(p->name, mode, fail); 420 p->missing_newline = false; 421 p->link = *file_ptrs; 422 *file_ptrs = p; 423 } 424 free_buffer(b); 425 return p; 426 } 427 428 429 static struct sed_cmd *next_cmd_entry P_((struct vector **vectorp)); 431 static struct sed_cmd * 432 next_cmd_entry(vectorp) 433 struct vector **vectorp; 434 { 435 struct sed_cmd *cmd; 436 struct vector *v; 437 438 v = *vectorp; 439 if (v->v_length == v->v_allocated) 440 { 441 v->v_allocated += VECTOR_ALLOC_INCREMENT; 442 v->v = REALLOC(v->v, v->v_allocated, struct sed_cmd); 443 } 444 445 cmd = v->v + v->v_length; 446 cmd->a1 = NULL; 447 cmd->a2 = NULL; 448 cmd->range_state = RANGE_INACTIVE; 449 cmd->addr_bang = false; 450 cmd->cmd = '\0'; /* something invalid, to catch bugs early */ 451 452 *vectorp = v; 453 return cmd; 454 } 455 456 static int snarf_char_class P_((struct buffer *b, mbstate_t *cur_stat)); 457 static int 458 snarf_char_class(b, cur_stat) 459 struct buffer *b; 460 mbstate_t *cur_stat; 461 { 462 int ch; 463 int state = 0; 464 int delim; 465 bool pending_mb = 0; 466 467 ch = inchar(); 468 if (ch == '^') 469 ch = add_then_next(b, ch); 470 if (ch == CLOSE_BRACKET) 471 ch = add_then_next(b, ch); 472 473 /* States are: 474 0 outside a collation element, character class or collation class 475 1 after the bracket 476 2 after the opening ./:/= 477 3 after the closing ./:/= */ 478 479 for (;; ch = add_then_next (b, ch)) 480 { 481 pending_mb = BRLEN (ch, cur_stat) != 1; 482 483 switch (ch) 484 { 485 case EOF: 486 case '\n': 487 return ch; 488 489 case '.': 490 case ':': 491 case '=': 492 if (pending_mb) 493 continue; 494 495 if (state == 1) 496 { 497 delim = ch; 498 state = 2; 499 } 500 else if (state == 2 && ch == delim) 501 state = 3; 502 else 503 break; 504 505 continue; 506 507 case OPEN_BRACKET: 508 if (pending_mb) 509 continue; 510 511 if (state == 0) 512 state = 1; 513 continue; 514 515 case CLOSE_BRACKET: 516 if (pending_mb) 517 continue; 518 519 if (state == 0 || state == 1) 520 return ch; 521 else if (state == 3) 522 state = 0; 523 524 break; 525 526 default: 527 break; 528 } 529 530 /* Getting a character different from .=: whilst in state 1 531 goes back to state 0, getting a character different from ] 532 whilst in state 3 goes back to state 2. */ 533 state &= ~1; 534 } 535 } 536 537 static struct buffer *match_slash P_((int slash, int regex)); 538 static struct buffer * 539 match_slash(slash, regex) 540 int slash; 541 int regex; 542 { 543 struct buffer *b; 544 int ch; 545 mbstate_t cur_stat; 546 547 memset (&cur_stat, 0, sizeof (mbstate_t)); 548 549 /* We allow only 1 byte characters for a slash. */ 550 if (BRLEN (slash, &cur_stat) == -2) 551 bad_prog (BAD_DELIM); 552 553 memset (&cur_stat, 0, sizeof (mbstate_t)); 554 555 b = init_buffer(); 556 while ((ch = inchar()) != EOF && ch != '\n') 557 { 558 bool pending_mb = !MBSINIT (&cur_stat); 559 if (BRLEN (ch, &cur_stat) == 1 && !pending_mb) 560 { 561 if (ch == slash) 562 return b; 563 else if (ch == '\\') 564 { 565 ch = inchar(); 566 if (ch == EOF) 567 break; 568 #ifndef REG_PERL 569 else if (ch == 'n' && regex) 570 ch = '\n'; 571 #endif 572 else if (ch != '\n' && (ch != slash || (!regex && ch == '&'))) 573 add1_buffer(b, '\\'); 574 } 575 else if (ch == OPEN_BRACKET && regex) 576 { 577 add1_buffer(b, ch); 578 ch = snarf_char_class(b, &cur_stat); 579 if (ch != CLOSE_BRACKET) 580 break; 581 } 582 } 583 584 add1_buffer(b, ch); 585 } 586 587 if (ch == '\n') 588 savchar(ch); /* for proper line number in error report */ 589 free_buffer(b); 590 return NULL; 591 } 592 593 static int mark_subst_opts P_((struct subst *cmd)); 594 static int 595 mark_subst_opts(cmd) 596 struct subst *cmd; 597 { 598 int flags = 0; 599 int ch; 600 601 cmd->global = false; 602 cmd->print = false; 603 cmd->eval = false; 604 cmd->numb = 0; 605 cmd->outf = NULL; 606 607 for (;;) 608 switch ( (ch = in_nonblank()) ) 609 { 610 case 'i': /* GNU extension */ 611 case 'I': /* GNU extension */ 612 if (posixicity == POSIXLY_BASIC) 613 bad_prog(_(UNKNOWN_S_OPT)); 614 flags |= REG_ICASE; 615 break; 616 617 #ifdef REG_PERL 618 case 's': /* GNU extension */ 619 case 'S': /* GNU extension */ 620 if (posixicity == POSIXLY_BASIC) 621 bad_prog(_(UNKNOWN_S_OPT)); 622 if (extended_regexp_flags & REG_PERL) 623 flags |= REG_DOTALL; 624 break; 625 626 case 'x': /* GNU extension */ 627 case 'X': /* GNU extension */ 628 if (posixicity == POSIXLY_BASIC) 629 bad_prog(_(UNKNOWN_S_OPT)); 630 if (extended_regexp_flags & REG_PERL) 631 flags |= REG_EXTENDED; 632 break; 633 #endif 634 635 case 'm': /* GNU extension */ 636 case 'M': /* GNU extension */ 637 if (posixicity == POSIXLY_BASIC) 638 bad_prog(_(UNKNOWN_S_OPT)); 639 flags |= REG_NEWLINE; 640 break; 641 642 case 'e': 643 cmd->eval = true; 644 break; 645 646 case 'p': 647 if (cmd->print) 648 bad_prog(_(EXCESS_P_OPT)); 649 cmd->print |= (1 << cmd->eval); /* 1=before eval, 2=after */ 650 break; 651 652 case 'g': 653 if (cmd->global) 654 bad_prog(_(EXCESS_G_OPT)); 655 cmd->global = true; 656 break; 657 658 case 'w': 659 cmd->outf = get_openfile(&file_write, "w", true); 660 return flags; 661 662 case '0': case '1': case '2': case '3': case '4': 663 case '5': case '6': case '7': case '8': case '9': 664 if (cmd->numb) 665 bad_prog(_(EXCESS_N_OPT)); 666 cmd->numb = in_integer(ch); 667 if (!cmd->numb) 668 bad_prog(_(ZERO_N_OPT)); 669 break; 670 671 case CLOSE_BRACE: 672 case '#': 673 savchar(ch); 674 /* Fall Through */ 675 case EOF: 676 case '\n': 677 case ';': 678 return flags; 679 680 case '\r': 681 if (inchar() == '\n') 682 return flags; 683 /* FALLTHROUGH */ 684 685 default: 686 bad_prog(_(UNKNOWN_S_OPT)); 687 /*NOTREACHED*/ 688 } 689 } 690 691 692 /* read in a label for a `:', `b', or `t' command */ 694 static char *read_label P_((void)); 695 static char * 696 read_label() 697 { 698 struct buffer *b; 699 int ch; 700 char *ret; 701 702 b = init_buffer(); 703 ch = in_nonblank(); 704 705 while (ch != EOF && ch != '\n' 706 && !ISBLANK(ch) && ch != ';' && ch != CLOSE_BRACE && ch != '#') 707 ch = add_then_next (b, ch); 708 709 savchar(ch); 710 add1_buffer(b, '\0'); 711 ret = ck_strdup(get_buffer(b)); 712 free_buffer(b); 713 return ret; 714 } 715 716 /* Store a label (or label reference) created by a `:', `b', or `t' 717 command so that the jump to/from the label can be backpatched after 718 compilation is complete, or a reference created by a `{' to be 719 backpatched when the corresponding `}' is found. */ 720 static struct sed_label *setup_label 721 P_((struct sed_label *, countT, char *, const struct error_info *)); 722 static struct sed_label * 723 setup_label(list, idx, name, err_info) 724 struct sed_label *list; 725 countT idx; 726 char *name; 727 const struct error_info *err_info; 728 { 729 struct sed_label *ret = OB_MALLOC(&obs, 1, struct sed_label); 730 ret->v_index = idx; 731 ret->name = name; 732 if (err_info) 733 MEMCPY(&ret->err_info, err_info, sizeof (ret->err_info)); 734 ret->next = list; 735 return ret; 736 } 737 738 static struct sed_label *release_label P_((struct sed_label *list_head)); 739 static struct sed_label * 740 release_label(list_head) 741 struct sed_label *list_head; 742 { 743 struct sed_label *ret; 744 745 if (!list_head) 746 return NULL; 747 ret = list_head->next; 748 749 FREE(list_head->name); 750 751 #if 0 752 /* We use obstacks */ 753 FREE(list_head); 754 #endif 755 return ret; 756 } 757 758 static struct replacement *new_replacement P_((char *, size_t, 759 enum replacement_types)); 760 static struct replacement * 761 new_replacement(text, length, type) 762 char *text; 763 size_t length; 764 enum replacement_types type; 765 { 766 struct replacement *r = OB_MALLOC(&obs, 1, struct replacement); 767 768 r->prefix = text; 769 r->prefix_length = length; 770 r->subst_id = -1; 771 r->repl_type = type; 772 773 /* r-> next = NULL; */ 774 return r; 775 } 776 777 static void setup_replacement P_((struct subst *, const char *, size_t)); 778 static void 779 setup_replacement(sub, text, length) 780 struct subst *sub; 781 const char *text; 782 size_t length; 783 { 784 char *base; 785 char *p; 786 char *text_end; 787 enum replacement_types repl_type = REPL_ASIS, save_type = REPL_ASIS; 788 struct replacement root; 789 struct replacement *tail; 790 791 sub->max_id = 0; 792 base = MEMDUP(text, length, char); 793 length = normalize_text(base, length, TEXT_REPLACEMENT); 794 795 text_end = base + length; 796 tail = &root; 797 798 for (p=base; p<text_end; ++p) 799 { 800 if (*p == '\\') 801 { 802 /* Preceding the backslash may be some literal text: */ 803 tail = tail->next = 804 new_replacement(base, CAST(size_t)(p - base), repl_type); 805 806 repl_type = save_type; 807 808 /* Skip the backslash and look for a numeric back-reference, 809 or a case-munging escape if not in POSIX mode: */ 810 ++p; 811 if (p == text_end) 812 ++tail->prefix_length; 813 814 else if (posixicity == POSIXLY_BASIC && !ISDIGIT (*p)) 815 { 816 p[-1] = *p; 817 ++tail->prefix_length; 818 } 819 820 else 821 switch (*p) 822 { 823 case '0': case '1': case '2': case '3': case '4': 824 case '5': case '6': case '7': case '8': case '9': 825 tail->subst_id = *p - '0'; 826 if (sub->max_id < tail->subst_id) 827 sub->max_id = tail->subst_id; 828 break; 829 830 case 'L': 831 repl_type = REPL_LOWERCASE; 832 save_type = REPL_LOWERCASE; 833 break; 834 835 case 'U': 836 repl_type = REPL_UPPERCASE; 837 save_type = REPL_UPPERCASE; 838 break; 839 840 case 'E': 841 repl_type = REPL_ASIS; 842 save_type = REPL_ASIS; 843 break; 844 845 case 'l': 846 save_type = repl_type; 847 repl_type |= REPL_LOWERCASE_FIRST; 848 break; 849 850 case 'u': 851 save_type = repl_type; 852 repl_type |= REPL_UPPERCASE_FIRST; 853 break; 854 855 default: 856 p[-1] = *p; 857 ++tail->prefix_length; 858 } 859 860 base = p + 1; 861 } 862 else if (*p == '&') 863 { 864 /* Preceding the ampersand may be some literal text: */ 865 tail = tail->next = 866 new_replacement(base, CAST(size_t)(p - base), repl_type); 867 868 repl_type = save_type; 869 tail->subst_id = 0; 870 base = p + 1; 871 } 872 } 873 /* There may be some trailing literal text: */ 874 if (base < text_end) 875 tail = tail->next = 876 new_replacement(base, CAST(size_t)(text_end - base), repl_type); 877 878 tail->next = NULL; 879 sub->replacement = root.next; 880 } 881 882 static void read_text P_((struct text_buf *buf, int leadin_ch)); 883 static void 884 read_text(buf, leadin_ch) 885 struct text_buf *buf; 886 int leadin_ch; 887 { 888 int ch; 889 890 /* Should we start afresh (as opposed to continue a partial text)? */ 891 if (buf) 892 { 893 if (pending_text) 894 free_buffer(pending_text); 895 pending_text = init_buffer(); 896 buf->text = NULL; 897 buf->text_length = 0; 898 old_text_buf = buf; 899 } 900 /* assert(old_text_buf != NULL); */ 901 902 if (leadin_ch == EOF) 903 return; 904 905 if (leadin_ch != '\n') 906 add1_buffer(pending_text, leadin_ch); 907 908 ch = inchar(); 909 while (ch != EOF && ch != '\n') 910 { 911 if (ch == '\\') 912 { 913 ch = inchar(); 914 if (ch != EOF) 915 add1_buffer (pending_text, '\\'); 916 } 917 918 if (ch == EOF) 919 { 920 add1_buffer (pending_text, '\n'); 921 return; 922 } 923 924 ch = add_then_next (pending_text, ch); 925 } 926 927 add1_buffer(pending_text, '\n'); 928 if (!buf) 929 buf = old_text_buf; 930 buf->text_length = normalize_text (get_buffer (pending_text), 931 size_buffer (pending_text), TEXT_BUFFER); 932 buf->text = MEMDUP(get_buffer(pending_text), buf->text_length, char); 933 free_buffer(pending_text); 934 pending_text = NULL; 935 } 936 937 938 /* Try to read an address for a sed command. If it succeeds, 940 return non-zero and store the resulting address in `*addr'. 941 If the input doesn't look like an address read nothing 942 and return zero. */ 943 static bool compile_address P_((struct addr *addr, int ch)); 944 static bool 945 compile_address(addr, ch) 946 struct addr *addr; 947 int ch; 948 { 949 addr->addr_type = ADDR_IS_NULL; 950 addr->addr_step = 0; 951 addr->addr_number = ~(countT)0; /* extremely unlikely to ever match */ 952 addr->addr_regex = NULL; 953 954 if (ch == '/' || ch == '\\') 955 { 956 int flags = 0; 957 struct buffer *b; 958 addr->addr_type = ADDR_IS_REGEX; 959 if (ch == '\\') 960 ch = inchar(); 961 if ( !(b = match_slash(ch, true)) ) 962 bad_prog(_(UNTERM_ADDR_RE)); 963 964 for(;;) 965 { 966 ch = in_nonblank(); 967 if (posixicity == POSIXLY_BASIC) 968 goto posix_address_modifier; 969 switch(ch) 970 { 971 case 'I': /* GNU extension */ 972 flags |= REG_ICASE; 973 break; 974 975 #ifdef REG_PERL 976 case 'S': /* GNU extension */ 977 if (extended_regexp_flags & REG_PERL) 978 flags |= REG_DOTALL; 979 break; 980 981 case 'X': /* GNU extension */ 982 if (extended_regexp_flags & REG_PERL) 983 flags |= REG_EXTENDED; 984 break; 985 #endif 986 987 case 'M': /* GNU extension */ 988 flags |= REG_NEWLINE; 989 break; 990 991 default: 992 posix_address_modifier: 993 savchar (ch); 994 addr->addr_regex = compile_regex (b, flags, 0); 995 free_buffer(b); 996 return true; 997 } 998 } 999 } 1000 else if (ISDIGIT(ch)) 1001 { 1002 addr->addr_number = in_integer(ch); 1003 addr->addr_type = ADDR_IS_NUM; 1004 ch = in_nonblank(); 1005 if (ch != '~' || posixicity == POSIXLY_BASIC) 1006 { 1007 savchar(ch); 1008 } 1009 else 1010 { 1011 countT step = in_integer(in_nonblank()); 1012 if (step > 0) 1013 { 1014 addr->addr_step = step; 1015 addr->addr_type = ADDR_IS_NUM_MOD; 1016 } 1017 } 1018 } 1019 else if ((ch == '+' || ch == '~') && posixicity != POSIXLY_BASIC) 1020 { 1021 addr->addr_step = in_integer(in_nonblank()); 1022 if (addr->addr_step==0) 1023 ; /* default to ADDR_IS_NULL; forces matching to stop on next line */ 1024 else if (ch == '+') 1025 addr->addr_type = ADDR_IS_STEP; 1026 else 1027 addr->addr_type = ADDR_IS_STEP_MOD; 1028 } 1029 else if (ch == '$') 1030 { 1031 addr->addr_type = ADDR_IS_LAST; 1032 } 1033 else 1034 return false; 1035 1036 return true; 1037 } 1038 1039 /* Read a program (or a subprogram within `{' `}' pairs) in and store 1040 the compiled form in `*vector'. Return a pointer to the new vector. */ 1041 static struct vector *compile_program P_((struct vector *)); 1042 static struct vector * 1043 compile_program(vector) 1044 struct vector *vector; 1045 { 1046 struct sed_cmd *cur_cmd; 1047 struct buffer *b; 1048 int ch; 1049 1050 if (!vector) 1051 { 1052 vector = MALLOC(1, struct vector); 1053 vector->v = NULL; 1054 vector->v_allocated = 0; 1055 vector->v_length = 0; 1056 1057 obstack_init (&obs); 1058 } 1059 if (pending_text) 1060 read_text(NULL, '\n'); 1061 1062 for (;;) 1063 { 1064 struct addr a; 1065 1066 while ((ch=inchar()) == ';' || ISSPACE(ch)) 1067 ; 1068 if (ch == EOF) 1069 break; 1070 1071 cur_cmd = next_cmd_entry(&vector); 1072 if (compile_address(&a, ch)) 1073 { 1074 if (a.addr_type == ADDR_IS_STEP 1075 || a.addr_type == ADDR_IS_STEP_MOD) 1076 bad_prog(_(BAD_STEP)); 1077 1078 cur_cmd->a1 = MEMDUP(&a, 1, struct addr); 1079 ch = in_nonblank(); 1080 if (ch == ',') 1081 { 1082 if (!compile_address(&a, in_nonblank())) 1083 bad_prog(_(BAD_COMMA)); 1084 1085 cur_cmd->a2 = MEMDUP(&a, 1, struct addr); 1086 ch = in_nonblank(); 1087 } 1088 1089 if ((cur_cmd->a1->addr_type == ADDR_IS_NUM 1090 && cur_cmd->a1->addr_number == 0) 1091 && ((!cur_cmd->a2 || cur_cmd->a2->addr_type != ADDR_IS_REGEX) 1092 || posixicity == POSIXLY_BASIC)) 1093 bad_prog(_(INVALID_LINE_0)); 1094 } 1095 if (ch == '!') 1096 { 1097 cur_cmd->addr_bang = true; 1098 ch = in_nonblank(); 1099 if (ch == '!') 1100 bad_prog(_(BAD_BANG)); 1101 } 1102 1103 /* Do not accept extended commands in --posix mode. Also, 1104 a few commands only accept one address in that mode. */ 1105 if (posixicity == POSIXLY_BASIC) 1106 switch (ch) 1107 { 1108 case 'e': case 'v': case 'z': case 'L': 1109 case 'Q': case 'T': case 'R': case 'W': 1110 bad_command(ch); 1111 1112 case 'a': case 'i': case 'l': 1113 case '=': case 'r': 1114 if (cur_cmd->a2) 1115 bad_prog(_(ONE_ADDR)); 1116 } 1117 1118 cur_cmd->cmd = ch; 1119 switch (ch) 1120 { 1121 case '#': 1122 if (cur_cmd->a1) 1123 bad_prog(_(NO_SHARP_ADDR)); 1124 ch = inchar(); 1125 if (ch=='n' && first_script && cur_input.line < 2) 1126 if ( (prog.base && prog.cur==2+prog.base) 1127 || (prog.file && !prog.base && 2==ftell(prog.file))) 1128 no_default_output = true; 1129 while (ch != EOF && ch != '\n') 1130 ch = inchar(); 1131 continue; /* restart the for (;;) loop */ 1132 1133 case 'v': 1134 /* This is an extension. Programs needing GNU sed might start 1135 * with a `v' command so that other seds will stop. 1136 * We compare the version and ignore POSIXLY_CORRECT. 1137 */ 1138 { 1139 char *version = read_label (); 1140 char *compared_version; 1141 compared_version = (*version == '\0') ? "4.0" : version; 1142 if (strverscmp (compared_version, SED_FEATURE_VERSION) > 0) 1143 bad_prog(_(ANCIENT_VERSION)); 1144 1145 free (version); 1146 posixicity = POSIXLY_EXTENDED; 1147 } 1148 continue; 1149 1150 case '{': 1151 blocks = setup_label(blocks, vector->v_length, NULL, &cur_input); 1152 cur_cmd->addr_bang = !cur_cmd->addr_bang; 1153 break; 1154 1155 case '}': 1156 if (!blocks) 1157 bad_prog(_(EXCESS_CLOSE_BRACE)); 1158 if (cur_cmd->a1) 1159 bad_prog(_(NO_CLOSE_BRACE_ADDR)); 1160 ch = in_nonblank(); 1161 if (ch == CLOSE_BRACE || ch == '#') 1162 savchar(ch); 1163 else if (ch != EOF && ch != '\n' && ch != ';') 1164 bad_prog(_(EXCESS_JUNK)); 1165 1166 vector->v[blocks->v_index].x.jump_index = vector->v_length; 1167 blocks = release_label(blocks); /* done with this entry */ 1168 break; 1169 1170 case 'e': 1171 ch = in_nonblank(); 1172 if (ch == EOF || ch == '\n') 1173 { 1174 cur_cmd->x.cmd_txt.text_length = 0; 1175 break; 1176 } 1177 else 1178 goto read_text_to_slash; 1179 1180 case 'a': 1181 case 'i': 1182 case 'c': 1183 ch = in_nonblank(); 1184 1185 read_text_to_slash: 1186 if (ch == EOF) 1187 bad_prog(_(EXPECTED_SLASH)); 1188 1189 if (ch == '\\') 1190 ch = inchar(); 1191 else 1192 { 1193 if (posixicity == POSIXLY_BASIC) 1194 bad_prog(_(EXPECTED_SLASH)); 1195 savchar(ch); 1196 ch = '\n'; 1197 } 1198 1199 read_text(&cur_cmd->x.cmd_txt, ch); 1200 break; 1201 1202 case ':': 1203 if (cur_cmd->a1) 1204 bad_prog(_(NO_COLON_ADDR)); 1205 labels = setup_label(labels, vector->v_length, read_label(), NULL); 1206 break; 1207 1208 case 'T': 1209 case 'b': 1210 case 't': 1211 jumps = setup_label(jumps, vector->v_length, read_label(), NULL); 1212 break; 1213 1214 case 'Q': 1215 case 'q': 1216 if (cur_cmd->a2) 1217 bad_prog(_(ONE_ADDR)); 1218 /* Fall through */ 1219 1220 case 'L': 1221 case 'l': 1222 ch = in_nonblank(); 1223 if (ISDIGIT(ch) && posixicity != POSIXLY_BASIC) 1224 { 1225 cur_cmd->x.int_arg = in_integer(ch); 1226 ch = in_nonblank(); 1227 } 1228 else 1229 cur_cmd->x.int_arg = -1; 1230 1231 if (ch == CLOSE_BRACE || ch == '#') 1232 savchar(ch); 1233 else if (ch != EOF && ch != '\n' && ch != ';') 1234 bad_prog(_(EXCESS_JUNK)); 1235 1236 break; 1237 1238 case '=': 1239 case 'd': 1240 case 'D': 1241 case 'g': 1242 case 'G': 1243 case 'h': 1244 case 'H': 1245 case 'n': 1246 case 'N': 1247 case 'p': 1248 case 'P': 1249 case 'z': 1250 case 'x': 1251 ch = in_nonblank(); 1252 if (ch == CLOSE_BRACE || ch == '#') 1253 savchar(ch); 1254 else if (ch != EOF && ch != '\n' && ch != ';') 1255 bad_prog(_(EXCESS_JUNK)); 1256 break; 1257 1258 case 'r': 1259 b = read_filename(); 1260 cur_cmd->x.fname = ck_strdup(get_buffer(b)); 1261 free_buffer(b); 1262 break; 1263 1264 case 'R': 1265 cur_cmd->x.fp = get_openfile(&file_read, read_mode, false)->fp; 1266 break; 1267 1268 case 'W': 1269 case 'w': 1270 cur_cmd->x.outf = get_openfile(&file_write, "w", true); 1271 break; 1272 1273 case 's': 1274 { 1275 struct buffer *b2; 1276 int flags; 1277 int slash; 1278 1279 slash = inchar(); 1280 if ( !(b = match_slash(slash, true)) ) 1281 bad_prog(_(UNTERM_S_CMD)); 1282 if ( !(b2 = match_slash(slash, false)) ) 1283 bad_prog(_(UNTERM_S_CMD)); 1284 1285 cur_cmd->x.cmd_subst = OB_MALLOC(&obs, 1, struct subst); 1286 setup_replacement(cur_cmd->x.cmd_subst, 1287 get_buffer(b2), size_buffer(b2)); 1288 free_buffer(b2); 1289 1290 flags = mark_subst_opts(cur_cmd->x.cmd_subst); 1291 cur_cmd->x.cmd_subst->regx = 1292 compile_regex(b, flags, cur_cmd->x.cmd_subst->max_id + 1); 1293 free_buffer(b); 1294 } 1295 break; 1296 1297 case 'y': 1298 { 1299 size_t len, dest_len; 1300 int slash; 1301 struct buffer *b2; 1302 char *src_buf, *dest_buf; 1303 1304 slash = inchar(); 1305 if ( !(b = match_slash(slash, false)) ) 1306 bad_prog(_(UNTERM_Y_CMD)); 1307 src_buf = get_buffer(b); 1308 len = normalize_text(src_buf, size_buffer (b), TEXT_BUFFER); 1309 1310 if ( !(b2 = match_slash(slash, false)) ) 1311 bad_prog(_(UNTERM_Y_CMD)); 1312 dest_buf = get_buffer(b2); 1313 dest_len = normalize_text(dest_buf, size_buffer (b2), TEXT_BUFFER); 1314 1315 if (mb_cur_max > 1) 1316 { 1317 int i, j, idx, src_char_num; 1318 size_t *src_lens = MALLOC(len, size_t); 1319 char **trans_pairs; 1320 size_t mbclen; 1321 mbstate_t cur_stat; 1322 1323 /* Enumerate how many character the source buffer has. */ 1324 memset(&cur_stat, 0, sizeof(mbstate_t)); 1325 for (i = 0, j = 0; i < len;) 1326 { 1327 mbclen = MBRLEN (src_buf + i, len - i, &cur_stat); 1328 /* An invalid sequence, or a truncated multibyte character. 1329 We treat it as a singlebyte character. */ 1330 if (mbclen == (size_t) -1 || mbclen == (size_t) -2 1331 || mbclen == 0) 1332 mbclen = 1; 1333 src_lens[j++] = mbclen; 1334 i += mbclen; 1335 } 1336 src_char_num = j; 1337 1338 memset(&cur_stat, 0, sizeof(mbstate_t)); 1339 idx = 0; 1340 1341 /* trans_pairs = {src(0), dest(0), src(1), dest(1), ..., NULL} 1342 src(i) : pointer to i-th source character. 1343 dest(i) : pointer to i-th destination character. 1344 NULL : terminator */ 1345 trans_pairs = MALLOC(2 * src_char_num + 1, char*); 1346 cur_cmd->x.translatemb = trans_pairs; 1347 for (i = 0; i < src_char_num; i++) 1348 { 1349 if (idx >= dest_len) 1350 bad_prog(_(Y_CMD_LEN)); 1351 1352 /* Set the i-th source character. */ 1353 trans_pairs[2 * i] = MALLOC(src_lens[i] + 1, char); 1354 strncpy(trans_pairs[2 * i], src_buf, src_lens[i]); 1355 trans_pairs[2 * i][src_lens[i]] = '\0'; 1356 src_buf += src_lens[i]; /* Forward to next character. */ 1357 1358 /* Fetch the i-th destination character. */ 1359 mbclen = MBRLEN (dest_buf + idx, dest_len - idx, &cur_stat); 1360 /* An invalid sequence, or a truncated multibyte character. 1361 We treat it as a singlebyte character. */ 1362 if (mbclen == (size_t) -1 || mbclen == (size_t) -2 1363 || mbclen == 0) 1364 mbclen = 1; 1365 1366 /* Set the i-th destination character. */ 1367 trans_pairs[2 * i + 1] = MALLOC(mbclen + 1, char); 1368 strncpy(trans_pairs[2 * i + 1], dest_buf + idx, mbclen); 1369 trans_pairs[2 * i + 1][mbclen] = '\0'; 1370 idx += mbclen; /* Forward to next character. */ 1371 } 1372 trans_pairs[2 * i] = NULL; 1373 if (idx != dest_len) 1374 bad_prog(_(Y_CMD_LEN)); 1375 } 1376 else 1377 { 1378 unsigned char *translate = 1379 OB_MALLOC(&obs, YMAP_LENGTH, unsigned char); 1380 unsigned char *ustring = CAST(unsigned char *)src_buf; 1381 1382 if (len != dest_len) 1383 bad_prog(_(Y_CMD_LEN)); 1384 1385 for (len = 0; len < YMAP_LENGTH; len++) 1386 translate[len] = len; 1387 1388 while (dest_len--) 1389 translate[*ustring++] = (unsigned char)*dest_buf++; 1390 1391 cur_cmd->x.translate = translate; 1392 } 1393 1394 if ((ch = in_nonblank()) != EOF && ch != '\n' && ch != ';') 1395 bad_prog(_(EXCESS_JUNK)); 1396 1397 free_buffer(b); 1398 free_buffer(b2); 1399 } 1400 break; 1401 1402 case EOF: 1403 bad_prog(_(NO_COMMAND)); 1404 /*NOTREACHED*/ 1405 1406 default: 1407 bad_command (ch); 1408 /*NOTREACHED*/ 1409 } 1410 1411 /* this is buried down here so that "continue" statements will miss it */ 1412 ++vector->v_length; 1413 } 1414 return vector; 1415 } 1416 1417 1418 /* deal with \X escapes */ 1420 size_t 1421 normalize_text(buf, len, buftype) 1422 char *buf; 1423 size_t len; 1424 enum text_types buftype; 1425 { 1426 const char *bufend = buf + len; 1427 char *p = buf; 1428 char *q = buf; 1429 1430 /* This variable prevents normalizing text within bracket 1431 subexpressions when conforming to POSIX. If 0, we 1432 are not within a bracket expression. If -1, we are within a 1433 bracket expression but are not within [.FOO.], [=FOO=], 1434 or [:FOO:]. Otherwise, this is the '.', '=', or ':' 1435 respectively within these three types of subexpressions. */ 1436 int bracket_state = 0; 1437 1438 int mbclen; 1439 mbstate_t cur_stat; 1440 memset(&cur_stat, 0, sizeof(mbstate_t)); 1441 1442 while (p < bufend) 1443 { 1444 int c; 1445 mbclen = MBRLEN (p, bufend - p, &cur_stat); 1446 if (mbclen != 1) 1447 { 1448 /* An invalid sequence, or a truncated multibyte character. 1449 We treat it as a singlebyte character. */ 1450 if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) 1451 mbclen = 1; 1452 1453 memmove (q, p, mbclen); 1454 q += mbclen; 1455 p += mbclen; 1456 continue; 1457 } 1458 1459 if (*p == '\\' && p+1 < bufend && bracket_state == 0) 1460 switch ( (c = *++p) ) 1461 { 1462 #if defined __STDC__ && __STDC__-0 1463 case 'a': *q++ = '\a'; p++; continue; 1464 #else /* Not STDC; we'll just assume ASCII */ 1465 case 'a': *q++ = '\007'; p++; continue; 1466 #endif 1467 /* case 'b': *q++ = '\b'; p++; continue; --- conflicts with \b RE */ 1468 case 'f': *q++ = '\f'; p++; continue; 1469 case '\n': /*fall through */ 1470 case 'n': *q++ = '\n'; p++; continue; 1471 case 'r': *q++ = '\r'; p++; continue; 1472 case 't': *q++ = '\t'; p++; continue; 1473 case 'v': *q++ = '\v'; p++; continue; 1474 1475 case 'd': /* decimal byte */ 1476 p = convert_number(q, p+1, bufend, 10, 3, 'd'); 1477 q++; 1478 continue; 1479 1480 case 'x': /* hexadecimal byte */ 1481 p = convert_number(q, p+1, bufend, 16, 2, 'x'); 1482 q++; 1483 continue; 1484 1485 #ifdef REG_PERL 1486 case '0': case '1': case '2': case '3': 1487 case '4': case '5': case '6': case '7': 1488 if ((extended_regexp_flags & REG_PERL) 1489 && p+1 < bufend 1490 && p[1] >= '0' && p[1] <= '9') 1491 { 1492 p = convert_number(q, p, bufend, 8, 3, *p); 1493 q++; 1494 } 1495 else 1496 { 1497 /* we just pass the \ up one level for interpretation */ 1498 if (buftype != TEXT_BUFFER) 1499 *q++ = '\\'; 1500 } 1501 1502 continue; 1503 1504 case 'o': /* octal byte */ 1505 if (!(extended_regexp_flags & REG_PERL)) 1506 { 1507 p = convert_number(q, p+1, bufend, 8, 3, 'o'); 1508 q++; 1509 } 1510 else 1511 { 1512 /* we just pass the \ up one level for interpretation */ 1513 if (buftype != TEXT_BUFFER) 1514 *q++ = '\\'; 1515 } 1516 1517 continue; 1518 #else 1519 case 'o': /* octal byte */ 1520 p = convert_number(q, p+1, bufend, 8, 3, 'o'); 1521 q++; 1522 continue; 1523 #endif 1524 1525 case 'c': 1526 if (++p < bufend) 1527 { 1528 *q++ = toupper(*p) ^ 0x40; 1529 p++; 1530 continue; 1531 } 1532 else 1533 { 1534 /* we just pass the \ up one level for interpretation */ 1535 if (buftype != TEXT_BUFFER) 1536 *q++ = '\\'; 1537 continue; 1538 } 1539 1540 default: 1541 /* we just pass the \ up one level for interpretation */ 1542 if (buftype != TEXT_BUFFER) 1543 *q++ = '\\'; 1544 break; 1545 } 1546 else if (buftype == TEXT_REGEX && posixicity != POSIXLY_EXTENDED) 1547 switch (*p) 1548 { 1549 case '[': 1550 if (!bracket_state) 1551 bracket_state = -1; 1552 break; 1553 1554 case ':': 1555 case '.': 1556 case '=': 1557 if (bracket_state == -1 && p[-1] == '[') 1558 bracket_state = *p; 1559 break; 1560 1561 case ']': 1562 if (bracket_state == 0) 1563 ; 1564 else if (bracket_state == -1) 1565 bracket_state = 0; 1566 else if (p[-2] != bracket_state && p[-1] == bracket_state) 1567 bracket_state = -1; 1568 break; 1569 } 1570 1571 *q++ = *p++; 1572 } 1573 return (size_t)(q - buf); 1574 } 1575 1576 1577 /* `str' is a string (from the command line) that contains a sed command. 1578 Compile the command, and add it to the end of `cur_program'. */ 1579 struct vector * 1580 compile_string(cur_program, str, len) 1581 struct vector *cur_program; 1582 char *str; 1583 size_t len; 1584 { 1585 static countT string_expr_count = 0; 1586 struct vector *ret; 1587 1588 prog.file = NULL; 1589 prog.base = CAST(unsigned char *)str; 1590 prog.cur = prog.base; 1591 prog.end = prog.cur + len; 1592 1593 cur_input.line = 0; 1594 cur_input.name = NULL; 1595 cur_input.string_expr_count = ++string_expr_count; 1596 1597 ret = compile_program(cur_program); 1598 prog.base = NULL; 1599 prog.cur = NULL; 1600 prog.end = NULL; 1601 1602 first_script = false; 1603 return ret; 1604 } 1605 1606 /* `cmdfile' is the name of a file containing sed commands. 1607 Read them in and add them to the end of `cur_program'. 1608 */ 1609 struct vector * 1610 compile_file(cur_program, cmdfile) 1611 struct vector *cur_program; 1612 const char *cmdfile; 1613 { 1614 struct vector *ret; 1615 1616 prog.file = stdin; 1617 if (cmdfile[0] != '-' || cmdfile[1] != '\0') 1618 prog.file = ck_fopen(cmdfile, "rt", true); 1619 1620 cur_input.line = 1; 1621 cur_input.name = cmdfile; 1622 cur_input.string_expr_count = 0; 1623 1624 ret = compile_program(cur_program); 1625 if (prog.file != stdin) 1626 ck_fclose(prog.file); 1627 prog.file = NULL; 1628 1629 first_script = false; 1630 return ret; 1631 } 1632 1633 /* Make any checks which require the whole program to have been read. 1634 In particular: this backpatches the jump targets. 1635 Any cleanup which can be done after these checks is done here also. */ 1636 void 1637 check_final_program(program) 1638 struct vector *program; 1639 { 1640 struct sed_label *go; 1641 struct sed_label *lbl; 1642 1643 /* do all "{"s have a corresponding "}"? */ 1644 if (blocks) 1645 { 1646 /* update info for error reporting: */ 1647 MEMCPY(&cur_input, &blocks->err_info, sizeof (cur_input)); 1648 bad_prog(_(EXCESS_OPEN_BRACE)); 1649 } 1650 1651 /* was the final command an unterminated a/c/i command? */ 1652 if (pending_text) 1653 { 1654 old_text_buf->text_length = size_buffer(pending_text); 1655 if (old_text_buf->text_length) 1656 old_text_buf->text = MEMDUP(get_buffer(pending_text), 1657 old_text_buf->text_length, char); 1658 free_buffer(pending_text); 1659 pending_text = NULL; 1660 } 1661 1662 for (go = jumps; go; go = release_label(go)) 1663 { 1664 for (lbl = labels; lbl; lbl = lbl->next) 1665 if (strcmp(lbl->name, go->name) == 0) 1666 break; 1667 if (lbl) 1668 { 1669 program->v[go->v_index].x.jump_index = lbl->v_index; 1670 } 1671 else 1672 { 1673 if (*go->name) 1674 panic(_("can't find label for jump to `%s'"), go->name); 1675 program->v[go->v_index].x.jump_index = program->v_length; 1676 } 1677 } 1678 jumps = NULL; 1679 1680 for (lbl = labels; lbl; lbl = release_label(lbl)) 1681 ; 1682 labels = NULL; 1683 1684 /* There is no longer a need to track file names: */ 1685 { 1686 struct output *p; 1687 1688 for (p=file_read; p; p=p->link) 1689 if (p->name) 1690 { 1691 FREE(p->name); 1692 p->name = NULL; 1693 } 1694 1695 for (p=file_write; p; p=p->link) 1696 if (p->name) 1697 { 1698 FREE(p->name); 1699 p->name = NULL; 1700 } 1701 } 1702 } 1703 1704 /* Rewind all resources which were allocated in this module. */ 1705 void 1706 rewind_read_files() 1707 { 1708 struct output *p; 1709 1710 for (p=file_read; p; p=p->link) 1711 if (p->fp) 1712 rewind(p->fp); 1713 } 1714 1715 /* Release all resources which were allocated in this module. */ 1716 void 1717 finish_program(program) 1718 struct vector *program; 1719 { 1720 /* close all files... */ 1721 { 1722 struct output *p, *q; 1723 1724 for (p=file_read; p; p=q) 1725 { 1726 if (p->fp) 1727 ck_fclose(p->fp); 1728 q = p->link; 1729 #if 0 1730 /* We use obstacks. */ 1731 FREE(p); 1732 #endif 1733 } 1734 1735 for (p=file_write; p; p=q) 1736 { 1737 if (p->fp) 1738 ck_fclose(p->fp); 1739 q = p->link; 1740 #if 0 1741 /* We use obstacks. */ 1742 FREE(p); 1743 #endif 1744 } 1745 file_read = file_write = NULL; 1746 } 1747 1748 #ifdef DEBUG_LEAKS 1749 obstack_free (&obs, NULL); 1750 #endif /*DEBUG_LEAKS*/ 1751 } 1752