1 /* GNU SED, a batch stream editor. 2 Copyright (C) 1989,90,91,92,93,94,95,98,99,2002,2003,2004,2005,2006,2008,2009 3 Free Software Foundation, Inc. 4 5 This program is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3, or (at your option) 8 any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program; if not, write to the Free Software 17 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 18 19 #undef EXPERIMENTAL_DASH_N_OPTIMIZATION /*don't use -- is very buggy*/ 20 #define INITIAL_BUFFER_SIZE 50 21 #define FREAD_BUFFER_SIZE 8192 22 23 #include "sed.h" 24 25 #include <stddef.h> 26 #include <stdio.h> 27 #include <ctype.h> 28 29 #include <errno.h> 30 #ifndef errno 31 extern int errno; 32 #endif 33 34 #ifndef BOOTSTRAP 35 #include <selinux/selinux.h> 36 #include <selinux/context.h> 37 #endif 38 39 #ifdef HAVE_UNISTD_H 40 # include <unistd.h> 41 #endif 42 43 #ifndef BOOTSTRAP 44 #include "acl.h" 45 #endif 46 47 #ifdef __GNUC__ 48 # if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__-0 >= 7) 49 /* silence warning about unused parameter even for "gcc -W -Wunused" */ 50 # define UNUSED __attribute__((unused)) 51 # endif 52 #endif 53 #ifndef UNUSED 54 # define UNUSED 55 #endif 56 57 #ifdef HAVE_STRINGS_H 58 # include <strings.h> 59 #else 60 # include <string.h> 61 #endif /*HAVE_STRINGS_H*/ 62 #ifdef HAVE_MEMORY_H 63 # include <memory.h> 64 #endif 65 66 #ifndef HAVE_STRCHR 67 # define strchr index 68 # define strrchr rindex 69 #endif 70 71 #ifdef HAVE_STDLIB_H 72 # include <stdlib.h> 73 #endif 74 #ifndef EXIT_SUCCESS 75 # define EXIT_SUCCESS 0 76 #endif 77 78 #ifdef HAVE_SYS_TYPES_H 79 # include <sys/types.h> 80 #endif 81 82 #include <sys/stat.h> 83 #include "stat-macros.h" 84 85 86 /* Sed operates a line at a time. */ 88 struct line { 89 char *text; /* Pointer to line allocated by malloc. */ 90 char *active; /* Pointer to non-consumed part of text. */ 91 size_t length; /* Length of text (or active, if used). */ 92 size_t alloc; /* Allocated space for active. */ 93 bool chomped; /* Was a trailing newline dropped? */ 94 #ifdef HAVE_MBRTOWC 95 mbstate_t mbstate; 96 #endif 97 }; 98 99 #ifdef HAVE_MBRTOWC 100 #define SIZEOF_LINE offsetof (struct line, mbstate) 101 #else 102 #define SIZEOF_LINE (sizeof (struct line)) 103 #endif 104 105 /* A queue of text to write out at the end of a cycle 106 (filled by the "a", "r" and "R" commands.) */ 107 struct append_queue { 108 const char *fname; 109 char *text; 110 size_t textlen; 111 struct append_queue *next; 112 bool free; 113 }; 114 115 /* State information for the input stream. */ 116 struct input { 117 /* The list of yet-to-be-opened files. It is invalid for file_list 118 to be NULL. When *file_list is NULL we are currently processing 119 the last file. */ 120 121 char **file_list; 122 123 /* Count of files we failed to open. */ 124 countT bad_count; 125 126 /* Current input line number (over all files). */ 127 countT line_number; 128 129 /* True if we'll reset line numbers and addresses before 130 starting to process the next (possibly the first) file. */ 131 bool reset_at_next_file; 132 133 /* Function to read one line. If FP is NULL, read_fn better not 134 be one which uses fp; in particular, read_always_fail() is 135 recommended. */ 136 bool (*read_fn) P_((struct input *)); /* read one line */ 137 138 char *out_file_name; 139 140 const char *in_file_name; 141 142 /* Owner and mode to be set just before closing the file. */ 143 struct stat st; 144 145 /* if NULL, none of the following are valid */ 146 FILE *fp; 147 148 bool no_buffering; 149 }; 150 151 152 /* Have we done any replacements lately? This is used by the `t' command. */ 153 static bool replaced = false; 154 155 /* The current output file (stdout if -i is not being used. */ 156 static struct output output_file; 157 158 /* The `current' input line. */ 159 static struct line line; 160 161 /* An input line used to accumulate the result of the s and e commands. */ 162 static struct line s_accum; 163 164 /* An input line that's been stored by later use by the program */ 165 static struct line hold; 166 167 /* The buffered input look-ahead. The only field that should be 168 used outside of read_mem_line() or line_init() is buffer.length. */ 169 static struct line buffer; 170 171 static struct append_queue *append_head = NULL; 172 static struct append_queue *append_tail = NULL; 173 174 175 #ifdef BOOTSTRAP 177 /* We can't be sure that the system we're boostrapping on has 178 memchr(), and ../lib/memchr.c requires configuration knowledge 179 about how many bits are in a `long'. This implementation 180 is far from ideal, but it should get us up-and-limping well 181 enough to run the configure script, which is all that matters. 182 */ 183 # ifdef memchr 184 # undef memchr 185 # endif 186 # define memchr bootstrap_memchr 187 188 static VOID *bootstrap_memchr P_((const VOID *s, int c, size_t n)); 189 static VOID * 190 bootstrap_memchr(s, c, n) 191 const VOID *s; 192 int c; 193 size_t n; 194 { 195 char *p; 196 197 for (p=(char *)s; n-- > 0; ++p) 198 if (*p == c) 199 return p; 200 return CAST(VOID *)0; 201 } 202 #endif /*BOOTSTRAP*/ 203 204 /* increase a struct line's length, making some attempt at 205 keeping realloc() calls under control by padding for future growth. */ 206 static void resize_line P_((struct line *, size_t)); 207 static void 208 resize_line(lb, len) 209 struct line *lb; 210 size_t len; 211 { 212 int inactive; 213 inactive = lb->active - lb->text; 214 215 /* If the inactive part has got to more than two thirds of the buffer, 216 * remove it. */ 217 if (inactive > lb->alloc * 2) 218 { 219 MEMMOVE(lb->text, lb->active, lb->length); 220 lb->alloc += lb->active - lb->text; 221 lb->active = lb->text; 222 inactive = 0; 223 224 if (lb->alloc > len) 225 return; 226 } 227 228 lb->alloc *= 2; 229 if (lb->alloc < len) 230 lb->alloc = len; 231 if (lb->alloc < INITIAL_BUFFER_SIZE) 232 lb->alloc = INITIAL_BUFFER_SIZE; 233 234 lb->text = REALLOC(lb->text, inactive + lb->alloc, char); 235 lb->active = lb->text + inactive; 236 } 237 238 /* Append `length' bytes from `string' to the line `to'. */ 239 static void str_append P_((struct line *, const char *, size_t)); 240 static void 241 str_append(to, string, length) 242 struct line *to; 243 const char *string; 244 size_t length; 245 { 246 size_t new_length = to->length + length; 247 248 if (to->alloc < new_length) 249 resize_line(to, new_length); 250 MEMCPY(to->active + to->length, string, length); 251 to->length = new_length; 252 253 #ifdef HAVE_MBRTOWC 254 if (mb_cur_max > 1 && !is_utf8) 255 while (length) 256 { 257 size_t n = MBRLEN (string, length, &to->mbstate); 258 259 /* An invalid sequence is treated like a singlebyte character. */ 260 if (n == (size_t) -1) 261 { 262 memset (&to->mbstate, 0, sizeof (to->mbstate)); 263 n = 1; 264 } 265 266 if (n > 0) 267 { 268 string += n; 269 length -= n; 270 } 271 else 272 break; 273 } 274 #endif 275 } 276 277 static void str_append_modified P_((struct line *, const char *, size_t, 278 enum replacement_types)); 279 static void 280 str_append_modified(to, string, length, type) 281 struct line *to; 282 const char *string; 283 size_t length; 284 enum replacement_types type; 285 { 286 #ifdef HAVE_MBRTOWC 287 mbstate_t from_stat; 288 289 if (type == REPL_ASIS) 290 { 291 str_append(to, string, length); 292 return; 293 } 294 295 if (to->alloc - to->length < length * mb_cur_max) 296 resize_line(to, to->length + length * mb_cur_max); 297 298 MEMCPY (&from_stat, &to->mbstate, sizeof(mbstate_t)); 299 while (length) 300 { 301 wchar_t wc; 302 int n = MBRTOWC (&wc, string, length, &from_stat); 303 304 /* An invalid sequence is treated like a singlebyte character. */ 305 if (n == -1) 306 { 307 memset (&to->mbstate, 0, sizeof (from_stat)); 308 n = 1; 309 } 310 311 if (n > 0) 312 string += n, length -= n; 313 else 314 { 315 /* Incomplete sequence, copy it manually. */ 316 str_append(to, string, length); 317 return; 318 } 319 320 /* Convert the first character specially... */ 321 if (type & (REPL_UPPERCASE_FIRST | REPL_LOWERCASE_FIRST)) 322 { 323 if (type & REPL_UPPERCASE_FIRST) 324 wc = towupper(wc); 325 else 326 wc = towlower(wc); 327 328 type &= ~(REPL_LOWERCASE_FIRST | REPL_UPPERCASE_FIRST); 329 if (type == REPL_ASIS) 330 { 331 n = WCRTOMB (to->active + to->length, wc, &to->mbstate); 332 to->length += n; 333 str_append(to, string, length); 334 return; 335 } 336 } 337 338 else if (type & REPL_UPPERCASE) 339 wc = towupper(wc); 340 else 341 wc = towlower(wc); 342 343 /* Copy the new wide character to the end of the string. */ 344 n = WCRTOMB (to->active + to->length, wc, &to->mbstate); 345 to->length += n; 346 if (n == -1) 347 { 348 fprintf (stderr, "Case conversion produced an invalid character!"); 349 abort (); 350 } 351 } 352 #else 353 size_t old_length = to->length; 354 char *start, *end; 355 356 str_append(to, string, length); 357 start = to->active + old_length; 358 end = start + length; 359 360 /* Now do the required modifications. First \[lu]... */ 361 if (type & REPL_UPPERCASE_FIRST) 362 { 363 *start = toupper(*start); 364 start++; 365 type &= ~REPL_UPPERCASE_FIRST; 366 } 367 else if (type & REPL_LOWERCASE_FIRST) 368 { 369 *start = tolower(*start); 370 start++; 371 type &= ~REPL_LOWERCASE_FIRST; 372 } 373 374 if (type == REPL_ASIS) 375 return; 376 377 /* ...and then \[LU] */ 378 if (type == REPL_UPPERCASE) 379 for (; start != end; start++) 380 *start = toupper(*start); 381 else 382 for (; start != end; start++) 383 *start = tolower(*start); 384 #endif 385 } 386 387 /* Initialize a "struct line" buffer. Copy multibyte state from `state' 388 if not null. */ 389 static void line_init P_((struct line *, struct line *, size_t initial_size)); 390 static void 391 line_init(buf, state, initial_size) 392 struct line *buf; 393 struct line *state; 394 size_t initial_size; 395 { 396 buf->text = MALLOC(initial_size, char); 397 buf->active = buf->text; 398 buf->alloc = initial_size; 399 buf->length = 0; 400 buf->chomped = true; 401 402 #ifdef HAVE_MBRTOWC 403 if (state) 404 memcpy (&buf->mbstate, &state->mbstate, sizeof (buf->mbstate)); 405 else 406 memset (&buf->mbstate, 0, sizeof (buf->mbstate)); 407 #endif 408 } 409 410 /* Reset a "struct line" buffer to length zero. Copy multibyte state from 411 `state' if not null. */ 412 static void line_reset P_((struct line *, struct line *)); 413 static void 414 line_reset(buf, state) 415 struct line *buf, *state; 416 { 417 if (buf->alloc == 0) 418 line_init(buf, state, INITIAL_BUFFER_SIZE); 419 else 420 { 421 buf->length = 0; 422 #ifdef HAVE_MBRTOWC 423 if (state) 424 memcpy (&buf->mbstate, &state->mbstate, sizeof (buf->mbstate)); 425 else 426 memset (&buf->mbstate, 0, sizeof (buf->mbstate)); 427 #endif 428 } 429 } 430 431 /* Copy the contents of the line `from' into the line `to'. 432 This destroys the old contents of `to'. 433 Copy the multibyte state if `state' is true. */ 434 static void line_copy P_((struct line *from, struct line *to, int state)); 435 static void 436 line_copy(from, to, state) 437 struct line *from; 438 struct line *to; 439 int state; 440 { 441 /* Remove the inactive portion in the destination buffer. */ 442 to->alloc += to->active - to->text; 443 444 if (to->alloc < from->length) 445 { 446 to->alloc *= 2; 447 if (to->alloc < from->length) 448 to->alloc = from->length; 449 if (to->alloc < INITIAL_BUFFER_SIZE) 450 to->alloc = INITIAL_BUFFER_SIZE; 451 /* Use FREE()+MALLOC() instead of REALLOC() to 452 avoid unnecessary copying of old text. */ 453 FREE(to->text); 454 to->text = MALLOC(to->alloc, char); 455 } 456 457 to->active = to->text; 458 to->length = from->length; 459 to->chomped = from->chomped; 460 MEMCPY(to->active, from->active, from->length); 461 462 #ifdef HAVE_MBRTOWC 463 if (state) 464 MEMCPY(&to->mbstate, &from->mbstate, sizeof (from->mbstate)); 465 #endif 466 } 467 468 /* Append the contents of the line `from' to the line `to'. 469 Copy the multibyte state if `state' is true. */ 470 static void line_append P_((struct line *from, struct line *to, int state)); 471 static void 472 line_append(from, to, state) 473 struct line *from; 474 struct line *to; 475 int state; 476 { 477 str_append(to, "\n", 1); 478 str_append(to, from->active, from->length); 479 to->chomped = from->chomped; 480 481 #ifdef HAVE_MBRTOWC 482 if (state) 483 MEMCPY (&to->mbstate, &from->mbstate, sizeof (from->mbstate)); 484 #endif 485 } 486 487 /* Exchange two "struct line" buffers. 488 Copy the multibyte state if `state' is true. */ 489 static void line_exchange P_((struct line *a, struct line *b, int state)); 490 static void 491 line_exchange(a, b, state) 492 struct line *a; 493 struct line *b; 494 int state; 495 { 496 struct line t; 497 498 if (state) 499 { 500 MEMCPY(&t, a, sizeof (struct line)); 501 MEMCPY( a, b, sizeof (struct line)); 502 MEMCPY( b, &t, sizeof (struct line)); 503 } 504 else 505 { 506 MEMCPY(&t, a, SIZEOF_LINE); 507 MEMCPY( a, b, SIZEOF_LINE); 508 MEMCPY( b, &t, SIZEOF_LINE); 509 } 510 } 511 512 513 /* dummy function to simplify read_pattern_space() */ 515 static bool read_always_fail P_((struct input *)); 516 static bool 517 read_always_fail(input) 518 struct input *input UNUSED; 519 { 520 return false; 521 } 522 523 static bool read_file_line P_((struct input *)); 524 static bool 525 read_file_line(input) 526 struct input *input; 527 { 528 static char *b; 529 static size_t blen; 530 531 long result = ck_getline (&b, &blen, input->fp); 532 if (result <= 0) 533 return false; 534 535 /* Remove the trailing new-line that is left by getline. */ 536 if (b[result - 1] == '\n') 537 --result; 538 else 539 line.chomped = false; 540 541 str_append(&line, b, result); 542 return true; 543 } 544 545 546 static inline void output_missing_newline P_((struct output *)); 548 static inline void 549 output_missing_newline(outf) 550 struct output *outf; 551 { 552 if (outf->missing_newline) 553 { 554 ck_fwrite("\n", 1, 1, outf->fp); 555 outf->missing_newline = false; 556 } 557 } 558 559 static inline void flush_output P_((FILE *)); 560 static inline void 561 flush_output(fp) 562 FILE *fp; 563 { 564 if (fp != stdout || unbuffered_output) 565 ck_fflush(fp); 566 } 567 568 static void output_line P_((const char *, size_t, int, struct output *)); 569 static void 570 output_line(text, length, nl, outf) 571 const char *text; 572 size_t length; 573 int nl; 574 struct output *outf; 575 { 576 if (!text) 577 return; 578 579 output_missing_newline(outf); 580 if (length) 581 ck_fwrite(text, 1, length, outf->fp); 582 if (nl) 583 ck_fwrite("\n", 1, 1, outf->fp); 584 else 585 outf->missing_newline = true; 586 587 flush_output(outf->fp); 588 } 589 590 static struct append_queue *next_append_slot P_((void)); 591 static struct append_queue * 592 next_append_slot() 593 { 594 struct append_queue *n = MALLOC(1, struct append_queue); 595 596 n->fname = NULL; 597 n->text = NULL; 598 n->textlen = 0; 599 n->next = NULL; 600 n->free = false; 601 602 if (append_tail) 603 append_tail->next = n; 604 else 605 append_head = n; 606 return append_tail = n; 607 } 608 609 static void release_append_queue P_((void)); 610 static void 611 release_append_queue() 612 { 613 struct append_queue *p, *q; 614 615 for (p=append_head; p; p=q) 616 { 617 if (p->free) 618 FREE(p->text); 619 620 q = p->next; 621 FREE(p); 622 } 623 append_head = append_tail = NULL; 624 } 625 626 static void dump_append_queue P_((void)); 627 static void 628 dump_append_queue() 629 { 630 struct append_queue *p; 631 632 output_missing_newline(&output_file); 633 for (p=append_head; p; p=p->next) 634 { 635 if (p->text) 636 ck_fwrite(p->text, 1, p->textlen, output_file.fp); 637 638 if (p->fname) 639 { 640 char buf[FREAD_BUFFER_SIZE]; 641 size_t cnt; 642 FILE *fp; 643 644 /* "If _fname_ does not exist or cannot be read, it shall 645 be treated as if it were an empty file, causing no error 646 condition." IEEE Std 1003.2-1992 647 So, don't fail. */ 648 fp = ck_fopen(p->fname, read_mode, false); 649 if (fp) 650 { 651 while ((cnt = ck_fread(buf, 1, sizeof buf, fp)) > 0) 652 ck_fwrite(buf, 1, cnt, output_file.fp); 653 ck_fclose(fp); 654 } 655 } 656 } 657 658 flush_output(output_file.fp); 659 release_append_queue(); 660 } 661 662 663 /* Compute the name of the backup file for in-place editing */ 665 static char *get_backup_file_name P_((const char *)); 666 static char * 667 get_backup_file_name(name) 668 const char *name; 669 { 670 char *old_asterisk, *asterisk, *backup, *p; 671 int name_length = strlen(name), backup_length = strlen(in_place_extension); 672 673 /* Compute the length of the backup file */ 674 for (asterisk = in_place_extension - 1, old_asterisk = asterisk + 1; 675 (asterisk = strchr(old_asterisk, '*')); 676 old_asterisk = asterisk + 1) 677 backup_length += name_length - 1; 678 679 p = backup = xmalloc(backup_length + 1); 680 681 /* Each iteration gobbles up to an asterisk */ 682 for (asterisk = in_place_extension - 1, old_asterisk = asterisk + 1; 683 (asterisk = strchr(old_asterisk, '*')); 684 old_asterisk = asterisk + 1) 685 { 686 MEMCPY (p, old_asterisk, asterisk - old_asterisk); 687 p += asterisk - old_asterisk; 688 strcpy (p, name); 689 p += name_length; 690 } 691 692 /* Tack on what's after the last asterisk */ 693 strcpy (p, old_asterisk); 694 return backup; 695 } 696 697 /* Initialize a struct input for the named file. */ 698 static void open_next_file P_((const char *name, struct input *)); 699 static void 700 open_next_file(name, input) 701 const char *name; 702 struct input *input; 703 { 704 buffer.length = 0; 705 706 if (name[0] == '-' && name[1] == '\0' && !in_place_extension) 707 { 708 clearerr(stdin); /* clear any stale EOF indication */ 709 input->fp = ck_fdopen (fileno (stdin), "stdin", read_mode, false); 710 } 711 else if ( ! (input->fp = ck_fopen(name, read_mode, false)) ) 712 { 713 const char *ptr = strerror(errno); 714 fprintf(stderr, _("%s: can't read %s: %s\n"), myname, name, ptr); 715 input->read_fn = read_always_fail; /* a redundancy */ 716 ++input->bad_count; 717 return; 718 } 719 720 input->read_fn = read_file_line; 721 722 if (in_place_extension) 723 { 724 int input_fd; 725 char *tmpdir, *p; 726 #ifndef BOOTSTRAP 727 security_context_t old_fscreatecon; 728 int reset_fscreatecon = 0; 729 memset (&old_fscreatecon, 0, sizeof (old_fscreatecon)); 730 #endif 731 732 if (follow_symlinks) 733 input->in_file_name = follow_symlink (name); 734 else 735 input->in_file_name = name; 736 737 /* get the base name */ 738 tmpdir = ck_strdup(input->in_file_name); 739 if ((p = strrchr(tmpdir, '/'))) 740 *p = 0; 741 else 742 strcpy(tmpdir, "."); 743 744 if (isatty (fileno (input->fp))) 745 panic(_("couldn't edit %s: is a terminal"), input->in_file_name); 746 747 input_fd = fileno (input->fp); 748 fstat (input_fd, &input->st); 749 if (!S_ISREG (input->st.st_mode)) 750 panic(_("couldn't edit %s: not a regular file"), input->in_file_name); 751 752 #ifndef BOOTSTRAP 753 if (is_selinux_enabled ()) 754 { 755 security_context_t con; 756 if (getfilecon (input->in_file_name, &con) != -1) 757 { 758 /* Save and restore the old context for the sake of w and W 759 commands. */ 760 reset_fscreatecon = getfscreatecon (&old_fscreatecon) >= 0; 761 if (setfscreatecon (con) < 0) 762 fprintf (stderr, _("%s: warning: failed to set default file creation context to %s: %s"), 763 myname, con, strerror (errno)); 764 freecon (con); 765 } 766 else 767 { 768 if (errno != ENOSYS) 769 fprintf (stderr, _("%s: warning: failed to get security context of %s: %s"), 770 myname, input->in_file_name, strerror (errno)); 771 } 772 } 773 #endif 774 775 output_file.fp = ck_mkstemp (&input->out_file_name, tmpdir, "sed"); 776 output_file.missing_newline = false; 777 free (tmpdir); 778 779 #ifndef BOOTSTRAP 780 if (reset_fscreatecon) 781 { 782 setfscreatecon (old_fscreatecon); 783 freecon (old_fscreatecon); 784 } 785 #endif 786 787 if (!output_file.fp) 788 panic(_("couldn't open temporary file %s: %s"), input->out_file_name, strerror(errno)); 789 } 790 else 791 output_file.fp = stdout; 792 } 793 794 795 /* Clean up an input stream that we are done with. */ 796 static void closedown P_((struct input *)); 797 static void 798 closedown(input) 799 struct input *input; 800 { 801 input->read_fn = read_always_fail; 802 if (!input->fp) 803 return; 804 805 if (in_place_extension && output_file.fp != NULL) 806 { 807 const char *target_name; 808 int input_fd, output_fd; 809 810 target_name = input->in_file_name; 811 input_fd = fileno (input->fp); 812 output_fd = fileno (output_file.fp); 813 copy_acl (input->in_file_name, input_fd, 814 input->out_file_name, output_fd, 815 input->st.st_mode); 816 #ifdef HAVE_FCHOWN 817 if (fchown (output_fd, input->st.st_uid, input->st.st_gid) == -1) 818 fchown (output_fd, -1, input->st.st_gid); 819 #endif 820 821 ck_fclose (input->fp); 822 ck_fclose (output_file.fp); 823 if (strcmp(in_place_extension, "*") != 0) 824 { 825 char *backup_file_name = get_backup_file_name(target_name); 826 ck_rename (target_name, backup_file_name, input->out_file_name); 827 free (backup_file_name); 828 } 829 830 ck_rename (input->out_file_name, target_name, input->out_file_name); 831 free (input->out_file_name); 832 } 833 else 834 ck_fclose (input->fp); 835 836 input->fp = NULL; 837 } 838 839 /* Reset range commands so that they are marked as non-matching */ 840 static void reset_addresses P_((struct vector *)); 841 static void 842 reset_addresses(vec) 843 struct vector *vec; 844 { 845 struct sed_cmd *cur_cmd; 846 int n; 847 848 for (cur_cmd = vec->v, n = vec->v_length; n--; cur_cmd++) 849 if (cur_cmd->a1 850 && cur_cmd->a1->addr_type == ADDR_IS_NUM 851 && cur_cmd->a1->addr_number == 0) 852 cur_cmd->range_state = RANGE_ACTIVE; 853 else 854 cur_cmd->range_state = RANGE_INACTIVE; 855 } 856 857 /* Read in the next line of input, and store it in the pattern space. 858 Return zero if there is nothing left to input. */ 859 static bool read_pattern_space P_((struct input *, struct vector *, int)); 860 static bool 861 read_pattern_space(input, the_program, append) 862 struct input *input; 863 struct vector *the_program; 864 int append; 865 { 866 if (append_head) /* redundant test to optimize for common case */ 867 dump_append_queue(); 868 replaced = false; 869 if (!append) 870 line.length = 0; 871 line.chomped = true; /* default, until proved otherwise */ 872 873 while ( ! (*input->read_fn)(input) ) 874 { 875 closedown(input); 876 877 if (!*input->file_list) 878 return false; 879 880 if (input->reset_at_next_file) 881 { 882 input->line_number = 0; 883 hold.length = 0; 884 reset_addresses (the_program); 885 rewind_read_files (); 886 887 /* If doing in-place editing, we will never append the 888 new-line to this file; but if the output goes to stdout, 889 we might still have to output the missing new-line. */ 890 if (in_place_extension) 891 output_file.missing_newline = false; 892 893 input->reset_at_next_file = separate_files; 894 } 895 896 open_next_file (*input->file_list++, input); 897 } 898 899 ++input->line_number; 900 return true; 901 } 902 903 904 static bool last_file_with_data_p P_((struct input *)); 906 static bool 907 last_file_with_data_p(input) 908 struct input *input; 909 { 910 for (;;) 911 { 912 int ch; 913 914 closedown(input); 915 if (!*input->file_list) 916 return true; 917 open_next_file(*input->file_list++, input); 918 if (input->fp) 919 { 920 if ((ch = getc(input->fp)) != EOF) 921 { 922 ungetc(ch, input->fp); 923 return false; 924 } 925 } 926 } 927 } 928 929 /* Determine if we match the `$' address. */ 930 static bool test_eof P_((struct input *)); 931 static bool 932 test_eof(input) 933 struct input *input; 934 { 935 int ch; 936 937 if (buffer.length) 938 return false; 939 if (!input->fp) 940 return separate_files || last_file_with_data_p(input); 941 if (feof(input->fp)) 942 return separate_files || last_file_with_data_p(input); 943 if ((ch = getc(input->fp)) == EOF) 944 return separate_files || last_file_with_data_p(input); 945 ungetc(ch, input->fp); 946 return false; 947 } 948 949 /* Return non-zero if the current line matches the address 950 pointed to by `addr'. */ 951 static bool match_an_address_p P_((struct addr *, struct input *)); 952 static bool 953 match_an_address_p(addr, input) 954 struct addr *addr; 955 struct input *input; 956 { 957 switch (addr->addr_type) 958 { 959 case ADDR_IS_NULL: 960 return true; 961 962 case ADDR_IS_REGEX: 963 return match_regex(addr->addr_regex, line.active, line.length, 0, NULL, 0); 964 965 case ADDR_IS_NUM_MOD: 966 return (input->line_number >= addr->addr_number 967 && ((input->line_number - addr->addr_number) % addr->addr_step) == 0); 968 969 case ADDR_IS_STEP: 970 case ADDR_IS_STEP_MOD: 971 /* reminder: these are only meaningful for a2 addresses */ 972 /* a2->addr_number needs to be recomputed each time a1 address 973 matches for the step and step_mod types */ 974 return (addr->addr_number <= input->line_number); 975 976 case ADDR_IS_LAST: 977 return test_eof(input); 978 979 /* ADDR_IS_NUM is handled in match_address_p. */ 980 case ADDR_IS_NUM: 981 default: 982 panic("INTERNAL ERROR: bad address type"); 983 } 984 /*NOTREACHED*/ 985 return false; 986 } 987 988 /* return non-zero if current address is valid for cmd */ 989 static bool match_address_p P_((struct sed_cmd *, struct input *)); 990 static bool 991 match_address_p(cmd, input) 992 struct sed_cmd *cmd; 993 struct input *input; 994 { 995 if (!cmd->a1) 996 return true; 997 998 if (cmd->range_state != RANGE_ACTIVE) 999 { 1000 /* Find if we are going to activate a range. Handle ADDR_IS_NUM 1001 specially: it represent an "absolute" state, it should not 1002 be computed like regexes. */ 1003 if (cmd->a1->addr_type == ADDR_IS_NUM) 1004 { 1005 if (!cmd->a2) 1006 return (input->line_number == cmd->a1->addr_number); 1007 1008 if (cmd->range_state == RANGE_CLOSED 1009 || input->line_number < cmd->a1->addr_number) 1010 return false; 1011 } 1012 else 1013 { 1014 if (!cmd->a2) 1015 return match_an_address_p(cmd->a1, input); 1016 1017 if (!match_an_address_p(cmd->a1, input)) 1018 return false; 1019 } 1020 1021 /* Ok, start a new range. */ 1022 cmd->range_state = RANGE_ACTIVE; 1023 switch (cmd->a2->addr_type) 1024 { 1025 case ADDR_IS_REGEX: 1026 /* Always include at least two lines. */ 1027 return true; 1028 case ADDR_IS_NUM: 1029 /* Same handling as below, but always include at least one line. */ 1030 if (input->line_number >= cmd->a2->addr_number) 1031 cmd->range_state = RANGE_CLOSED; 1032 return true; 1033 case ADDR_IS_STEP: 1034 cmd->a2->addr_number = input->line_number + cmd->a2->addr_step; 1035 return true; 1036 case ADDR_IS_STEP_MOD: 1037 cmd->a2->addr_number = input->line_number + cmd->a2->addr_step 1038 - (input->line_number%cmd->a2->addr_step); 1039 return true; 1040 default: 1041 break; 1042 } 1043 } 1044 1045 /* cmd->range_state == RANGE_ACTIVE. Check if the range is 1046 ending; also handle ADDR_IS_NUM specially in this case. */ 1047 1048 if (cmd->a2->addr_type == ADDR_IS_NUM) 1049 { 1050 /* If the second address is a line number, and if we got past 1051 that line, fail to match (it can happen when you jump 1052 over such addresses with `b' and `t'. Use RANGE_CLOSED 1053 so that the range is not re-enabled anymore. */ 1054 if (input->line_number >= cmd->a2->addr_number) 1055 cmd->range_state = RANGE_CLOSED; 1056 1057 return (input->line_number <= cmd->a2->addr_number); 1058 } 1059 1060 /* Other addresses are treated as usual. */ 1061 if (match_an_address_p(cmd->a2, input)) 1062 cmd->range_state = RANGE_CLOSED; 1063 1064 return true; 1065 } 1066 1067 1068 static void do_list P_((int line_len)); 1070 static void 1071 do_list(line_len) 1072 int line_len; 1073 { 1074 unsigned char *p = CAST(unsigned char *)line.active; 1075 countT len = line.length; 1076 countT width = 0; 1077 char obuf[180]; /* just in case we encounter a 512-bit char (;-) */ 1078 char *o; 1079 size_t olen; 1080 FILE *fp = output_file.fp; 1081 1082 output_missing_newline(&output_file); 1083 for (; len--; ++p) { 1084 o = obuf; 1085 1086 /* Some locales define 8-bit characters as printable. This makes the 1087 testsuite fail at 8to7.sed because the `l' command in fact will not 1088 convert the 8-bit characters. */ 1089 #if defined isascii || defined HAVE_ISASCII 1090 if (isascii(*p) && ISPRINT(*p)) { 1091 #else 1092 if (ISPRINT(*p)) { 1093 #endif 1094 *o++ = *p; 1095 if (*p == '\\') 1096 *o++ = '\\'; 1097 } else { 1098 *o++ = '\\'; 1099 switch (*p) { 1100 #if defined __STDC__ && __STDC__-0 1101 case '\a': *o++ = 'a'; break; 1102 #else /* Not STDC; we'll just assume ASCII */ 1103 case 007: *o++ = 'a'; break; 1104 #endif 1105 case '\b': *o++ = 'b'; break; 1106 case '\f': *o++ = 'f'; break; 1107 case '\n': *o++ = 'n'; break; 1108 case '\r': *o++ = 'r'; break; 1109 case '\t': *o++ = 't'; break; 1110 case '\v': *o++ = 'v'; break; 1111 default: 1112 sprintf(o, "%03o", *p); 1113 o += strlen(o); 1114 break; 1115 } 1116 } 1117 olen = o - obuf; 1118 if (width+olen >= line_len && line_len > 0) { 1119 ck_fwrite("\\\n", 1, 2, fp); 1120 width = 0; 1121 } 1122 ck_fwrite(obuf, 1, olen, fp); 1123 width += olen; 1124 } 1125 ck_fwrite("$\n", 1, 2, fp); 1126 flush_output (fp); 1127 } 1128 1129 1130 static enum replacement_types append_replacement P_((struct line *, struct replacement *, 1131 struct re_registers *, 1132 enum replacement_types)); 1133 static enum replacement_types 1134 append_replacement (buf, p, regs, repl_mod) 1135 struct line *buf; 1136 struct replacement *p; 1137 struct re_registers *regs; 1138 enum replacement_types repl_mod; 1139 { 1140 for (; p; p=p->next) 1141 { 1142 int i = p->subst_id; 1143 enum replacement_types curr_type; 1144 1145 /* Apply a \[lu] modifier that was given earlier, but which we 1146 have not had yet the occasion to apply. But don't do it 1147 if this replacement has a modifier of its own. */ 1148 curr_type = (p->repl_type & REPL_MODIFIERS) 1149 ? p->repl_type 1150 : p->repl_type | repl_mod; 1151 1152 repl_mod = 0; 1153 if (p->prefix_length) 1154 { 1155 str_append_modified(buf, p->prefix, p->prefix_length, 1156 curr_type); 1157 curr_type &= ~REPL_MODIFIERS; 1158 } 1159 1160 if (0 <= i) 1161 { 1162 if (regs->end[i] == regs->start[i] && p->repl_type & REPL_MODIFIERS) 1163 /* Save this modifier, we shall apply it later. 1164 e.g. in s/()([a-z])/\u\1\2/ 1165 the \u modifier is applied to \2, not \1 */ 1166 repl_mod = curr_type & REPL_MODIFIERS; 1167 1168 else if (regs->end[i] != regs->start[i]) 1169 str_append_modified(buf, line.active + regs->start[i], 1170 CAST(size_t)(regs->end[i] - regs->start[i]), 1171 curr_type); 1172 } 1173 } 1174 1175 return repl_mod; 1176 } 1177 1178 static void do_subst P_((struct subst *)); 1179 static void 1180 do_subst(sub) 1181 struct subst *sub; 1182 { 1183 size_t start = 0; /* where to start scan for (next) match in LINE */ 1184 size_t last_end = 0; /* where did the last successful match end in LINE */ 1185 countT count = 0; /* number of matches found */ 1186 bool again = true; 1187 1188 static struct re_registers regs; 1189 1190 line_reset(&s_accum, &line); 1191 1192 /* The first part of the loop optimizes s/xxx// when xxx is at the 1193 start, and s/xxx$// */ 1194 if (!match_regex(sub->regx, line.active, line.length, start, 1195 ®s, sub->max_id + 1)) 1196 return; 1197 1198 if (!sub->replacement && sub->numb <= 1) 1199 { 1200 if (regs.start[0] == 0 && !sub->global) 1201 { 1202 /* We found a match, set the `replaced' flag. */ 1203 replaced = true; 1204 1205 line.active += regs.end[0]; 1206 line.length -= regs.end[0]; 1207 line.alloc -= regs.end[0]; 1208 goto post_subst; 1209 } 1210 else if (regs.end[0] == line.length) 1211 { 1212 /* We found a match, set the `replaced' flag. */ 1213 replaced = true; 1214 1215 line.length = regs.start[0]; 1216 goto post_subst; 1217 } 1218 } 1219 1220 do 1221 { 1222 enum replacement_types repl_mod = 0; 1223 1224 size_t offset = regs.start[0]; 1225 size_t matched = regs.end[0] - regs.start[0]; 1226 1227 /* Copy stuff to the left of this match into the output string. */ 1228 if (start < offset) 1229 str_append(&s_accum, line.active + start, offset - start); 1230 1231 /* If we're counting up to the Nth match, are we there yet? 1232 And even if we are there, there is another case we have to 1233 skip: are we matching an empty string immediately following 1234 another match? 1235 1236 This latter case avoids that baaaac, when passed through 1237 s,a*,x,g, gives `xbxxcx' instead of xbxcx. This behavior is 1238 unacceptable because it is not consistently applied (for 1239 example, `baaaa' gives `xbx', not `xbxx'). */ 1240 if ((matched > 0 || count == 0 || offset > last_end) 1241 && ++count >= sub->numb) 1242 { 1243 /* We found a match, set the `replaced' flag. */ 1244 replaced = true; 1245 1246 /* Now expand the replacement string into the output string. */ 1247 repl_mod = append_replacement (&s_accum, sub->replacement, ®s, repl_mod); 1248 again = sub->global; 1249 } 1250 else 1251 { 1252 /* The match was not replaced. Copy the text until its 1253 end; if it was vacuous, skip over one character and 1254 add that character to the output. */ 1255 if (matched == 0) 1256 { 1257 if (start < line.length) 1258 matched = 1; 1259 else 1260 break; 1261 } 1262 1263 str_append(&s_accum, line.active + offset, matched); 1264 } 1265 1266 /* Start after the match. last_end is the real end of the matched 1267 substring, excluding characters that were skipped in case the RE 1268 matched the empty string. */ 1269 start = offset + matched; 1270 last_end = regs.end[0]; 1271 } 1272 while (again 1273 && start <= line.length 1274 && match_regex(sub->regx, line.active, line.length, start, 1275 ®s, sub->max_id + 1)); 1276 1277 /* Copy stuff to the right of the last match into the output string. */ 1278 if (start < line.length) 1279 str_append(&s_accum, line.active + start, line.length-start); 1280 s_accum.chomped = line.chomped; 1281 1282 /* Exchange line and s_accum. This can be much cheaper 1283 than copying s_accum.active into line.text (for huge lines). */ 1284 line_exchange(&line, &s_accum, false); 1285 1286 /* Finish up. */ 1287 if (count < sub->numb) 1288 return; 1289 1290 post_subst: 1291 if (sub->print & 1) 1292 output_line(line.active, line.length, line.chomped, &output_file); 1293 1294 if (sub->eval) 1295 { 1296 #ifdef HAVE_POPEN 1297 FILE *pipe_fp; 1298 line_reset(&s_accum, NULL); 1299 1300 str_append (&line, "", 1); 1301 pipe_fp = popen(line.active, "r"); 1302 1303 if (pipe_fp != NULL) 1304 { 1305 while (!feof (pipe_fp)) 1306 { 1307 char buf[4096]; 1308 int n = fread (buf, sizeof(char), 4096, pipe_fp); 1309 if (n > 0) 1310 str_append(&s_accum, buf, n); 1311 } 1312 1313 pclose (pipe_fp); 1314 1315 /* Exchange line and s_accum. This can be much cheaper than copying 1316 s_accum.active into line.text (for huge lines). See comment above 1317 for 'g' as to while the third argument is incorrect anyway. */ 1318 line_exchange(&line, &s_accum, true); 1319 if (line.length && 1320 line.active[line.length - 1] == '\n') 1321 line.length--; 1322 } 1323 else 1324 panic(_("error in subprocess")); 1325 #else 1326 panic(_("option `e' not supported")); 1327 #endif 1328 } 1329 1330 if (sub->print & 2) 1331 output_line(line.active, line.length, line.chomped, &output_file); 1332 if (sub->outf) 1333 output_line(line.active, line.length, line.chomped, sub->outf); 1334 } 1335 1336 #ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION 1337 /* Used to attempt a simple-minded optimization. */ 1338 1339 static countT branches; 1340 1341 static countT count_branches P_((struct vector *)); 1342 static countT 1343 count_branches(program) 1344 struct vector *program; 1345 { 1346 struct sed_cmd *cur_cmd = program->v; 1347 countT isn_cnt = program->v_length; 1348 countT cnt = 0; 1349 1350 while (isn_cnt-- > 0) 1351 { 1352 switch (cur_cmd->cmd) 1353 { 1354 case 'b': 1355 case 't': 1356 case 'T': 1357 case '{': 1358 ++cnt; 1359 } 1360 } 1361 return cnt; 1362 } 1363 1364 static struct sed_cmd *shrink_program P_((struct vector *, struct sed_cmd *)); 1365 static struct sed_cmd * 1366 shrink_program(vec, cur_cmd) 1367 struct vector *vec; 1368 struct sed_cmd *cur_cmd; 1369 { 1370 struct sed_cmd *v = vec->v; 1371 struct sed_cmd *last_cmd = v + vec->v_length; 1372 struct sed_cmd *p; 1373 countT cmd_cnt; 1374 1375 for (p=v; p < cur_cmd; ++p) 1376 if (p->cmd != '#') 1377 MEMCPY(v++, p, sizeof *v); 1378 cmd_cnt = v - vec->v; 1379 1380 for (; p < last_cmd; ++p) 1381 if (p->cmd != '#') 1382 MEMCPY(v++, p, sizeof *v); 1383 vec->v_length = v - vec->v; 1384 1385 return (0 < vec->v_length) ? (vec->v + cmd_cnt) : CAST(struct sed_cmd *)0; 1386 } 1387 #endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/ 1388 1389 /* Execute the program `vec' on the current input line. 1390 Return exit status if caller should quit, -1 otherwise. */ 1391 static int execute_program P_((struct vector *, struct input *)); 1392 static int 1393 execute_program(vec, input) 1394 struct vector *vec; 1395 struct input *input; 1396 { 1397 struct sed_cmd *cur_cmd; 1398 struct sed_cmd *end_cmd; 1399 1400 cur_cmd = vec->v; 1401 end_cmd = vec->v + vec->v_length; 1402 while (cur_cmd < end_cmd) 1403 { 1404 if (match_address_p(cur_cmd, input) != cur_cmd->addr_bang) 1405 { 1406 switch (cur_cmd->cmd) 1407 { 1408 case 'a': 1409 { 1410 struct append_queue *aq = next_append_slot(); 1411 aq->text = cur_cmd->x.cmd_txt.text; 1412 aq->textlen = cur_cmd->x.cmd_txt.text_length; 1413 } 1414 break; 1415 1416 case '{': 1417 case 'b': 1418 cur_cmd = vec->v + cur_cmd->x.jump_index; 1419 continue; 1420 1421 case '}': 1422 case '#': 1423 case ':': 1424 /* Executing labels and block-ends are easy. */ 1425 break; 1426 1427 case 'c': 1428 if (cur_cmd->range_state != RANGE_ACTIVE) 1429 output_line(cur_cmd->x.cmd_txt.text, 1430 cur_cmd->x.cmd_txt.text_length - 1, true, 1431 &output_file); 1432 /* POSIX.2 is silent about c starting a new cycle, 1433 but it seems to be expected (and make sense). */ 1434 /* Fall Through */ 1435 case 'd': 1436 return -1; 1437 1438 case 'D': 1439 { 1440 char *p = memchr(line.active, '\n', line.length); 1441 if (!p) 1442 return -1; 1443 1444 ++p; 1445 line.alloc -= p - line.active; 1446 line.length -= p - line.active; 1447 line.active += p - line.active; 1448 1449 /* reset to start next cycle without reading a new line: */ 1450 cur_cmd = vec->v; 1451 continue; 1452 } 1453 1454 case 'e': { 1455 #ifdef HAVE_POPEN 1456 FILE *pipe_fp; 1457 int cmd_length = cur_cmd->x.cmd_txt.text_length; 1458 line_reset(&s_accum, NULL); 1459 1460 if (!cmd_length) 1461 { 1462 str_append (&line, "", 1); 1463 pipe_fp = popen(line.active, "r"); 1464 } 1465 else 1466 { 1467 cur_cmd->x.cmd_txt.text[cmd_length - 1] = 0; 1468 pipe_fp = popen(cur_cmd->x.cmd_txt.text, "r"); 1469 output_missing_newline(&output_file); 1470 } 1471 1472 if (pipe_fp != NULL) 1473 { 1474 char buf[4096]; 1475 int n; 1476 while (!feof (pipe_fp)) 1477 if ((n = fread (buf, sizeof(char), 4096, pipe_fp)) > 0) 1478 { 1479 if (!cmd_length) 1480 str_append(&s_accum, buf, n); 1481 else 1482 ck_fwrite(buf, 1, n, output_file.fp); 1483 } 1484 1485 pclose (pipe_fp); 1486 if (!cmd_length) 1487 { 1488 /* Store into pattern space for plain `e' commands */ 1489 if (s_accum.length && 1490 s_accum.active[s_accum.length - 1] == '\n') 1491 s_accum.length--; 1492 1493 /* Exchange line and s_accum. This can be much 1494 cheaper than copying s_accum.active into line.text 1495 (for huge lines). See comment above for 'g' as 1496 to while the third argument is incorrect anyway. */ 1497 line_exchange(&line, &s_accum, true); 1498 } 1499 else 1500 flush_output(output_file.fp); 1501 1502 } 1503 else 1504 panic(_("error in subprocess")); 1505 #else 1506 panic(_("`e' command not supported")); 1507 #endif 1508 break; 1509 } 1510 1511 case 'g': 1512 /* We do not have a really good choice for the third parameter. 1513 The problem is that hold space and the input file might as 1514 well have different states; copying it from hold space means 1515 that subsequent input might be read incorrectly, while 1516 keeping it as in pattern space means that commands operating 1517 on the moved buffer might consider a wrong character set. 1518 We keep it true because it's what sed <= 4.1.5 did. */ 1519 line_copy(&hold, &line, true); 1520 break; 1521 1522 case 'G': 1523 /* We do not have a really good choice for the third parameter. 1524 The problem is that hold space and pattern space might as 1525 well have different states. So, true is as wrong as false. 1526 We keep it true because it's what sed <= 4.1.5 did, but 1527 we could consider having line_ap. */ 1528 line_append(&hold, &line, true); 1529 break; 1530 1531 case 'h': 1532 /* Here, it is ok to have true. */ 1533 line_copy(&line, &hold, true); 1534 break; 1535 1536 case 'H': 1537 /* See comment above for 'G' regarding the third parameter. */ 1538 line_append(&line, &hold, true); 1539 break; 1540 1541 case 'i': 1542 output_line(cur_cmd->x.cmd_txt.text, 1543 cur_cmd->x.cmd_txt.text_length - 1, 1544 true, &output_file); 1545 break; 1546 1547 case 'l': 1548 do_list(cur_cmd->x.int_arg == -1 1549 ? lcmd_out_line_len 1550 : cur_cmd->x.int_arg); 1551 break; 1552 1553 case 'L': 1554 output_missing_newline(&output_file); 1555 fmt(line.active, line.active + line.length, 1556 cur_cmd->x.int_arg == -1 1557 ? lcmd_out_line_len 1558 : cur_cmd->x.int_arg, 1559 output_file.fp); 1560 flush_output(output_file.fp); 1561 break; 1562 1563 case 'n': 1564 if (!no_default_output) 1565 output_line(line.active, line.length, line.chomped, &output_file); 1566 if (test_eof(input) || !read_pattern_space(input, vec, false)) 1567 return -1; 1568 break; 1569 1570 case 'N': 1571 str_append(&line, "\n", 1); 1572 1573 if (test_eof(input) || !read_pattern_space(input, vec, true)) 1574 { 1575 line.length--; 1576 if (posixicity == POSIXLY_EXTENDED && !no_default_output) 1577 output_line(line.active, line.length, line.chomped, 1578 &output_file); 1579 return -1; 1580 } 1581 break; 1582 1583 case 'p': 1584 output_line(line.active, line.length, line.chomped, &output_file); 1585 break; 1586 1587 case 'P': 1588 { 1589 char *p = memchr(line.active, '\n', line.length); 1590 output_line(line.active, p ? p - line.active : line.length, 1591 p ? true : line.chomped, &output_file); 1592 } 1593 break; 1594 1595 case 'q': 1596 if (!no_default_output) 1597 output_line(line.active, line.length, line.chomped, &output_file); 1598 dump_append_queue(); 1599 1600 case 'Q': 1601 return cur_cmd->x.int_arg == -1 ? 0 : cur_cmd->x.int_arg; 1602 1603 case 'r': 1604 if (cur_cmd->x.fname) 1605 { 1606 struct append_queue *aq = next_append_slot(); 1607 aq->fname = cur_cmd->x.fname; 1608 } 1609 break; 1610 1611 case 'R': 1612 if (cur_cmd->x.fp && !feof (cur_cmd->x.fp)) 1613 { 1614 struct append_queue *aq; 1615 size_t buflen; 1616 char *text = NULL; 1617 int result; 1618 1619 result = ck_getline (&text, &buflen, cur_cmd->x.fp); 1620 if (result != EOF) 1621 { 1622 aq = next_append_slot(); 1623 aq->free = true; 1624 aq->text = text; 1625 aq->textlen = result; 1626 } 1627 } 1628 break; 1629 1630 case 's': 1631 do_subst(cur_cmd->x.cmd_subst); 1632 break; 1633 1634 case 't': 1635 if (replaced) 1636 { 1637 replaced = false; 1638 cur_cmd = vec->v + cur_cmd->x.jump_index; 1639 continue; 1640 } 1641 break; 1642 1643 case 'T': 1644 if (!replaced) 1645 { 1646 cur_cmd = vec->v + cur_cmd->x.jump_index; 1647 continue; 1648 } 1649 else 1650 replaced = false; 1651 break; 1652 1653 case 'w': 1654 if (cur_cmd->x.fp) 1655 output_line(line.active, line.length, 1656 line.chomped, cur_cmd->x.outf); 1657 break; 1658 1659 case 'W': 1660 if (cur_cmd->x.fp) 1661 { 1662 char *p = memchr(line.active, '\n', line.length); 1663 output_line(line.active, p ? p - line.active : line.length, 1664 p ? true : line.chomped, cur_cmd->x.outf); 1665 } 1666 break; 1667 1668 case 'x': 1669 /* See comment above for 'g' regarding the third parameter. */ 1670 line_exchange(&line, &hold, false); 1671 break; 1672 1673 case 'y': 1674 { 1675 #ifdef HAVE_MBRTOWC 1676 if (mb_cur_max > 1) 1677 { 1678 int idx, prev_idx; /* index in the input line. */ 1679 char **trans; 1680 mbstate_t mbstate; 1681 memset(&mbstate, 0, sizeof(mbstate_t)); 1682 for (idx = 0; idx < line.length;) 1683 { 1684 int mbclen, i; 1685 mbclen = MBRLEN (line.active + idx, line.length - idx, 1686 &mbstate); 1687 /* An invalid sequence, or a truncated multibyte 1688 character. We treat it as a singlebyte character. 1689 */ 1690 if (mbclen == (size_t) -1 || mbclen == (size_t) -2 1691 || mbclen == 0) 1692 mbclen = 1; 1693 1694 trans = cur_cmd->x.translatemb; 1695 /* `i' indicate i-th translate pair. */ 1696 for (i = 0; trans[2*i] != NULL; i++) 1697 { 1698 if (strncmp(line.active + idx, trans[2*i], mbclen) == 0) 1699 { 1700 bool move_remain_buffer = false; 1701 int trans_len = strlen(trans[2*i+1]); 1702 1703 if (mbclen < trans_len) 1704 { 1705 int new_len; 1706 new_len = line.length + 1 + trans_len - mbclen; 1707 /* We must extend the line buffer. */ 1708 if (line.alloc < new_len) 1709 { 1710 /* And we must resize the buffer. */ 1711 resize_line(&line, new_len); 1712 } 1713 move_remain_buffer = true; 1714 } 1715 else if (mbclen > trans_len) 1716 { 1717 /* We must truncate the line buffer. */ 1718 move_remain_buffer = true; 1719 } 1720 prev_idx = idx; 1721 if (move_remain_buffer) 1722 { 1723 int move_len, move_offset; 1724 char *move_from, *move_to; 1725 /* Move the remaining with \0. */ 1726 move_from = line.active + idx + mbclen; 1727 move_to = line.active + idx + trans_len; 1728 move_len = line.length + 1 - idx - mbclen; 1729 move_offset = trans_len - mbclen; 1730 memmove(move_to, move_from, move_len); 1731 line.length += move_offset; 1732 idx += move_offset; 1733 } 1734 strncpy(line.active + prev_idx, trans[2*i+1], 1735 trans_len); 1736 break; 1737 } 1738 } 1739 idx += mbclen; 1740 } 1741 } 1742 else 1743 #endif /* HAVE_MBRTOWC */ 1744 { 1745 unsigned char *p, *e; 1746 p = CAST(unsigned char *)line.active; 1747 for (e=p+line.length; p<e; ++p) 1748 *p = cur_cmd->x.translate[*p]; 1749 } 1750 } 1751 break; 1752 1753 case 'z': 1754 line.length = 0; 1755 break; 1756 1757 case '=': 1758 output_missing_newline(&output_file); 1759 fprintf(output_file.fp, "%lu\n", 1760 CAST(unsigned long)input->line_number); 1761 flush_output(output_file.fp); 1762 break; 1763 1764 default: 1765 panic("INTERNAL ERROR: Bad cmd %c", cur_cmd->cmd); 1766 } 1767 } 1768 1769 #ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION 1770 /* If our top-level program consists solely of commands with 1771 ADDR_IS_NUM addresses then once we past the last mentioned 1772 line we should be able to quit if no_default_output is true, 1773 or otherwise quickly copy input to output. Now whether this 1774 optimization is a win or not depends on how cheaply we can 1775 implement this for the cases where it doesn't help, as 1776 compared against how much time is saved. One semantic 1777 difference (which I think is an improvement) is that *this* 1778 version will terminate after printing line two in the script 1779 "yes | sed -n 2p". 1780 1781 Don't use this when in-place editing is active, because line 1782 numbers restart each time then. */ 1783 else if (!separate_files) 1784 { 1785 if (cur_cmd->a1->addr_type == ADDR_IS_NUM 1786 && (cur_cmd->a2 1787 ? cur_cmd->range_state == RANGE_CLOSED 1788 : cur_cmd->a1->addr_number < input->line_number)) 1789 { 1790 /* Skip this address next time */ 1791 cur_cmd->addr_bang = !cur_cmd->addr_bang; 1792 cur_cmd->a1->addr_type = ADDR_IS_NULL; 1793 if (cur_cmd->a2) 1794 cur_cmd->a2->addr_type = ADDR_IS_NULL; 1795 1796 /* can we make an optimization? */ 1797 if (cur_cmd->addr_bang) 1798 { 1799 if (cur_cmd->cmd == 'b' || cur_cmd->cmd == 't' 1800 || cur_cmd->cmd == 'T' || cur_cmd->cmd == '}') 1801 branches--; 1802 1803 cur_cmd->cmd = '#'; /* replace with no-op */ 1804 if (branches == 0) 1805 cur_cmd = shrink_program(vec, cur_cmd); 1806 if (!cur_cmd && no_default_output) 1807 return 0; 1808 end_cmd = vec->v + vec->v_length; 1809 if (!cur_cmd) 1810 cur_cmd = end_cmd; 1811 continue; 1812 } 1813 } 1814 } 1815 #endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/ 1816 1817 /* this is buried down here so that a "continue" statement can skip it */ 1818 ++cur_cmd; 1819 } 1820 1821 if (!no_default_output) 1822 output_line(line.active, line.length, line.chomped, &output_file); 1823 return -1; 1824 } 1825 1826 1827 1829 /* Apply the compiled script to all the named files. */ 1830 int 1831 process_files(the_program, argv) 1832 struct vector *the_program; 1833 char **argv; 1834 { 1835 static char dash[] = "-"; 1836 static char *stdin_argv[2] = { dash, NULL }; 1837 struct input input; 1838 int status; 1839 1840 line_init(&line, NULL, INITIAL_BUFFER_SIZE); 1841 line_init(&hold, NULL, 0); 1842 line_init(&buffer, NULL, 0); 1843 1844 #ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION 1845 branches = count_branches(the_program); 1846 #endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/ 1847 input.reset_at_next_file = true; 1848 if (argv && *argv) 1849 input.file_list = argv; 1850 else if (in_place_extension) 1851 panic(_("no input files")); 1852 else 1853 input.file_list = stdin_argv; 1854 1855 input.bad_count = 0; 1856 input.line_number = 0; 1857 input.read_fn = read_always_fail; 1858 input.fp = NULL; 1859 1860 status = EXIT_SUCCESS; 1861 while (read_pattern_space(&input, the_program, false)) 1862 { 1863 status = execute_program(the_program, &input); 1864 if (status == -1) 1865 status = EXIT_SUCCESS; 1866 else 1867 break; 1868 } 1869 closedown(&input); 1870 1871 #ifdef DEBUG_LEAKS 1872 /* We're about to exit, so these free()s are redundant. 1873 But if we're running under a memory-leak detecting 1874 implementation of malloc(), we want to explicitly 1875 deallocate in order to avoid extraneous noise from 1876 the allocator. */ 1877 release_append_queue(); 1878 FREE(buffer.text); 1879 FREE(hold.text); 1880 FREE(line.text); 1881 FREE(s_accum.text); 1882 #endif /*DEBUG_LEAKS*/ 1883 1884 if (input.bad_count) 1885 status = 2; 1886 1887 return status; 1888 } 1889