Home | History | Annotate | Download | only in sed
      1 /*  GNU SED, a batch stream editor.
      2     Copyright (C) 1989,90,91,92,93,94,95,98,99,2002,2003,2004,2005,2006,2008
      3     Free Software Foundation, Inc.
      4 
      5     This program is free software; you can redistribute it and/or modify
      6     it under the terms of the GNU General Public License as published by
      7     the Free Software Foundation; either version 3, or (at your option)
      8     any later version.
      9 
     10     This program is distributed in the hope that it will be useful,
     11     but WITHOUT ANY WARRANTY; without even the implied warranty of
     12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13     GNU General Public License for more details.
     14 
     15     You should have received a copy of the GNU General Public License
     16     along with this program; if not, write to the Free Software
     17     Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
     18 
     19 /* compile.c: translate sed source into internal form */
     20 
     21 #include "sed.h"
     22 #include <stdio.h>
     23 #include <ctype.h>
     24 
     25 #ifdef HAVE_STRINGS_H
     26 # include <strings.h>
     27 # ifdef HAVE_MEMORY_H
     28 #  include <memory.h>
     29 # endif
     30 #else
     31 # include <string.h>
     32 #endif /* HAVE_STRINGS_H */
     33 
     34 #ifdef HAVE_STDLIB_H
     35 # include <stdlib.h>
     36 #endif
     37 #ifndef EXIT_FAILURE
     38 # define EXIT_FAILURE 1
     39 #endif
     40 
     41 #ifdef HAVE_SYS_TYPES_H
     42 # include <sys/types.h>
     43 #endif
     44 
     45 #include <obstack.h>
     46 
     47 
/* NOTE(review): presumably the size of the `y' command translation map;
   its use is outside this part of the file -- confirm at the use site. */
#define YMAP_LENGTH		256 /*XXX shouldn't this be (UCHAR_MAX+1)?*/
/* Number of `struct sed_cmd' slots added each time a command vector
   runs out of room (see next_cmd_entry). */
#define VECTOR_ALLOC_INCREMENT	40

/* let's not confuse text editors that have only dumb bracket-matching... */
#define OPEN_BRACKET	'['
#define CLOSE_BRACKET	']'
#define OPEN_BRACE	'{'
#define CLOSE_BRACE	'}'
     57 
/* Describes where the script text being compiled comes from: either an
   in-memory string (base/cur/end) or a stdio stream (file).  inchar()
   consults `cur' first and only falls back to `file' when `cur' is NULL. */
struct prog_info {
  /* When we're reading a script command from a string, `prog.base'
     points to the first character in the string, 'prog.cur' points
     to the current character in the string, and 'prog.end' points
     to the end of the string.  This allows us to compile script
     strings that contain nulls. */
  const unsigned char *base;
  const unsigned char *cur;
  const unsigned char *end;

  /* This is the current script file.  If it is NULL, we are reading
     from a string stored at `prog.cur' instead.  If both `prog.file'
     and `prog.cur' are NULL, we're in trouble! */
  FILE *file;
};
     73 
/* Information used to give out useful and informative error messages.
   bad_prog() reports "file NAME line N" when `name' is non-NULL and
   "-e expression #N, char M" otherwise. */
struct error_info {
  /* This is the name of the current script file. */
  const char *name;

  /* This is the number of the current script line that we're compiling.
     It is incremented by inchar() and decremented by savchar() whenever
     a newline is consumed or pushed back. */
  countT line;

  /* This is the index of the "-e" expressions on the command line. */
  countT string_expr_count;
};
     85 
     86 
/* Label structure used to resolve GOTO's, labels, and block beginnings.
   Nodes are created by setup_label() (allocated on the compile obstack)
   and popped by release_label(), which frees only `name'. */
struct sed_label {
  countT v_index;		/* index of vector element being referenced */
  char *name;			/* NUL-terminated name of the label */
  struct error_info err_info;	/* track where `{}' blocks start */
  struct sed_label *next;	/* linked list (stack) */
};
     94 
/* Associates an output descriptor for a special file name with the
   address of the FILE * variable it should be bound to.  get_openfile()
   copies `*pfp' into `outf.fp' when the name is requested. */
struct special_files {
  struct output outf;	/* descriptor handed back to the caller */
  FILE **pfp;		/* where to fetch the stream from at lookup time */
};
     99 
/* Copies of stdin/stdout/stderr.  They are assigned in get_openfile()
   rather than here because the std* streams are not always constant
   expressions usable in a static initializer. */
FILE *my_stdin, *my_stdout, *my_stderr;
/* Table of special file names recognised by get_openfile() when
   posixicity == POSIXLY_EXTENDED.  The entry with a NULL name
   terminates the table. */
struct special_files special_files[] = {
  { { "/dev/stdin", false, NULL, NULL }, &my_stdin },
  { { "/dev/stdout", false, NULL, NULL }, &my_stdout },
  { { "/dev/stderr", false, NULL, NULL }, &my_stderr },
  { { NULL, false, NULL, NULL }, NULL }
};
    107 
    108 
/* Where we are in the processing of the input.  `prog' is the read
   cursor used by inchar()/savchar(); `cur_input' is what bad_prog()
   reports as the error location. */
static struct prog_info prog;
static struct error_info cur_input;

/* Information about labels and jumps-to-labels.  This is used to do
   the required backpatching after we have compiled all the scripts. */
static struct sed_label *jumps = NULL;
static struct sed_label *labels = NULL;

/* We wish to detect #n magic only in the first input argument;
   this flag tracks when we have consumed the first file of input. */
static bool first_script = true;

/* Allow for scripts like "sed -e 'i\' -e foo":
   read_text() accumulates text in `pending_text' and remembers the
   destination in `old_text_buf' so that a later call can finish a
   text that was cut short by the end of a script chunk. */
static struct buffer *pending_text = NULL;
static struct text_buf *old_text_buf = NULL;

/* Information about block start positions.  This is used to backpatch
   block end positions. */
static struct sed_label *blocks = NULL;

/* Use an obstack for compilation. */
static struct obstack obs;
    133 
/* Various error messages we may want to print.

   All messages are packed into one array, separated by NUL bytes.  Each
   macro below is a pointer into that array: it is the previous macro
   plus the sizeof of the previous message literal (sizeof counts the
   terminating NUL, so the result lands on the first byte of the next
   message).  The literals inside the macros must therefore stay
   byte-for-byte in sync with, and in the same order as, the array.
   NOTE(review): N_() is presumably the usual gettext no-op marker that
   only flags the literal for extraction -- confirm in the project
   headers. */
static const char errors[] =
  "multiple `!'s\0"
  "unexpected `,'\0"
  "invalid usage of +N or ~N as first address\0"
  "unmatched `{'\0"
  "unexpected `}'\0"
  "extra characters after command\0"
  "expected \\ after `a', `c' or `i'\0"
  "`}' doesn't want any addresses\0"
  ": doesn't want any addresses\0"
  "comments don't accept any addresses\0"
  "missing command\0"
  "command only uses one address\0"
  "unterminated address regex\0"
  "unterminated `s' command\0"
  "unterminated `y' command\0"
  "unknown option to `s'\0"
  "multiple `p' options to `s' command\0"
  "multiple `g' options to `s' command\0"
  "multiple number options to `s' command\0"
  "number option to `s' command may not be zero\0"
  "strings for `y' command are different lengths\0"
  "delimiter character is not a single-byte character\0"
  "expected newer version of sed\0"
  "invalid usage of line address 0\0"
  "unknown command: `%c'";

/* Offsets of the individual messages inside `errors' (see above). */
#define BAD_BANG (errors)
#define BAD_COMMA (BAD_BANG + sizeof(N_("multiple `!'s")))
#define BAD_STEP (BAD_COMMA + sizeof(N_("unexpected `,'")))
#define EXCESS_OPEN_BRACE (BAD_STEP + sizeof(N_("invalid usage of +N or ~N as first address")))
#define EXCESS_CLOSE_BRACE (EXCESS_OPEN_BRACE + sizeof(N_("unmatched `{'")))
#define EXCESS_JUNK (EXCESS_CLOSE_BRACE + sizeof(N_("unexpected `}'")))
#define EXPECTED_SLASH (EXCESS_JUNK + sizeof(N_("extra characters after command")))
#define NO_CLOSE_BRACE_ADDR (EXPECTED_SLASH + sizeof(N_("expected \\ after `a', `c' or `i'")))
#define NO_COLON_ADDR (NO_CLOSE_BRACE_ADDR + sizeof(N_("`}' doesn't want any addresses")))
#define NO_SHARP_ADDR (NO_COLON_ADDR + sizeof(N_(": doesn't want any addresses")))
#define NO_COMMAND (NO_SHARP_ADDR + sizeof(N_("comments don't accept any addresses")))
#define ONE_ADDR (NO_COMMAND + sizeof(N_("missing command")))
#define UNTERM_ADDR_RE (ONE_ADDR + sizeof(N_("command only uses one address")))
#define UNTERM_S_CMD (UNTERM_ADDR_RE + sizeof(N_("unterminated address regex")))
#define UNTERM_Y_CMD (UNTERM_S_CMD + sizeof(N_("unterminated `s' command")))
#define UNKNOWN_S_OPT (UNTERM_Y_CMD + sizeof(N_("unterminated `y' command")))
#define EXCESS_P_OPT (UNKNOWN_S_OPT + sizeof(N_("unknown option to `s'")))
#define EXCESS_G_OPT (EXCESS_P_OPT + sizeof(N_("multiple `p' options to `s' command")))
#define EXCESS_N_OPT (EXCESS_G_OPT + sizeof(N_("multiple `g' options to `s' command")))
#define ZERO_N_OPT (EXCESS_N_OPT + sizeof(N_("multiple number options to `s' command")))
#define Y_CMD_LEN (ZERO_N_OPT + sizeof(N_("number option to `s' command may not be zero")))
#define BAD_DELIM (Y_CMD_LEN + sizeof(N_("strings for `y' command are different lengths")))
#define ANCIENT_VERSION (BAD_DELIM + sizeof(N_("delimiter character is not a single-byte character")))
#define INVALID_LINE_0 (ANCIENT_VERSION + sizeof(N_("expected newer version of sed")))
#define UNKNOWN_CMD (INVALID_LINE_0 + sizeof(N_("invalid usage of line address 0")))
#define END_ERRORS (UNKNOWN_CMD + sizeof(N_("unknown command: `%c'")))
    188 
/* Heads of the linked lists of files opened through get_openfile().
   `file_write' is the list used for the `w' option of `s' (see
   mark_subst_opts).  NOTE(review): `file_read' is presumably the
   matching list for files opened for reading -- its users are outside
   this part of the file. */
static struct output *file_read = NULL;
static struct output *file_write = NULL;
    191 
    192 
    193 /* Complain about an unknown command and exit. */
    195 void
    196 bad_command(ch)
    197   char ch;
    198 {
    199   const char *msg = _(UNKNOWN_CMD);
    200   char *unknown_cmd = xmalloc(strlen(msg));
    201   sprintf(unknown_cmd, msg, ch);
    202   bad_prog(unknown_cmd);
    203 }
    204 
    205 /* Complain about a programming error and exit. */
    206 void
    207 bad_prog(why)
    208   const char *why;
    209 {
    210   if (cur_input.name)
    211     fprintf(stderr, _("%s: file %s line %lu: %s\n"),
    212 	    myname, cur_input.name, CAST(unsigned long)cur_input.line, why);
    213   else
    214     fprintf(stderr, _("%s: -e expression #%lu, char %lu: %s\n"),
    215 	    myname,
    216 	    CAST(unsigned long)cur_input.string_expr_count,
    217 	    CAST(unsigned long)(prog.cur-prog.base),
    218 	    why);
    219   exit(EXIT_FAILURE);
    220 }
    221 
    222 
    223 /* Read the next character from the program.  Return EOF if there isn't
    225    anything to read.  Keep cur_input.line up to date, so error messages
    226    can be meaningful. */
    227 static int inchar P_((void));
    228 static int
    229 inchar()
    230 {
    231   int ch = EOF;
    232 
    233   if (prog.cur)
    234     {
    235       if (prog.cur < prog.end)
    236 	ch = *prog.cur++;
    237     }
    238   else if (prog.file)
    239     {
    240       if (!feof(prog.file))
    241 	ch = getc(prog.file);
    242     }
    243   if (ch == '\n')
    244     ++cur_input.line;
    245   return ch;
    246 }
    247 
    248 /* unget `ch' so the next call to inchar will return it.   */
    249 static void savchar P_((int ch));
    250 static void
    251 savchar(ch)
    252   int ch;
    253 {
    254   if (ch == EOF)
    255     return;
    256   if (ch == '\n' && cur_input.line > 0)
    257     --cur_input.line;
    258   if (prog.cur)
    259     {
    260       if (prog.cur <= prog.base || *--prog.cur != ch)
    261 	panic("Called savchar() with unexpected pushback (%x)",
    262 	      CAST(unsigned char)ch);
    263     }
    264   else
    265     ungetc(ch, prog.file);
    266 }
    267 
    268 /* Read the next non-blank character from the program.  */
    269 static int in_nonblank P_((void));
    270 static int
    271 in_nonblank()
    272 {
    273   int ch;
    274   do
    275     ch = inchar();
    276     while (ISBLANK(ch));
    277   return ch;
    278 }
    279 
    280 /* Read an integer value from the program.  */
    281 static countT in_integer P_((int ch));
    282 static countT
    283 in_integer(ch)
    284   int ch;
    285 {
    286   countT num = 0;
    287 
    288   while (ISDIGIT(ch))
    289     {
    290       num = num * 10 + ch - '0';
    291       ch = inchar();
    292     }
    293   savchar(ch);
    294   return num;
    295 }
    296 
    297 static int add_then_next P_((struct buffer *b, int ch));
    298 static int
    299 add_then_next(b, ch)
    300   struct buffer *b;
    301   int ch;
    302 {
    303   add1_buffer(b, ch);
    304   return inchar();
    305 }
    306 
    307 static char * convert_number P_((char *, char *, const char *, int, int, int));
    308 static char *
    309 convert_number(result, buf, bufend, base, maxdigits, default_char)
    310   char *result;
    311   char *buf;
    312   const char *bufend;
    313   int base;
    314   int maxdigits;
    315   int default_char;
    316 {
    317   int n = 0;
    318   char *p;
    319 
    320   for (p=buf; p < bufend && maxdigits-- > 0; ++p)
    321     {
    322       int d = -1;
    323       switch (*p)
    324 	{
    325 	case '0': d = 0x0; break;
    326 	case '1': d = 0x1; break;
    327 	case '2': d = 0x2; break;
    328 	case '3': d = 0x3; break;
    329 	case '4': d = 0x4; break;
    330 	case '5': d = 0x5; break;
    331 	case '6': d = 0x6; break;
    332 	case '7': d = 0x7; break;
    333 	case '8': d = 0x8; break;
    334 	case '9': d = 0x9; break;
    335 	case 'A': case 'a': d = 0xa; break;
    336 	case 'B': case 'b': d = 0xb; break;
    337 	case 'C': case 'c': d = 0xc; break;
    338 	case 'D': case 'd': d = 0xd; break;
    339 	case 'E': case 'e': d = 0xe; break;
    340 	case 'F': case 'f': d = 0xf; break;
    341 	}
    342       if (d < 0 || base <= d)
    343 	break;
    344       n = n * base + d;
    345     }
    346   if (p == buf)
    347     *result = default_char;
    348   else
    349     *result = n;
    350   return p;
    351 }
    352 
    353 
    354 /* Read in a filename for a `r', `w', or `s///w' command. */
    356 static struct buffer *read_filename P_((void));
    357 static struct buffer *
    358 read_filename()
    359 {
    360   struct buffer *b;
    361   int ch;
    362 
    363   b = init_buffer();
    364   ch = in_nonblank();
    365   while (ch != EOF && ch != '\n')
    366     {
    367 #if 0 /*XXX ZZZ 1998-09-12 kpp: added, then had second thoughts*/
    368       if (posixicity == POSIXLY_EXTENDED)
    369 	if (ch == ';' || ch == '#')
    370 	  {
    371 	    savchar(ch);
    372 	    break;
    373 	  }
    374 #endif
    375       ch = add_then_next(b, ch);
    376     }
    377   add1_buffer(b, '\0');
    378   return b;
    379 }
    380 
    381 static struct output *get_openfile P_((struct output **file_ptrs, const char *mode, int fail));
    382 static struct output *
    383 get_openfile(file_ptrs, mode, fail)
    384      struct output **file_ptrs;
    385      const char *mode;
    386      int fail;
    387 {
    388   struct buffer *b;
    389   char *file_name;
    390   struct output *p;
    391 
    392   b = read_filename();
    393   file_name = get_buffer(b);
    394   for (p=*file_ptrs; p; p=p->link)
    395     if (strcmp(p->name, file_name) == 0)
    396       break;
    397 
    398   if (posixicity == POSIXLY_EXTENDED)
    399     {
    400       /* Check whether it is a special file (stdin, stdout or stderr) */
    401       struct special_files *special = special_files;
    402 
    403       /* std* sometimes are not constants, so they
    404          cannot be used in the initializer for special_files */
    405       my_stdin = stdin; my_stdout = stdout; my_stderr = stderr;
    406       for (special = special_files; special->outf.name; special++)
    407         if (strcmp(special->outf.name, file_name) == 0)
    408           {
    409 	    special->outf.fp = *special->pfp;
    410 	    free_buffer (b);
    411 	    return &special->outf;
    412           }
    413     }
    414 
    415   if (!p)
    416     {
    417       p = OB_MALLOC(&obs, 1, struct output);
    418       p->name = ck_strdup(file_name);
    419       p->fp = ck_fopen(p->name, mode, fail);
    420       p->missing_newline = false;
    421       p->link = *file_ptrs;
    422       *file_ptrs = p;
    423     }
    424   free_buffer(b);
    425   return p;
    426 }
    427 
    428 
    429 static struct sed_cmd *next_cmd_entry P_((struct vector **vectorp));
    431 static struct sed_cmd *
    432 next_cmd_entry(vectorp)
    433   struct vector **vectorp;
    434 {
    435   struct sed_cmd *cmd;
    436   struct vector *v;
    437 
    438   v = *vectorp;
    439   if (v->v_length == v->v_allocated)
    440     {
    441       v->v_allocated += VECTOR_ALLOC_INCREMENT;
    442       v->v = REALLOC(v->v, v->v_allocated, struct sed_cmd);
    443     }
    444 
    445   cmd = v->v + v->v_length;
    446   cmd->a1 = NULL;
    447   cmd->a2 = NULL;
    448   cmd->range_state = RANGE_INACTIVE;
    449   cmd->addr_bang = false;
    450   cmd->cmd = '\0';	/* something invalid, to catch bugs early */
    451 
    452   *vectorp  = v;
    453   return cmd;
    454 }
    455 
/* Copy the body of a bracket expression from the script into B.  The
   caller has already consumed (and stored) the opening `['.  A leading
   `^' and a `]' appearing first are copied literally.  Scanning stops
   at the `]' that closes the expression, at a newline, or at EOF; that
   terminating character is returned and is NOT added to B.
   CUR_STAT is the multibyte conversion state handed to BRLEN.
   NOTE(review): `pending_mb' is set whenever BRLEN does not report 1;
   presumably this means CH belongs to a multibyte sequence -- confirm
   against the BRLEN definition. */
static int snarf_char_class P_((struct buffer *b, mbstate_t *cur_stat));
static int
snarf_char_class(b, cur_stat)
  struct buffer *b;
  mbstate_t *cur_stat;
{
  int ch;
  int state = 0;
  int delim;	/* only read in state 2, after state 1 has set it */
  bool pending_mb = 0;

  ch = inchar();
  if (ch == '^')
    ch = add_then_next(b, ch);
  if (ch == CLOSE_BRACKET)
    ch = add_then_next(b, ch);

  /* States are:
	0 outside a collation element, character class or collation class
	1 after the bracket
	2 after the opening ./:/=
	3 after the closing ./:/= */

  /* Note that every `continue' below still runs the loop increment,
     i.e. stores CH in B and fetches the next character. */
  for (;; ch = add_then_next (b, ch))
    {
      pending_mb = BRLEN (ch, cur_stat) != 1;

      switch (ch)
	{
	case EOF:
	case '\n':
	  return ch;

	case '.':
	case ':':
	case '=':
	  if (pending_mb)
	    continue;

	  if (state == 1)
	    {
	      /* `[.', `[:' or `[=': remember which one opened it.  */
	      delim = ch;
	      state = 2;
	    }
	  else if (state == 2 && ch == delim)
	    state = 3;
	  else
	    break;

	  continue;

	case OPEN_BRACKET:
	  if (pending_mb)
	    continue;

	  if (state == 0)
	    state = 1;
	  continue;

	case CLOSE_BRACKET:
	  if (pending_mb)
	    continue;

	  /* Outside any [. .] / [: :] / [= =] construct this bracket
	     closes the whole expression.  */
	  if (state == 0 || state == 1)
	    return ch;
	  else if (state == 3)
	    state = 0;

	  break;

	default:
	  break;
	}

      /* Getting a character different from .=: whilst in state 1
         goes back to state 0, getting a character different from ]
         whilst in state 3 goes back to state 2.  */
      state &= ~1;
    }
}
    536 
    537 static struct buffer *match_slash P_((int slash, int regex));
    538 static struct buffer *
    539 match_slash(slash, regex)
    540   int slash;
    541   int regex;
    542 {
    543   struct buffer *b;
    544   int ch;
    545   mbstate_t cur_stat;
    546 
    547   memset (&cur_stat, 0, sizeof (mbstate_t));
    548 
    549   /* We allow only 1 byte characters for a slash.  */
    550   if (BRLEN (slash, &cur_stat) == -2)
    551     bad_prog (BAD_DELIM);
    552 
    553   memset (&cur_stat, 0, sizeof (mbstate_t));
    554 
    555   b = init_buffer();
    556   while ((ch = inchar()) != EOF && ch != '\n')
    557     {
    558       bool pending_mb = !MBSINIT (&cur_stat);
    559       if (BRLEN (ch, &cur_stat) == 1 && !pending_mb)
    560 	{
    561 	  if (ch == slash)
    562 	    return b;
    563 	  else if (ch == '\\')
    564 	    {
    565 	      ch = inchar();
    566 	      if (ch == EOF)
    567 	        break;
    568 #ifndef REG_PERL
    569 	      else if (ch == 'n' && regex)
    570 	        ch = '\n';
    571 #endif
    572 	      else if (ch != '\n' && (ch != slash || (!regex && ch == '&')))
    573 	        add1_buffer(b, '\\');
    574 	    }
    575           else if (ch == OPEN_BRACKET && regex)
    576 	    {
    577 	      add1_buffer(b, ch);
    578 	      ch = snarf_char_class(b, &cur_stat);
    579 	      if (ch != CLOSE_BRACKET)
    580 	        break;
    581 	    }
    582 	}
    583 
    584       add1_buffer(b, ch);
    585     }
    586 
    587   if (ch == '\n')
    588     savchar(ch);	/* for proper line number in error report */
    589   free_buffer(b);
    590   return NULL;
    591 }
    592 
/* Parse the option letters that follow the replacement of an `s'
   command.  The option fields of CMD (global, print, eval, numb, outf)
   are reset and then filled in; the regex compilation flags selected
   by the options are returned.  Parsing ends at a newline, `;', EOF,
   CR-LF, at `}' or `#' (which are pushed back), or after a `w' file
   name.  Invalid or duplicated options are fatal via bad_prog().  */
static int mark_subst_opts P_((struct subst *cmd));
static int
mark_subst_opts(cmd)
  struct subst *cmd;
{
  int flags = 0;
  int ch;

  cmd->global = false;
  cmd->print = false;
  cmd->eval = false;
  cmd->numb = 0;
  cmd->outf = NULL;

  for (;;)
    switch ( (ch = in_nonblank()) )
      {
      case 'i':	/* GNU extension */
      case 'I':	/* GNU extension */
	if (posixicity == POSIXLY_BASIC)
	  bad_prog(_(UNKNOWN_S_OPT));
	flags |= REG_ICASE;
	break;

#ifdef REG_PERL
      case 's':	/* GNU extension */
      case 'S':	/* GNU extension */
	if (posixicity == POSIXLY_BASIC)
	  bad_prog(_(UNKNOWN_S_OPT));
	if (extended_regexp_flags & REG_PERL)
	  flags |= REG_DOTALL;
	break;

      case 'x':	/* GNU extension */
      case 'X':	/* GNU extension */
	if (posixicity == POSIXLY_BASIC)
	  bad_prog(_(UNKNOWN_S_OPT));
	if (extended_regexp_flags & REG_PERL)
	  flags |= REG_EXTENDED;
	break;
#endif

      case 'm':	/* GNU extension */
      case 'M':	/* GNU extension */
	if (posixicity == POSIXLY_BASIC)
	  bad_prog(_(UNKNOWN_S_OPT));
	flags |= REG_NEWLINE;
	break;

      case 'e':
	cmd->eval = true;
	break;

      case 'p':
	if (cmd->print)
	  bad_prog(_(EXCESS_P_OPT));
	/* The value depends on whether `e' has been seen already, so
	   the relative order of `p' and `e' matters.  */
	cmd->print |= (1 << cmd->eval); /* 1=before eval, 2=after */
	break;

      case 'g':
	if (cmd->global)
	  bad_prog(_(EXCESS_G_OPT));
	cmd->global = true;
	break;

      case 'w':
	/* The file name takes the rest of the line, so `w' is
	   necessarily the last option.  */
	cmd->outf = get_openfile(&file_write, "w", true);
	return flags;

      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9':
	if (cmd->numb)
	  bad_prog(_(EXCESS_N_OPT));
	cmd->numb = in_integer(ch);
	if (!cmd->numb)
	  bad_prog(_(ZERO_N_OPT));
	break;

      case CLOSE_BRACE:
      case '#':
	/* Leave these for the caller to process.  */
	savchar(ch);
	/* Fall Through */
      case EOF:
      case '\n':
      case ';':
	return flags;

      case '\r':
	/* Accept CR-LF as a line end; a lone CR is an error (the
	   character read after it is not pushed back).  */
	if (inchar() == '\n')
	  return flags;
	/* FALLTHROUGH */

      default:
	bad_prog(_(UNKNOWN_S_OPT));
	/*NOTREACHED*/
      }
}
    690 
    691 
    692 /* read in a label for a `:', `b', or `t' command */
    694 static char *read_label P_((void));
    695 static char *
    696 read_label()
    697 {
    698   struct buffer *b;
    699   int ch;
    700   char *ret;
    701 
    702   b = init_buffer();
    703   ch = in_nonblank();
    704 
    705   while (ch != EOF && ch != '\n'
    706 	 && !ISBLANK(ch) && ch != ';' && ch != CLOSE_BRACE && ch != '#')
    707     ch = add_then_next (b, ch);
    708 
    709   savchar(ch);
    710   add1_buffer(b, '\0');
    711   ret = ck_strdup(get_buffer(b));
    712   free_buffer(b);
    713   return ret;
    714 }
    715 
    716 /* Store a label (or label reference) created by a `:', `b', or `t'
    717    command so that the jump to/from the label can be backpatched after
    718    compilation is complete, or a reference created by a `{' to be
    719    backpatched when the corresponding `}' is found.  */
    720 static struct sed_label *setup_label
    721   P_((struct sed_label *, countT, char *, const struct error_info *));
    722 static struct sed_label *
    723 setup_label(list, idx, name, err_info)
    724   struct sed_label *list;
    725   countT idx;
    726   char *name;
    727   const struct error_info *err_info;
    728 {
    729   struct sed_label *ret = OB_MALLOC(&obs, 1, struct sed_label);
    730   ret->v_index = idx;
    731   ret->name = name;
    732   if (err_info)
    733     MEMCPY(&ret->err_info, err_info, sizeof (ret->err_info));
    734   ret->next = list;
    735   return ret;
    736 }
    737 
    738 static struct sed_label *release_label P_((struct sed_label *list_head));
    739 static struct sed_label *
    740 release_label(list_head)
    741   struct sed_label *list_head;
    742 {
    743   struct sed_label *ret;
    744 
    745   if (!list_head)
    746     return NULL;
    747   ret = list_head->next;
    748 
    749   FREE(list_head->name);
    750 
    751 #if 0
    752   /* We use obstacks */
    753   FREE(list_head);
    754 #endif
    755   return ret;
    756 }
    757 
    758 static struct replacement *new_replacement P_((char *, size_t,
    759 					       enum replacement_types));
    760 static struct replacement *
    761 new_replacement(text, length, type)
    762   char *text;
    763   size_t length;
    764   enum replacement_types type;
    765 {
    766   struct replacement *r = OB_MALLOC(&obs, 1, struct replacement);
    767 
    768   r->prefix = text;
    769   r->prefix_length = length;
    770   r->subst_id = -1;
    771   r->repl_type = type;
    772 
    773   /* r-> next = NULL; */
    774   return r;
    775 }
    776 
/* Compile the replacement part of an `s' command.  TEXT (LENGTH bytes)
   is duplicated, normalized, and split into a linked list of segments
   stored in SUB->replacement.  Each segment is a run of literal text
   optionally followed by a reference: `&' gives subst_id 0, `\0'..`\9'
   give subst_id 0..9.  SUB->max_id receives the highest numbered
   reference used.  The case escapes \L \U \E \l \u select the
   repl_type of the segments that follow (not recognised in
   POSIXLY_BASIC mode).  The segments point into the duplicated text,
   which is therefore not freed here.  */
static void setup_replacement P_((struct subst *, const char *, size_t));
static void
setup_replacement(sub, text, length)
     struct subst *sub;
     const char *text;
     size_t length;
{
  char *base;		/* start of the literal text not yet emitted */
  char *p;
  char *text_end;
  /* repl_type applies to the next segment; save_type is what repl_type
     falls back to once that segment has been emitted, which is how the
     one-shot \l and \u are undone.  */
  enum replacement_types repl_type = REPL_ASIS, save_type = REPL_ASIS;
  struct replacement root;	/* dummy head; only root.next is used */
  struct replacement *tail;

  sub->max_id = 0;
  base = MEMDUP(text, length, char);
  length = normalize_text(base, length, TEXT_REPLACEMENT);

  text_end = base + length;
  tail = &root;

  for (p=base; p<text_end; ++p)
    {
      if (*p == '\\')
	{
	  /* Preceding the backslash may be some literal text: */
	  tail = tail->next =
	    new_replacement(base, CAST(size_t)(p - base), repl_type);

	  repl_type = save_type;

	  /* Skip the backslash and look for a numeric back-reference,
	     or a case-munging escape if not in POSIX mode: */
	  ++p;
	  if (p == text_end)
	    /* Trailing backslash: keep it as part of the literal.  */
	    ++tail->prefix_length;

	  else if (posixicity == POSIXLY_BASIC && !ISDIGIT (*p))
	    {
	      /* Overwrite the backslash with the escaped character and
		 extend the segment just created to include it.  */
	      p[-1] = *p;
	      ++tail->prefix_length;
	    }

	  else
	    switch (*p)
	      {
	      case '0': case '1': case '2': case '3': case '4':
	      case '5': case '6': case '7': case '8': case '9':
		tail->subst_id = *p - '0';
		if (sub->max_id < tail->subst_id)
		  sub->max_id = tail->subst_id;
		break;

	      case 'L':
		repl_type = REPL_LOWERCASE;
		save_type = REPL_LOWERCASE;
		break;

	      case 'U':
		repl_type = REPL_UPPERCASE;
		save_type = REPL_UPPERCASE;
		break;

	      case 'E':
		repl_type = REPL_ASIS;
		save_type = REPL_ASIS;
		break;

	      case 'l':
		save_type = repl_type;
		repl_type |= REPL_LOWERCASE_FIRST;
		break;

	      case 'u':
		save_type = repl_type;
		repl_type |= REPL_UPPERCASE_FIRST;
		break;

	      default:
		/* Unknown escape: same treatment as the POSIX case
		   above, the character is taken literally.  */
		p[-1] = *p;
		++tail->prefix_length;
	      }

	  base = p + 1;
	}
      else if (*p == '&')
	{
	  /* Preceding the ampersand may be some literal text: */
	  tail = tail->next =
	    new_replacement(base, CAST(size_t)(p - base), repl_type);

	  repl_type = save_type;
	  tail->subst_id = 0;
	  base = p + 1;
	}
  }
  /* There may be some trailing literal text: */
  if (base < text_end)
    tail = tail->next =
      new_replacement(base, CAST(size_t)(text_end - base), repl_type);

  /* If no segment was created, tail is still &root, so this leaves
     the replacement list empty (NULL).  */
  tail->next = NULL;
  sub->replacement = root.next;
}
    881 
/* Read one line of text into a text buffer.

   BUF non-NULL starts a new text: any pending text is discarded, BUF
   is reset to empty and remembered in old_text_buf.  BUF NULL
   continues the text that an earlier call left pending.

   LEADIN_CH is a character the caller has already read: EOF means
   there is nothing to read yet (return at once, leaving the text
   pending); a newline is dropped; anything else becomes the first
   character of the text.

   Reading stops at the first unescaped newline.  A backslash is stored
   together with the character that follows it, so an escaped newline
   does not end the text.  On completion the accumulated text (with a
   final newline) is passed through normalize_text and copied into BUF.
   If EOF is reached first, a newline is appended and the text stays in
   pending_text for a later continuation.  */
static void read_text P_((struct text_buf *buf, int leadin_ch));
static void
read_text(buf, leadin_ch)
  struct text_buf *buf;
  int leadin_ch;
{
  int ch;

  /* Should we start afresh (as opposed to continue a partial text)? */
  if (buf)
    {
      if (pending_text)
	free_buffer(pending_text);
      pending_text = init_buffer();
      buf->text = NULL;
      buf->text_length = 0;
      old_text_buf = buf;
    }
  /* assert(old_text_buf != NULL); */

  if (leadin_ch == EOF)
    return;

  if (leadin_ch != '\n')
    add1_buffer(pending_text, leadin_ch);

  ch = inchar();
  while (ch != EOF && ch != '\n')
    {
      if (ch == '\\')
	{
	  /* Keep the backslash unless it is the very last character
	     of the input.  */
	  ch = inchar();
	  if (ch != EOF)
	    add1_buffer (pending_text, '\\');
	}

      if (ch == EOF)
	{
	  /* Input ran out in the middle of the text: leave it pending
	     so that a later call can continue it.  */
	  add1_buffer (pending_text, '\n');
	  return;
	}

      ch = add_then_next (pending_text, ch);
    }

  add1_buffer(pending_text, '\n');
  if (!buf)
    buf = old_text_buf;
  buf->text_length = normalize_text (get_buffer (pending_text),
				     size_buffer (pending_text), TEXT_BUFFER);
  buf->text = MEMDUP(get_buffer(pending_text), buf->text_length, char);
  free_buffer(pending_text);
  pending_text = NULL;
}
    936 
    937 
/* Try to read an address for a sed command.  If it succeeds,
   return non-zero and store the resulting address in `*addr'.
   If the input doesn't look like an address read nothing
   and return zero.

   CH is the first character of the candidate address, already read by
   the caller.  Recognised forms:
     /RE/ or \cREc   regular expression, optionally followed by the
                     modifiers I and M (plus S and X when REG_PERL is
                     available); no modifiers in POSIXLY_BASIC mode
     N               line number
     N~STEP          ADDR_IS_NUM_MOD (plain N when STEP is 0)
     +N, ~N          ADDR_IS_STEP / ADDR_IS_STEP_MOD; not in
                     POSIXLY_BASIC mode
     $               last line
   `*addr' is always reset first, even when zero is returned.  */
static bool compile_address P_((struct addr *addr, int ch));
static bool
compile_address(addr, ch)
  struct addr *addr;
  int ch;
{
  addr->addr_type = ADDR_IS_NULL;
  addr->addr_step = 0;
  addr->addr_number = ~(countT)0;  /* extremely unlikely to ever match */
  addr->addr_regex = NULL;

  if (ch == '/' || ch == '\\')
    {
      int flags = 0;
      struct buffer *b;
      addr->addr_type = ADDR_IS_REGEX;
      /* After a backslash the next character is the delimiter.  */
      if (ch == '\\')
	ch = inchar();
      if ( !(b = match_slash(ch, true)) )
	bad_prog(_(UNTERM_ADDR_RE));

      /* Collect modifier letters until something else shows up.  */
      for(;;)
	{
	  ch = in_nonblank();
	  /* In POSIX mode no modifier is recognised: jump straight to
	     the code that finishes the address.  */
	  if (posixicity == POSIXLY_BASIC)
	    goto posix_address_modifier;
          switch(ch)
	    {
	    case 'I':	/* GNU extension */
	      flags |= REG_ICASE;
	      break;

#ifdef REG_PERL
	    case 'S':	/* GNU extension */
	      if (extended_regexp_flags & REG_PERL)
		flags |= REG_DOTALL;
	      break;

	    case 'X':	/* GNU extension */
	      if (extended_regexp_flags & REG_PERL)
		flags |= REG_EXTENDED;
	      break;
#endif

	    case 'M':	/* GNU extension */
	      flags |= REG_NEWLINE;
	      break;

	    default:
	    posix_address_modifier:
	      /* Not a modifier: give it back and compile the regex.  */
	      savchar (ch);
	      addr->addr_regex = compile_regex (b, flags, 0);
	      free_buffer(b);
	      return true;
	    }
	}
    }
  else if (ISDIGIT(ch))
    {
      addr->addr_number = in_integer(ch);
      addr->addr_type = ADDR_IS_NUM;
      ch = in_nonblank();
      if (ch != '~' || posixicity == POSIXLY_BASIC)
	{
	  savchar(ch);
	}
      else
	{
	  /* N~STEP; a step of zero leaves a plain line number.  */
	  countT step = in_integer(in_nonblank());
	  if (step > 0)
	    {
	      addr->addr_step = step;
	      addr->addr_type = ADDR_IS_NUM_MOD;
	    }
	}
    }
  else if ((ch == '+' || ch == '~') && posixicity != POSIXLY_BASIC)
    {
      addr->addr_step = in_integer(in_nonblank());
      if (addr->addr_step==0)
	; /* default to ADDR_IS_NULL; forces matching to stop on next line */
      else if (ch == '+')
	addr->addr_type = ADDR_IS_STEP;
      else
	addr->addr_type = ADDR_IS_STEP_MOD;
    }
  else if (ch == '$')
    {
      addr->addr_type = ADDR_IS_LAST;
    }
  else
    return false;

  return true;
}
   1038 
   1039 /* Read a program (or a subprogram within `{' `}' pairs) in and store
   1040    the compiled form in `*vector'.  Return a pointer to the new vector.  */
   1041 static struct vector *compile_program P_((struct vector *));
   1042 static struct vector *
   1043 compile_program(vector)
   1044   struct vector *vector;
   1045 {
   1046   struct sed_cmd *cur_cmd;
   1047   struct buffer *b;
   1048   int ch;
   1049 
   1050   if (!vector)
   1051     {
   1052       vector = MALLOC(1, struct vector);
   1053       vector->v = NULL;
   1054       vector->v_allocated = 0;
   1055       vector->v_length = 0;
   1056 
   1057       obstack_init (&obs);
   1058     }
   1059   if (pending_text)
   1060     read_text(NULL, '\n');
   1061 
   1062   for (;;)
   1063     {
   1064       struct addr a;
   1065 
   1066       while ((ch=inchar()) == ';' || ISSPACE(ch))
   1067 	;
   1068       if (ch == EOF)
   1069 	break;
   1070 
   1071       cur_cmd = next_cmd_entry(&vector);
   1072       if (compile_address(&a, ch))
   1073 	{
   1074 	  if (a.addr_type == ADDR_IS_STEP
   1075 	      || a.addr_type == ADDR_IS_STEP_MOD)
   1076 	    bad_prog(_(BAD_STEP));
   1077 
   1078 	  cur_cmd->a1 = MEMDUP(&a, 1, struct addr);
   1079 	  ch = in_nonblank();
   1080 	  if (ch == ',')
   1081 	    {
   1082 	      if (!compile_address(&a, in_nonblank()))
   1083 		bad_prog(_(BAD_COMMA));
   1084 
   1085 	      cur_cmd->a2 = MEMDUP(&a, 1, struct addr);
   1086 	      ch = in_nonblank();
   1087 	    }
   1088 
   1089 	  if ((cur_cmd->a1->addr_type == ADDR_IS_NUM
   1090 	       && cur_cmd->a1->addr_number == 0)
   1091 	      && ((!cur_cmd->a2 || cur_cmd->a2->addr_type != ADDR_IS_REGEX)
   1092 		  || posixicity == POSIXLY_BASIC))
   1093 	    bad_prog(_(INVALID_LINE_0));
   1094 	}
   1095       if (ch == '!')
   1096 	{
   1097 	  cur_cmd->addr_bang = true;
   1098 	  ch = in_nonblank();
   1099 	  if (ch == '!')
   1100 	    bad_prog(_(BAD_BANG));
   1101 	}
   1102 
   1103       /* Do not accept extended commands in --posix mode.  Also,
   1104 	 a few commands only accept one address in that mode.  */
   1105       if (posixicity == POSIXLY_BASIC)
   1106 	switch (ch)
   1107 	  {
   1108 	    case 'e': case 'v': case 'z': case 'L':
   1109 	    case 'Q': case 'T': case 'R': case 'W':
   1110 	      bad_command(ch);
   1111 
   1112 	    case 'a': case 'i': case 'l':
   1113 	    case '=': case 'r':
   1114 	      if (cur_cmd->a2)
   1115 	        bad_prog(_(ONE_ADDR));
   1116 	  }
   1117 
   1118       cur_cmd->cmd = ch;
   1119       switch (ch)
   1120 	{
   1121 	case '#':
   1122 	  if (cur_cmd->a1)
   1123 	    bad_prog(_(NO_SHARP_ADDR));
   1124 	  ch = inchar();
   1125 	  if (ch=='n' && first_script && cur_input.line < 2)
   1126 	    if (   (prog.base && prog.cur==2+prog.base)
   1127 		|| (prog.file && !prog.base && 2==ftell(prog.file)))
   1128 	      no_default_output = true;
   1129 	  while (ch != EOF && ch != '\n')
   1130 	    ch = inchar();
   1131 	  continue;	/* restart the for (;;) loop */
   1132 
   1133 	case 'v':
   1134 	  /* This is an extension.  Programs needing GNU sed might start
   1135 	   * with a `v' command so that other seds will stop.
   1136 	   * We compare the version and ignore POSIXLY_CORRECT.
   1137 	   */
   1138 	  {
   1139 	    char *version = read_label ();
   1140 	    char *compared_version;
   1141 	    compared_version = (*version == '\0') ? "4.0" : version;
   1142 	    if (strverscmp (compared_version, SED_FEATURE_VERSION) > 0)
   1143 	      bad_prog(_(ANCIENT_VERSION));
   1144 
   1145 	    free (version);
   1146 	    posixicity = POSIXLY_EXTENDED;
   1147 	  }
   1148 	  continue;
   1149 
   1150 	case '{':
   1151 	  blocks = setup_label(blocks, vector->v_length, NULL, &cur_input);
   1152 	  cur_cmd->addr_bang = !cur_cmd->addr_bang;
   1153 	  break;
   1154 
   1155 	case '}':
   1156 	  if (!blocks)
   1157 	    bad_prog(_(EXCESS_CLOSE_BRACE));
   1158 	  if (cur_cmd->a1)
   1159 	    bad_prog(_(NO_CLOSE_BRACE_ADDR));
   1160 	  ch = in_nonblank();
   1161 	  if (ch == CLOSE_BRACE || ch == '#')
   1162 	    savchar(ch);
   1163 	  else if (ch != EOF && ch != '\n' && ch != ';')
   1164 	    bad_prog(_(EXCESS_JUNK));
   1165 
   1166 	  vector->v[blocks->v_index].x.jump_index = vector->v_length;
   1167 	  blocks = release_label(blocks);	/* done with this entry */
   1168 	  break;
   1169 
   1170 	case 'e':
   1171 	  ch = in_nonblank();
   1172 	  if (ch == EOF || ch == '\n')
   1173 	    {
   1174 	      cur_cmd->x.cmd_txt.text_length = 0;
   1175 	      break;
   1176 	    }
   1177 	  else
   1178 	    goto read_text_to_slash;
   1179 
   1180 	case 'a':
   1181 	case 'i':
   1182 	case 'c':
   1183 	  ch = in_nonblank();
   1184 
   1185 	read_text_to_slash:
   1186 	  if (ch == EOF)
   1187 	    bad_prog(_(EXPECTED_SLASH));
   1188 
   1189 	  if (ch == '\\')
   1190 	    ch = inchar();
   1191 	  else
   1192 	    {
   1193 	      if (posixicity == POSIXLY_BASIC)
   1194 		bad_prog(_(EXPECTED_SLASH));
   1195 	      savchar(ch);
   1196 	      ch = '\n';
   1197 	    }
   1198 
   1199 	  read_text(&cur_cmd->x.cmd_txt, ch);
   1200 	  break;
   1201 
   1202 	case ':':
   1203 	  if (cur_cmd->a1)
   1204 	    bad_prog(_(NO_COLON_ADDR));
   1205 	  labels = setup_label(labels, vector->v_length, read_label(), NULL);
   1206 	  break;
   1207 
   1208 	case 'T':
   1209 	case 'b':
   1210 	case 't':
   1211 	  jumps = setup_label(jumps, vector->v_length, read_label(), NULL);
   1212 	  break;
   1213 
   1214 	case 'Q':
   1215 	case 'q':
   1216 	  if (cur_cmd->a2)
   1217 	    bad_prog(_(ONE_ADDR));
   1218 	  /* Fall through */
   1219 
   1220 	case 'L':
   1221 	case 'l':
   1222 	  ch = in_nonblank();
   1223 	  if (ISDIGIT(ch) && posixicity != POSIXLY_BASIC)
   1224 	    {
   1225 	      cur_cmd->x.int_arg = in_integer(ch);
   1226 	      ch = in_nonblank();
   1227 	    }
   1228 	  else
   1229 	    cur_cmd->x.int_arg = -1;
   1230 
   1231 	  if (ch == CLOSE_BRACE || ch == '#')
   1232 	    savchar(ch);
   1233 	  else if (ch != EOF && ch != '\n' && ch != ';')
   1234 	    bad_prog(_(EXCESS_JUNK));
   1235 
   1236 	  break;
   1237 
   1238 	case '=':
   1239 	case 'd':
   1240 	case 'D':
   1241 	case 'g':
   1242 	case 'G':
   1243 	case 'h':
   1244 	case 'H':
   1245 	case 'n':
   1246 	case 'N':
   1247 	case 'p':
   1248 	case 'P':
   1249 	case 'z':
   1250 	case 'x':
   1251 	  ch = in_nonblank();
   1252 	  if (ch == CLOSE_BRACE || ch == '#')
   1253 	    savchar(ch);
   1254 	  else if (ch != EOF && ch != '\n' && ch != ';')
   1255 	    bad_prog(_(EXCESS_JUNK));
   1256 	  break;
   1257 
   1258 	case 'r':
   1259 	  b = read_filename();
   1260 	  cur_cmd->x.fname = ck_strdup(get_buffer(b));
   1261 	  free_buffer(b);
   1262 	  break;
   1263 
   1264         case 'R':
   1265 	  cur_cmd->x.fp = get_openfile(&file_read, read_mode, false)->fp;
   1266 	  break;
   1267 
   1268         case 'W':
   1269 	case 'w':
   1270 	  cur_cmd->x.outf = get_openfile(&file_write, "w", true);
   1271 	  break;
   1272 
   1273 	case 's':
   1274 	  {
   1275 	    struct buffer *b2;
   1276 	    int flags;
   1277 	    int slash;
   1278 
   1279 	    slash = inchar();
   1280 	    if ( !(b  = match_slash(slash, true)) )
   1281 	      bad_prog(_(UNTERM_S_CMD));
   1282 	    if ( !(b2 = match_slash(slash, false)) )
   1283 	      bad_prog(_(UNTERM_S_CMD));
   1284 
   1285 	    cur_cmd->x.cmd_subst = OB_MALLOC(&obs, 1, struct subst);
   1286 	    setup_replacement(cur_cmd->x.cmd_subst,
   1287 			      get_buffer(b2), size_buffer(b2));
   1288 	    free_buffer(b2);
   1289 
   1290 	    flags = mark_subst_opts(cur_cmd->x.cmd_subst);
   1291 	    cur_cmd->x.cmd_subst->regx =
   1292 	      compile_regex(b, flags, cur_cmd->x.cmd_subst->max_id + 1);
   1293 	    free_buffer(b);
   1294 	  }
   1295 	  break;
   1296 
   1297 	case 'y':
   1298 	  {
   1299 	    size_t len, dest_len;
   1300 	    int slash;
   1301 	    struct buffer *b2;
   1302             char *src_buf, *dest_buf;
   1303 
   1304 	    slash = inchar();
   1305 	    if ( !(b = match_slash(slash, false)) )
   1306 	      bad_prog(_(UNTERM_Y_CMD));
   1307             src_buf = get_buffer(b);
   1308 	    len = normalize_text(src_buf, size_buffer (b), TEXT_BUFFER);
   1309 
   1310             if ( !(b2 = match_slash(slash, false)) )
   1311  	      bad_prog(_(UNTERM_Y_CMD));
   1312             dest_buf = get_buffer(b2);
   1313 	    dest_len = normalize_text(dest_buf, size_buffer (b2), TEXT_BUFFER);
   1314 
   1315             if (mb_cur_max > 1)
   1316 	      {
   1317                 int i, j, idx, src_char_num;
   1318                 size_t *src_lens = MALLOC(len, size_t);
   1319                 char **trans_pairs;
   1320                 size_t mbclen;
   1321                 mbstate_t cur_stat;
   1322 
   1323                 /* Enumerate how many character the source buffer has.  */
   1324                 memset(&cur_stat, 0, sizeof(mbstate_t));
   1325                 for (i = 0, j = 0; i < len;)
   1326                   {
   1327                     mbclen = MBRLEN (src_buf + i, len - i, &cur_stat);
   1328                     /* An invalid sequence, or a truncated multibyte character.
   1329                        We treat it as a singlebyte character.  */
   1330                     if (mbclen == (size_t) -1 || mbclen == (size_t) -2
   1331                         || mbclen == 0)
   1332                       mbclen = 1;
   1333                     src_lens[j++] = mbclen;
   1334                     i += mbclen;
   1335                   }
   1336                 src_char_num = j;
   1337 
   1338                 memset(&cur_stat, 0, sizeof(mbstate_t));
   1339                 idx = 0;
   1340 
   1341                 /* trans_pairs = {src(0), dest(0), src(1), dest(1), ..., NULL}
   1342                      src(i) : pointer to i-th source character.
   1343                      dest(i) : pointer to i-th destination character.
   1344                      NULL : terminator */
   1345                 trans_pairs = MALLOC(2 * src_char_num + 1, char*);
   1346                 cur_cmd->x.translatemb = trans_pairs;
   1347                 for (i = 0; i < src_char_num; i++)
   1348                   {
   1349                     if (idx >= dest_len)
   1350                       bad_prog(_(Y_CMD_LEN));
   1351 
   1352                     /* Set the i-th source character.  */
   1353                     trans_pairs[2 * i] = MALLOC(src_lens[i] + 1, char);
   1354                     strncpy(trans_pairs[2 * i], src_buf, src_lens[i]);
   1355                     trans_pairs[2 * i][src_lens[i]] = '\0';
   1356                     src_buf += src_lens[i]; /* Forward to next character.  */
   1357 
   1358                     /* Fetch the i-th destination character.  */
   1359                     mbclen = MBRLEN (dest_buf + idx, dest_len - idx, &cur_stat);
   1360                     /* An invalid sequence, or a truncated multibyte character.
   1361                        We treat it as a singlebyte character.  */
   1362                     if (mbclen == (size_t) -1 || mbclen == (size_t) -2
   1363                         || mbclen == 0)
   1364                       mbclen = 1;
   1365 
   1366                     /* Set the i-th destination character.  */
   1367                     trans_pairs[2 * i + 1] = MALLOC(mbclen + 1, char);
   1368                     strncpy(trans_pairs[2 * i + 1], dest_buf + idx, mbclen);
   1369                     trans_pairs[2 * i + 1][mbclen] = '\0';
   1370                     idx += mbclen; /* Forward to next character.  */
   1371                   }
   1372                 trans_pairs[2 * i] = NULL;
   1373                 if (idx != dest_len)
   1374                   bad_prog(_(Y_CMD_LEN));
   1375               }
   1376             else
   1377               {
   1378 	        unsigned char *translate =
   1379 		  OB_MALLOC(&obs, YMAP_LENGTH, unsigned char);
   1380                 unsigned char *ustring = CAST(unsigned char *)src_buf;
   1381 
   1382 		if (len != dest_len)
   1383                   bad_prog(_(Y_CMD_LEN));
   1384 
   1385 	        for (len = 0; len < YMAP_LENGTH; len++)
   1386 	          translate[len] = len;
   1387 
   1388                 while (dest_len--)
   1389                   translate[*ustring++] = (unsigned char)*dest_buf++;
   1390 
   1391 	        cur_cmd->x.translate = translate;
   1392 	      }
   1393 
   1394             if ((ch = in_nonblank()) != EOF && ch != '\n' && ch != ';')
   1395               bad_prog(_(EXCESS_JUNK));
   1396 
   1397             free_buffer(b);
   1398             free_buffer(b2);
   1399 	  }
   1400 	break;
   1401 
   1402 	case EOF:
   1403 	  bad_prog(_(NO_COMMAND));
   1404 	  /*NOTREACHED*/
   1405 
   1406 	default:
   1407 	  bad_command (ch);
   1408 	  /*NOTREACHED*/
   1409 	}
   1410 
   1411       /* this is buried down here so that "continue" statements will miss it */
   1412       ++vector->v_length;
   1413     }
   1414   return vector;
   1415 }
   1416 
   1417 
   1418 /* deal with \X escapes */
   1420 size_t
   1421 normalize_text(buf, len, buftype)
   1422   char *buf;
   1423   size_t len;
   1424   enum text_types buftype;
   1425 {
   1426   const char *bufend = buf + len;
   1427   char *p = buf;
   1428   char *q = buf;
   1429 
   1430   /* This variable prevents normalizing text within bracket
   1431      subexpressions when conforming to POSIX.  If 0, we
   1432      are not within a bracket expression.  If -1, we are within a
   1433      bracket expression but are not within [.FOO.], [=FOO=],
   1434      or [:FOO:].  Otherwise, this is the '.', '=', or ':'
   1435      respectively within these three types of subexpressions.  */
   1436   int bracket_state = 0;
   1437 
   1438   int mbclen;
   1439   mbstate_t cur_stat;
   1440   memset(&cur_stat, 0, sizeof(mbstate_t));
   1441 
   1442   while (p < bufend)
   1443     {
   1444       int c;
   1445       mbclen = MBRLEN (p, bufend - p, &cur_stat);
   1446       if (mbclen != 1)
   1447 	{
   1448           /* An invalid sequence, or a truncated multibyte character.
   1449              We treat it as a singlebyte character.  */
   1450           if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
   1451             mbclen = 1;
   1452 
   1453           memmove (q, p, mbclen);
   1454           q += mbclen;
   1455           p += mbclen;
   1456 	  continue;
   1457 	}
   1458 
   1459       if (*p == '\\' && p+1 < bufend && bracket_state == 0)
   1460 	switch ( (c = *++p) )
   1461 	  {
   1462 #if defined __STDC__ && __STDC__-0
   1463 	  case 'a': *q++ = '\a'; p++; continue;
   1464 #else /* Not STDC; we'll just assume ASCII */
   1465 	  case 'a': *q++ = '\007'; p++; continue;
   1466 #endif
   1467 	  /* case 'b': *q++ = '\b'; p++; continue; --- conflicts with \b RE */
   1468 	  case 'f': *q++ = '\f'; p++; continue;
   1469 	  case '\n': /*fall through */
   1470 	  case 'n': *q++ = '\n'; p++; continue;
   1471 	  case 'r': *q++ = '\r'; p++; continue;
   1472 	  case 't': *q++ = '\t'; p++; continue;
   1473 	  case 'v': *q++ = '\v'; p++; continue;
   1474 
   1475 	  case 'd': /* decimal byte */
   1476 	    p = convert_number(q, p+1, bufend, 10, 3, 'd');
   1477 	    q++;
   1478 	    continue;
   1479 
   1480 	  case 'x': /* hexadecimal byte */
   1481 	    p = convert_number(q, p+1, bufend, 16, 2, 'x');
   1482 	    q++;
   1483 	    continue;
   1484 
   1485 #ifdef REG_PERL
   1486 	  case '0': case '1': case '2': case '3':
   1487 	  case '4': case '5': case '6': case '7':
   1488 	    if ((extended_regexp_flags & REG_PERL)
   1489 		&& p+1 < bufend
   1490 		&& p[1] >= '0' && p[1] <= '9')
   1491 	      {
   1492 		p = convert_number(q, p, bufend, 8, 3, *p);
   1493 		q++;
   1494 	      }
   1495 	    else
   1496 	      {
   1497 		/* we just pass the \ up one level for interpretation */
   1498 	        if (buftype != TEXT_BUFFER)
   1499 		  *q++ = '\\';
   1500 	      }
   1501 
   1502 	    continue;
   1503 
   1504 	  case 'o': /* octal byte */
   1505 	    if (!(extended_regexp_flags & REG_PERL))
   1506 	      {
   1507 	        p = convert_number(q, p+1, bufend,  8, 3, 'o');
   1508 		q++;
   1509 	      }
   1510 	    else
   1511 	      {
   1512 	        /* we just pass the \ up one level for interpretation */
   1513 	        if (buftype != TEXT_BUFFER)
   1514 		  *q++ = '\\';
   1515 	      }
   1516 
   1517 	    continue;
   1518 #else
   1519 	  case 'o': /* octal byte */
   1520 	    p = convert_number(q, p+1, bufend,  8, 3, 'o');
   1521 	    q++;
   1522 	    continue;
   1523 #endif
   1524 
   1525 	  case 'c':
   1526 	    if (++p < bufend)
   1527 	      {
   1528 		*q++ = toupper(*p) ^ 0x40;
   1529 		p++;
   1530 		continue;
   1531 	      }
   1532 	    else
   1533 	      {
   1534 	        /* we just pass the \ up one level for interpretation */
   1535 	        if (buftype != TEXT_BUFFER)
   1536 		  *q++ = '\\';
   1537 	        continue;
   1538 	      }
   1539 
   1540 	  default:
   1541 	    /* we just pass the \ up one level for interpretation */
   1542 	    if (buftype != TEXT_BUFFER)
   1543 	      *q++ = '\\';
   1544 	    break;
   1545 	  }
   1546       else if (buftype == TEXT_REGEX && posixicity != POSIXLY_EXTENDED)
   1547         switch (*p)
   1548           {
   1549           case '[':
   1550             if (!bracket_state)
   1551               bracket_state = -1;
   1552             break;
   1553 
   1554 	  case ':':
   1555 	  case '.':
   1556 	  case '=':
   1557             if (bracket_state == -1 && p[-1] == '[')
   1558               bracket_state = *p;
   1559             break;
   1560 
   1561           case ']':
   1562             if (bracket_state == 0)
   1563 	      ;
   1564             else if (bracket_state == -1)
   1565               bracket_state = 0;
   1566             else if (p[-2] != bracket_state && p[-1] == bracket_state)
   1567               bracket_state = -1;
   1568             break;
   1569           }
   1570 
   1571       *q++ = *p++;
   1572     }
   1573     return (size_t)(q - buf);
   1574 }
   1575 
   1576 
   1577 /* `str' is a string (from the command line) that contains a sed command.
   1578    Compile the command, and add it to the end of `cur_program'. */
   1579 struct vector *
   1580 compile_string(cur_program, str, len)
   1581   struct vector *cur_program;
   1582   char *str;
   1583   size_t len;
   1584 {
   1585   static countT string_expr_count = 0;
   1586   struct vector *ret;
   1587 
   1588   prog.file = NULL;
   1589   prog.base = CAST(unsigned char *)str;
   1590   prog.cur = prog.base;
   1591   prog.end = prog.cur + len;
   1592 
   1593   cur_input.line = 0;
   1594   cur_input.name = NULL;
   1595   cur_input.string_expr_count = ++string_expr_count;
   1596 
   1597   ret = compile_program(cur_program);
   1598   prog.base = NULL;
   1599   prog.cur = NULL;
   1600   prog.end = NULL;
   1601 
   1602   first_script = false;
   1603   return ret;
   1604 }
   1605 
   1606 /* `cmdfile' is the name of a file containing sed commands.
   1607    Read them in and add them to the end of `cur_program'.
   1608  */
   1609 struct vector *
   1610 compile_file(cur_program, cmdfile)
   1611   struct vector *cur_program;
   1612   const char *cmdfile;
   1613 {
   1614   struct vector *ret;
   1615 
   1616   prog.file = stdin;
   1617   if (cmdfile[0] != '-' || cmdfile[1] != '\0')
   1618     prog.file = ck_fopen(cmdfile, "rt", true);
   1619 
   1620   cur_input.line = 1;
   1621   cur_input.name = cmdfile;
   1622   cur_input.string_expr_count = 0;
   1623 
   1624   ret = compile_program(cur_program);
   1625   if (prog.file != stdin)
   1626     ck_fclose(prog.file);
   1627   prog.file = NULL;
   1628 
   1629   first_script = false;
   1630   return ret;
   1631 }
   1632 
   1633 /* Make any checks which require the whole program to have been read.
   1634    In particular: this backpatches the jump targets.
   1635    Any cleanup which can be done after these checks is done here also.  */
   1636 void
   1637 check_final_program(program)
   1638   struct vector *program;
   1639 {
   1640   struct sed_label *go;
   1641   struct sed_label *lbl;
   1642 
   1643   /* do all "{"s have a corresponding "}"? */
   1644   if (blocks)
   1645     {
   1646       /* update info for error reporting: */
   1647       MEMCPY(&cur_input, &blocks->err_info, sizeof (cur_input));
   1648       bad_prog(_(EXCESS_OPEN_BRACE));
   1649     }
   1650 
   1651   /* was the final command an unterminated a/c/i command? */
   1652   if (pending_text)
   1653     {
   1654       old_text_buf->text_length = size_buffer(pending_text);
   1655       if (old_text_buf->text_length)
   1656         old_text_buf->text = MEMDUP(get_buffer(pending_text),
   1657 				    old_text_buf->text_length, char);
   1658       free_buffer(pending_text);
   1659       pending_text = NULL;
   1660     }
   1661 
   1662   for (go = jumps; go; go = release_label(go))
   1663     {
   1664       for (lbl = labels; lbl; lbl = lbl->next)
   1665 	if (strcmp(lbl->name, go->name) == 0)
   1666 	  break;
   1667       if (lbl)
   1668 	{
   1669 	  program->v[go->v_index].x.jump_index = lbl->v_index;
   1670 	}
   1671       else
   1672 	{
   1673 	  if (*go->name)
   1674 	    panic(_("can't find label for jump to `%s'"), go->name);
   1675 	  program->v[go->v_index].x.jump_index = program->v_length;
   1676 	}
   1677     }
   1678   jumps = NULL;
   1679 
   1680   for (lbl = labels; lbl; lbl = release_label(lbl))
   1681     ;
   1682   labels = NULL;
   1683 
   1684   /* There is no longer a need to track file names: */
   1685   {
   1686     struct output *p;
   1687 
   1688     for (p=file_read; p; p=p->link)
   1689       if (p->name)
   1690 	{
   1691 	  FREE(p->name);
   1692 	  p->name = NULL;
   1693 	}
   1694 
   1695     for (p=file_write; p; p=p->link)
   1696       if (p->name)
   1697 	{
   1698 	  FREE(p->name);
   1699 	  p->name = NULL;
   1700 	}
   1701   }
   1702 }
   1703 
   1704 /* Rewind all resources which were allocated in this module. */
   1705 void
   1706 rewind_read_files()
   1707 {
   1708   struct output *p;
   1709 
   1710   for (p=file_read; p; p=p->link)
   1711     if (p->fp)
   1712       rewind(p->fp);
   1713 }
   1714 
   1715 /* Release all resources which were allocated in this module. */
   1716 void
   1717 finish_program(program)
   1718   struct vector *program;
   1719 {
   1720   /* close all files... */
   1721   {
   1722     struct output *p, *q;
   1723 
   1724     for (p=file_read; p; p=q)
   1725       {
   1726 	if (p->fp)
   1727 	  ck_fclose(p->fp);
   1728 	q = p->link;
   1729 #if 0
   1730 	/* We use obstacks. */
   1731 	FREE(p);
   1732 #endif
   1733       }
   1734 
   1735     for (p=file_write; p; p=q)
   1736       {
   1737 	if (p->fp)
   1738 	  ck_fclose(p->fp);
   1739 	q = p->link;
   1740 #if 0
   1741 	/* We use obstacks. */
   1742 	FREE(p);
   1743 #endif
   1744       }
   1745     file_read = file_write = NULL;
   1746   }
   1747 
   1748 #ifdef DEBUG_LEAKS
   1749   obstack_free (&obs, NULL);
   1750 #endif /*DEBUG_LEAKS*/
   1751 }
   1752