1 /* GNU SED, a batch stream editor. 2 Copyright (C) 1989,90,91,92,93,94,95,98,99,2002,2003 3 Free Software Foundation, Inc. 4 5 This program is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3, or (at your option) 8 any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program; if not, write to the Free Software 17 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 18 19 #ifdef HAVE_CONFIG_H 20 #include "config.h" 21 #endif 22 23 #include "basicdefs.h" 24 #include "regex.h" 25 26 #ifndef BOOTSTRAP 27 #include <stdio.h> 28 #include "unlocked-io.h" 29 #endif 30 31 #include "utils.h" 32 33 /* Struct vector is used to describe a compiled sed program. */ 34 struct vector { 35 struct sed_cmd *v; /* a dynamically allocated array */ 36 size_t v_allocated; /* ... number slots allocated */ 37 size_t v_length; /* ... number of slots in use */ 38 }; 39 40 /* This structure tracks files used by sed so that they may all be 41 closed cleanly at normal program termination. A flag is kept that tells 42 if a missing newline was encountered, so that it is added on the 43 next line and the two lines are not concatenated. */ 44 struct output { 45 char *name; 46 bool missing_newline; 47 FILE *fp; 48 struct output *link; 49 }; 50 51 struct text_buf { 52 char *text; 53 size_t text_length; 54 }; 55 56 struct regex { 57 regex_t pattern; 58 int flags; 59 size_t sz; 60 char re[1]; 61 }; 62 63 enum replacement_types { 64 REPL_ASIS = 0, 65 REPL_UPPERCASE = 1, 66 REPL_LOWERCASE = 2, 67 REPL_UPPERCASE_FIRST = 4, 68 REPL_LOWERCASE_FIRST = 8, 69 REPL_MODIFIERS = REPL_UPPERCASE_FIRST | REPL_LOWERCASE_FIRST, 70 71 /* These are given to aid in debugging */ 72 REPL_UPPERCASE_UPPERCASE = REPL_UPPERCASE_FIRST | REPL_UPPERCASE, 73 REPL_UPPERCASE_LOWERCASE = REPL_UPPERCASE_FIRST | REPL_LOWERCASE, 74 REPL_LOWERCASE_UPPERCASE = REPL_LOWERCASE_FIRST | REPL_UPPERCASE, 75 REPL_LOWERCASE_LOWERCASE = REPL_LOWERCASE_FIRST | REPL_LOWERCASE 76 }; 77 78 enum text_types { 79 TEXT_BUFFER, 80 TEXT_REPLACEMENT, 81 TEXT_REGEX 82 }; 83 84 enum posixicity_types { 85 POSIXLY_EXTENDED, /* with GNU extensions */ 86 POSIXLY_CORRECT, /* with POSIX-compatible GNU extensions */ 87 POSIXLY_BASIC /* pedantically POSIX */ 88 }; 89 90 enum addr_state { 91 RANGE_INACTIVE, /* never been active */ 92 RANGE_ACTIVE, /* between first and second address */ 93 RANGE_CLOSED /* like RANGE_INACTIVE, but range has ended once */ 94 }; 95 96 enum addr_types { 97 ADDR_IS_NULL, /* null address */ 98 ADDR_IS_REGEX, /* a.addr_regex is valid */ 99 ADDR_IS_NUM, /* a.addr_number is valid */ 100 ADDR_IS_NUM_MOD, /* a.addr_number is valid, addr_step is modulo */ 101 ADDR_IS_STEP, /* address is +N (only valid for addr2) */ 102 ADDR_IS_STEP_MOD, /* address is ~N (only valid for addr2) */ 103 ADDR_IS_LAST /* address is $ */ 104 }; 105 106 struct addr { 107 enum addr_types addr_type; 108 countT addr_number; 109 countT addr_step; 110 struct regex *addr_regex; 111 }; 112 113 114 struct replacement { 115 char *prefix; 116 size_t prefix_length; 117 int subst_id; 118 enum replacement_types repl_type; 119 struct replacement *next; 120 }; 121 122 struct subst { 123 struct regex *regx; 124 struct replacement *replacement; 125 countT numb; /* if >0, only substitute for match number "numb" */ 126 struct output *outf; /* 'w' option given */ 127 unsigned global : 1; /* 'g' option given */ 128 unsigned print : 2; /* 'p' option given (before/after eval) */ 129 unsigned eval : 1; /* 'e' option given */ 130 unsigned max_id : 4; /* maximum backreference on the RHS */ 131 }; 132 133 #ifdef REG_PERL 134 /* This is the structure we store register match data in. See 135 regex.texinfo for a full description of what registers match. */ 136 struct re_registers 137 { 138 unsigned num_regs; 139 regoff_t *start; 140 regoff_t *end; 141 }; 142 #endif 143 144 145 146 struct sed_cmd { 147 struct addr *a1; /* save space: usually is NULL */ 148 struct addr *a2; 149 150 /* See description the enum, above. */ 151 enum addr_state range_state; 152 153 /* Non-zero if command is to be applied to non-matches. */ 154 char addr_bang; 155 156 /* The actual command character. */ 157 char cmd; 158 159 /* auxiliary data for various commands */ 160 union { 161 /* This structure is used for a, i, and c commands. */ 162 struct text_buf cmd_txt; 163 164 /* This is used for the l, q and Q commands. */ 165 int int_arg; 166 167 /* This is used for the {}, b, and t commands. */ 168 countT jump_index; 169 170 /* This is used for the r command. */ 171 char *fname; 172 173 /* This is used for the hairy s command. */ 174 struct subst *cmd_subst; 175 176 /* This is used for the w command. */ 177 struct output *outf; 178 179 /* This is used for the R command. */ 180 FILE *fp; 181 182 /* This is used for the y command. */ 183 unsigned char *translate; 184 char **translatemb; 185 } x; 186 }; 187 188 189 190 void bad_prog P_((const char *why)); 192 size_t normalize_text P_((char *text, size_t len, enum text_types buftype)); 193 struct vector *compile_string P_((struct vector *, char *str, size_t len)); 194 struct vector *compile_file P_((struct vector *, const char *cmdfile)); 195 void check_final_program P_((struct vector *)); 196 void rewind_read_files P_((void)); 197 void finish_program P_((struct vector *)); 198 199 struct regex *compile_regex P_((struct buffer *b, int flags, int needed_sub)); 200 int match_regex P_((struct regex *regex, 201 char *buf, size_t buflen, size_t buf_start_offset, 202 struct re_registers *regarray, int regsize)); 203 #ifdef DEBUG_LEAKS 204 void release_regex P_((struct regex *)); 205 #endif 206 207 int process_files P_((struct vector *, char **argv)); 208 209 int main P_((int, char **)); 210 211 extern void fmt P_ ((const char *line, const char *line_end, int max_length, FILE *output_file)); 212 213 extern int extended_regexp_flags; 214 215 /* If set, fflush(stdout) on every line output. */ 216 extern bool unbuffered_output; 217 218 /* If set, don't write out the line unless explicitly told to. */ 219 extern bool no_default_output; 220 221 /* If set, reset line counts on every new file. */ 222 extern bool separate_files; 223 224 /* If set, follow symlinks when invoked with -i option */ 225 extern bool follow_symlinks; 226 227 /* Do we need to be pedantically POSIX compliant? */ 228 extern enum posixicity_types posixicity; 229 230 /* How long should the `l' command's output line be? */ 231 extern countT lcmd_out_line_len; 232 233 /* How do we edit files in-place? (we don't if NULL) */ 234 extern char *in_place_extension; 235 236 /* The mode to use to read files, either "rt" or "rb". */ 237 extern char *read_mode; 238 239 /* Should we use EREs? */ 240 extern bool use_extended_syntax_p; 241 242 /* Declarations for multibyte character sets. */ 243 extern int mb_cur_max; 244 extern bool is_utf8; 245 246 #ifdef HAVE_MBRTOWC 247 #ifdef HAVE_BTOWC 248 #define MBRTOWC(pwc, s, n, ps) \ 249 (mb_cur_max == 1 ? \ 250 (*(pwc) = btowc (*(unsigned char *) (s)), 1) : \ 251 mbrtowc ((pwc), (s), (n), (ps))) 252 253 #define WCRTOMB(s, wc, ps) \ 254 (mb_cur_max == 1 ? \ 255 (*(s) = wctob ((wint_t) (wc)), 1) : \ 256 wcrtomb ((s), (wc), (ps))) 257 #else 258 #define MBRTOWC(pwc, s, n, ps) \ 259 mbrtowc ((pwc), (s), (n), (ps)) 260 261 #define WCRTOMB(s, wc, ps) \ 262 wcrtomb ((s), (wc), (ps)) 263 #endif 264 265 #define MBSINIT(s) \ 266 (mb_cur_max == 1 ? 1 : mbsinit ((s))) 267 268 #define MBRLEN(s, n, ps) \ 269 (mb_cur_max == 1 ? 1 : mbrtowc (NULL, s, n, ps)) 270 271 #define BRLEN(ch, ps) \ 272 (mb_cur_max == 1 ? 1 : brlen (ch, ps)) 273 274 #else 275 #define MBSINIT(s) 1 276 #define MBRLEN(s, n, ps) 1 277 #define BRLEN(ch, ps) 1 278 #endif 279 280 extern int brlen P_ ((int ch, mbstate_t *ps)); 281 extern void initialize_mbcs P_ ((void)); 282 283