1 /* GNU SED, a batch stream editor. 2 Copyright (C) 1999, 2002, 2003, 2004, 2005, 2006 3 Free Software Foundation, Inc. 4 5 This program is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3, or (at your option) 8 any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program; if not, write to the Free Software 17 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 18 19 #include "sed.h" 20 21 #include <ctype.h> 22 #include <string.h> 23 #include <stdio.h> 24 #ifdef HAVE_STDLIB_H 25 # include <stdlib.h> 26 #endif 27 28 #ifdef gettext_noop 29 # define N_(String) gettext_noop(String) 30 #else 31 # define N_(String) (String) 32 #endif 33 34 extern bool use_extended_syntax_p; 35 36 static const char errors[] = 37 "no previous regular expression\0" 38 "cannot specify modifiers on empty regexp"; 39 40 #define NO_REGEX (errors) 41 #define BAD_MODIF (NO_REGEX + sizeof(N_("no previous regular expression"))) 42 #define END_ERRORS (BAD_MODIF + sizeof(N_("cannot specify modifiers on empty regexp"))) 43 44 45 47 static void 48 compile_regex_1 (new_regex, needed_sub) 49 struct regex *new_regex; 50 int needed_sub; 51 { 52 #ifdef REG_PERL 53 int errcode; 54 errcode = regncomp(&new_regex->pattern, new_regex->re, new_regex->sz, 55 (needed_sub ? 0 : REG_NOSUB) 56 | new_regex->flags 57 | extended_regexp_flags); 58 59 if (errcode) 60 { 61 char errorbuf[200]; 62 regerror(errcode, NULL, errorbuf, 200); 63 bad_prog(gettext(errorbuf)); 64 } 65 #else 66 const char *error; 67 int syntax = ((extended_regexp_flags & REG_EXTENDED) 68 ? RE_SYNTAX_POSIX_EXTENDED 69 : RE_SYNTAX_POSIX_BASIC); 70 71 syntax &= ~RE_DOT_NOT_NULL; 72 syntax |= RE_NO_POSIX_BACKTRACKING; 73 74 switch (posixicity) 75 { 76 case POSIXLY_EXTENDED: 77 syntax &= ~RE_UNMATCHED_RIGHT_PAREN_ORD; 78 break; 79 case POSIXLY_CORRECT: 80 syntax |= RE_UNMATCHED_RIGHT_PAREN_ORD; 81 break; 82 case POSIXLY_BASIC: 83 syntax |= RE_UNMATCHED_RIGHT_PAREN_ORD | RE_LIMITED_OPS | RE_NO_GNU_OPS; 84 break; 85 } 86 87 #ifdef RE_ICASE 88 syntax |= (new_regex->flags & REG_ICASE) ? RE_ICASE : 0; 89 #endif 90 #ifdef RE_NO_SUB 91 syntax |= needed_sub ? 0 : RE_NO_SUB; 92 #endif 93 94 new_regex->pattern.fastmap = malloc (1 << (sizeof (char) * 8)); 95 96 /* If REG_NEWLINE is set, newlines are treated differently. */ 97 if (new_regex->flags & REG_NEWLINE) 98 { 99 /* REG_NEWLINE implies neither . nor [^...] match newline. */ 100 syntax &= ~RE_DOT_NEWLINE; 101 syntax |= RE_HAT_LISTS_NOT_NEWLINE; 102 } 103 104 re_set_syntax (syntax); 105 error = re_compile_pattern (new_regex->re, new_regex->sz, 106 &new_regex->pattern); 107 new_regex->pattern.newline_anchor = (new_regex->flags & REG_NEWLINE) != 0; 108 109 new_regex->pattern.translate = NULL; 110 #ifndef RE_ICASE 111 if (new_regex->flags & REG_ICASE) 112 { 113 static char translate[1 << (sizeof(char) * 8)]; 114 int i; 115 for (i = 0; i < sizeof(translate) / sizeof(char); i++) 116 translate[i] = tolower (i); 117 118 new_regex->pattern.translate = translate; 119 } 120 #endif 121 122 if (error) 123 bad_prog(error); 124 #endif 125 126 /* Just to be sure, I mark this as not POSIXLY_CORRECT behavior */ 127 if (needed_sub 128 && new_regex->pattern.re_nsub < needed_sub - 1 129 && posixicity == POSIXLY_EXTENDED) 130 { 131 char buf[200]; 132 sprintf(buf, _("invalid reference \\%d on `s' command's RHS"), 133 needed_sub - 1); 134 bad_prog(buf); 135 } 136 } 137 138 struct regex * 139 compile_regex(b, flags, needed_sub) 140 struct buffer *b; 141 int flags; 142 int needed_sub; 143 { 144 struct regex *new_regex; 145 size_t re_len; 146 147 /* // matches the last RE */ 148 if (size_buffer(b) == 0) 149 { 150 if (flags > 0) 151 bad_prog(_(BAD_MODIF)); 152 return NULL; 153 } 154 155 re_len = size_buffer(b); 156 new_regex = ck_malloc(sizeof (struct regex) + re_len - 1); 157 new_regex->flags = flags; 158 memcpy (new_regex->re, get_buffer(b), re_len); 159 160 #ifdef REG_PERL 161 new_regex->sz = re_len; 162 #else 163 /* GNU regex does not process \t & co. */ 164 new_regex->sz = normalize_text(new_regex->re, re_len, TEXT_REGEX); 165 #endif 166 167 compile_regex_1 (new_regex, needed_sub); 168 return new_regex; 169 } 170 171 #ifdef REG_PERL 172 static void 173 copy_regs (regs, pmatch, nregs) 174 struct re_registers *regs; 175 regmatch_t *pmatch; 176 int nregs; 177 { 178 int i; 179 int need_regs = nregs + 1; 180 /* We need one extra element beyond `num_regs' for the `-1' marker GNU code 181 uses. */ 182 183 /* Have the register data arrays been allocated? */ 184 if (!regs->start) 185 { /* No. So allocate them with malloc. */ 186 regs->start = MALLOC (need_regs, regoff_t); 187 regs->end = MALLOC (need_regs, regoff_t); 188 regs->num_regs = need_regs; 189 } 190 else if (need_regs > regs->num_regs) 191 { /* Yes. We also need more elements than were already 192 allocated, so reallocate them. */ 193 regs->start = REALLOC (regs->start, need_regs, regoff_t); 194 regs->end = REALLOC (regs->end, need_regs, regoff_t); 195 regs->num_regs = need_regs; 196 } 197 198 /* Copy the regs. */ 199 for (i = 0; i < nregs; ++i) 200 { 201 regs->start[i] = pmatch[i].rm_so; 202 regs->end[i] = pmatch[i].rm_eo; 203 } 204 for ( ; i < regs->num_regs; ++i) 205 regs->start[i] = regs->end[i] = -1; 206 } 207 #endif 208 209 int 210 match_regex(regex, buf, buflen, buf_start_offset, regarray, regsize) 211 struct regex *regex; 212 char *buf; 213 size_t buflen; 214 size_t buf_start_offset; 215 struct re_registers *regarray; 216 int regsize; 217 { 218 int ret; 219 static struct regex *regex_last; 220 #ifdef REG_PERL 221 regmatch_t rm[10], *regmatch = rm; 222 if (regsize > 10) 223 regmatch = (regmatch_t *) alloca (sizeof (regmatch_t) * regsize); 224 #endif 225 226 /* printf ("Matching from %d/%d\n", buf_start_offset, buflen); */ 227 228 /* Keep track of the last regexp matched. */ 229 if (!regex) 230 { 231 regex = regex_last; 232 if (!regex_last) 233 bad_prog(_(NO_REGEX)); 234 } 235 else 236 regex_last = regex; 237 238 #ifdef REG_PERL 239 regmatch[0].rm_so = CAST(int)buf_start_offset; 240 regmatch[0].rm_eo = CAST(int)buflen; 241 ret = regexec (®ex->pattern, buf, regsize, regmatch, REG_STARTEND); 242 243 if (regsize) 244 copy_regs (regarray, regmatch, regsize); 245 246 return (ret == 0); 247 #else 248 if (regex->pattern.no_sub && regsize) 249 compile_regex_1 (regex, regsize); 250 251 regex->pattern.regs_allocated = REGS_REALLOCATE; 252 253 ret = re_search (®ex->pattern, buf, buflen, buf_start_offset, 254 buflen - buf_start_offset, 255 regsize ? regarray : NULL); 256 257 return (ret > -1); 258 #endif 259 } 260 261 262 #ifdef DEBUG_LEAKS 263 void 264 release_regex(regex) 265 struct regex *regex; 266 { 267 regfree(®ex->pattern); 268 FREE(regex); 269 } 270 #endif /*DEBUG_LEAKS*/ 271