/*
 * This file is part of ltrace.
 * Copyright (C) 2007,2008,2012,2013 Petr Machata, Red Hat Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA
 */

#include <sys/types.h>
#include <regex.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>

/* Return non-zero if GLOB[AT] and GLOB[AT + 1] are "[:" and both lie
 * within the first LENGTH bytes, i.e. a character class such as
 * [:space:] may start at AT.  */
static int
is_class_start(const char *glob, size_t length, size_t at)
{
	return at + 1 < length && glob[at] == '[' && glob[at + 1] == ':';
}

/* Scan a character class ("[:name:]") that starts at GLOB[FROM].
 *
 * GLOB[FROM] and GLOB[FROM + 1] are expected to be "[:"; the search
 * for the terminating ":]" starts right behind them and never looks
 * beyond the first LENGTH bytes of GLOB.
 *
 * Returns the index of the ']' that closes the class, or -1 if the
 * first ':' found is not immediately followed by ']' or if there is
 * no ':' at all.  */
static ssize_t
match_character_class(const char *glob, size_t length, size_t from)
{
	assert(length > 0);

	size_t start = from + 2;
	if (start > length)
		return -1;

	/* Only the bytes in [start, length) may be inspected.  */
	const char *colon = memchr(glob + start, ':', length - start);
	if (colon == NULL)
		return -1;

	size_t close = (size_t)(colon - glob) + 1;
	if (close >= length || glob[close] != ']')
		return -1;

	/* Report the ']' rather than the ':', so that callers resume
	 * scanning behind the whole class and do not mistake its ']'
	 * for the end of the enclosing bracket expression.  */
	return (ssize_t)close;
}

/* Find the end of the bracket expression that opens at GLOB[FROM].
 *
 * *EXCLMP is set to 1 if the set is complemented with '!', which the
 * caller has to rewrite to '^', and to 0 otherwise (including when
 * '^' is used, as that can be copied to the regex verbatim).
 *
 * Returns the index of the closing ']', or -1 if the expression is
 * not terminated within the first LENGTH bytes of GLOB or contains a
 * malformed character class.  */
static ssize_t
match_brack(const char *glob, size_t length, size_t from, int *exclmp)
{
	size_t i = from + 1;

	*exclmp = 0;
	if (i >= length)
		return -1;

	/* Complement operator.  */
	if (glob[i] == '^' || glob[i] == '!') {
		*exclmp = glob[i] == '!';
		++i;
		if (i >= length)
			return -1;
	}

	/* On the first character, both [ and ] are legal members of
	 * the set.  But when [ is followed by :, it starts a character
	 * class.  */
	if (is_class_start(glob, length, i)) {
		ssize_t end = match_character_class(glob, length, i);
		if (end < 0)
			return -1;
		i = (size_t)end;
	}
	++i; /* skip the first member, be it [, ] or a whole class */

	for (; i < length; ++i) {
		if (is_class_start(glob, length, i)) {
			ssize_t end = match_character_class(glob, length, i);
			if (end < 0)
				return -1;
			/* The loop increment steps over the class's ].  */
			i = (size_t)end;
		} else if (glob[i] == ']') {
			return (ssize_t)i;
		}
	}
	return -1;
}

/* Append STR_SIZE bytes of STR to the growable buffer *BUFP.
 *
 * *SIZEP is the number of bytes used and *ALLOCP the number of bytes
 * allocated; both are updated, and *BUFP may be moved by realloc.
 * A STR_SIZE of 0 means "use strlen(STR)".
 *
 * Returns 0 on success, -1 if the buffer could not be grown, in which
 * case *BUFP, *SIZEP and *ALLOCP are left untouched.  */
static int
append(char **bufp, const char *str, size_t str_size,
       size_t *sizep, size_t *allocp)
{
	if (str_size == 0)
		str_size = strlen(str);
	if (str_size == 0)
		return 0; /* nothing to copy; *bufp may still be NULL */

	size_t nsize = *sizep + str_size;
	if (nsize > *allocp) {
		size_t nalloc = nsize * 2;
		char *nbuf = realloc(*bufp, nalloc);
		if (nbuf == NULL)
			return -1;
		*allocp = nalloc;
		*bufp = nbuf;
	}

	memcpy(*bufp + *sizep, str, str_size);
	*sizep = nsize;
	return 0;
}

/* Translate the glob pattern GLOB into a regular expression.
 *
 * On success returns 0 and stores a malloc'd, NUL-terminated regex in
 * *RETP; the caller owns it and must free it.  On failure *RETP is
 * not written and one of these regex.h codes is returned:
 *   REG_ESPACE   an allocation failed,
 *   REG_EBRACK   a bracket expression is malformed or unterminated,
 *   REG_EESCAPE  GLOB ends with a lone backslash.  */
static int
glob_to_regex(const char *glob, char **retp)
{
	size_t allocd = 0;
	size_t size = 0;
	char *buf = NULL;

	/* Any failure of append() is an allocation failure, so that is
	 * the status reported unless a branch overrides it.  */
	int status = REG_ESPACE;

	size_t length = strlen(glob);
	int escape = 0;
	for (size_t i = 0; i < length; ++i) {
		char c = glob[i];

		if (escape) {
			int rc;
			if (c == '\\')
				rc = append(&buf, "\\\\", 0, &size, &allocd);
			else if (c == '*')
				rc = append(&buf, "\\*", 0, &size, &allocd);
			else if (c == '?')
				/* NOTE(review): '?' is emitted bare, which
				 * is a literal only in basic regular
				 * expressions -- confirm that no caller
				 * passes REG_EXTENDED.  */
				rc = append(&buf, "?", 0, &size, &allocd);
			else
				rc = append(&buf, (char[]){ '\\', c }, 2,
					    &size, &allocd);
			if (rc < 0)
				goto fail;
			escape = 0;

		} else if (c == '\\') {
			escape = 1;

		} else if (c == '[') {
			int exclm;
			ssize_t j = match_brack(glob, length, i, &exclm);
			if (j < 0) {
				status = REG_EBRACK;
				goto fail;
			}

			/* "[!" is replaced with "[^"; the remainder of
			 * the bracket expression, up to and including
			 * the closing ], is copied verbatim.  */
			size_t skip = exclm ? 2 : 0;
			if (exclm
			    && append(&buf, "[^", 2, &size, &allocd) < 0)
				goto fail;
			if (append(&buf, glob + i + skip,
				   (size_t)j - i + 1 - skip,
				   &size, &allocd) < 0)
				goto fail;
			i = (size_t)j;

		} else if (c == '*') {
			if (append(&buf, ".*", 0, &size, &allocd) < 0)
				goto fail;
		} else if (c == '?') {
			if (append(&buf, ".", 0, &size, &allocd) < 0)
				goto fail;
		} else if (c == '.') {
			if (append(&buf, "\\.", 0, &size, &allocd) < 0)
				goto fail;
		} else if (append(&buf, &c, 1, &size, &allocd) < 0) {
			goto fail;
		}
	}

	if (escape) {
		status = REG_EESCAPE;
		goto fail;
	}

	/* Terminate the string.  An explicit size of 1 is required,
	 * because a size of 0 would make append() call strlen.  */
	{
		char nul = 0;
		if (append(&buf, &nul, 1, &size, &allocd) < 0)
			goto fail;
	}

	*retp = buf;
	return 0;

fail:
	free(buf);
	return status;
}

/* Compile the glob pattern GLOB into PREG.
 *
 * GLOB is first translated to a regular expression, which is then
 * handed to regcomp together with CFLAGS.  Returns 0 on success,
 * otherwise the error code of whichever of the two steps failed.  */
int
globcomp(regex_t *preg, const char *glob, int cflags)
{
	char *regex = NULL;
	int status = glob_to_regex(glob, &regex);
	if (status != 0)
		return status;
	assert(regex != NULL);
	status = regcomp(preg, regex, cflags);
	free(regex);
	return status;
}

#ifdef TEST
#include <stdio.h>

/* Translate GLOB and complain on stderr unless the status equals
 * EXP_STATUS and, for status 0, the regex equals EXPECT.  */
static void
translate(const char *glob, int exp_status, const char *expect)
{
	char *pattern = NULL;
	int status = glob_to_regex(glob, &pattern);
	if (status != exp_status) {
		fprintf(stderr, "translating %s, expected status %d, got %d\n",
			glob, exp_status, status);
		free(pattern); /* non-NULL only if status is 0 */
		return;
	}

	if (status == 0) {
		assert(pattern != NULL);
		if (strcmp(pattern, expect) != 0)
			fprintf(stderr, "translating %s, expected %s, got %s\n",
				glob, expect, pattern);
		free(pattern);
	} else {
		assert(pattern == NULL);
	}
}

/* Compile GLOB and assert that matching it against STR yields EXPECT
 * (0 or REG_NOMATCH).  */
static void
try_match(const char *glob, const char *str, int expect)
{
	regex_t preg;
	int status = globcomp(&preg, glob, 0);
	assert(status == 0);
	status = regexec(&preg, str, 0, NULL, 0);
	assert(status == expect);
	regfree(&preg);
}

int
main(void)
{
	translate("*", 0, ".*");
	translate("?", 0, ".");
	translate(".*", 0, "\\..*");
	translate("*.*", 0, ".*\\..*");
	translate("*a*", 0, ".*a.*");
	translate("[abc]", 0, "[abc]");
	translate("[^abc]", 0, "[^abc]");
	translate("[!abc]", 0, "[^abc]");
	translate("[]]", 0, "[]]");
	translate("[[]", 0, "[[]");
	translate("[^]]", 0, "[^]]");
	translate("[^a-z]", 0, "[^a-z]");
	translate("[abc\\]]", 0, "[abc\\]]");
	translate("[abc\\]def]", 0, "[abc\\]def]");
	translate("[[:space:]]", 0, "[[:space:]]");
	translate("[^[:space:]]", 0, "[^[:space:]]");
	translate("[![:space:]]", 0, "[^[:space:]]");
	translate("[^a-z]*", 0, "[^a-z].*");
	translate("[^a-z]bar*", 0, "[^a-z]bar.*");
	translate("*.*.*.*.*.*.*.*.*.*.*.*.*.*.*.*.", 0,
		  ".*\\..*\\..*\\..*\\..*\\..*\\..*\\..*\\."
		  ".*\\..*\\..*\\..*\\..*\\..*\\..*\\..*\\.");

	/* Members that follow a character class stay inside the set.  */
	translate("[[:space:]*]", 0, "[[:space:]*]");
	translate("[![:alpha:]?.]x*", 0, "[^[:alpha:]?.]x.*");

	translate("\\", REG_EESCAPE, NULL);
	translate("[^[:naotuh\\", REG_EBRACK, NULL);
	translate("[^[:", REG_EBRACK, NULL);
	translate("[^[", REG_EBRACK, NULL);
	translate("[^", REG_EBRACK, NULL);
	translate("[\\", REG_EBRACK, NULL);
	translate("[", REG_EBRACK, NULL);
	translate("abc[", REG_EBRACK, NULL);

	try_match("abc*def", "abc012def", 0);
	try_match("abc*def", "ab012def", REG_NOMATCH);
	try_match("[abc]*def", "a1def", 0);
	try_match("[abc]*def", "b1def", 0);
	try_match("[abc]*def", "d1def", REG_NOMATCH);
	try_match("[[:digit:]?]x", "?x", 0);
	try_match("[[:digit:]?]x", "ax", REG_NOMATCH);

	return 0;
}

#endif