Home | History | Annotate | Download | only in ltrace
      1 /*
      2  * This file is part of ltrace.
      3  * Copyright (C) 2007,2008,2012,2013 Petr Machata, Red Hat Inc.
      4  *
      5  * This program is free software; you can redistribute it and/or
      6  * modify it under the terms of the GNU General Public License as
      7  * published by the Free Software Foundation; either version 2 of the
      8  * License, or (at your option) any later version.
      9  *
     10  * This program is distributed in the hope that it will be useful, but
     11  * WITHOUT ANY WARRANTY; without even the implied warranty of
     12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13  * General Public License for more details.
     14  *
     15  * You should have received a copy of the GNU General Public License
     16  * along with this program; if not, write to the Free Software
     17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
     18  * 02110-1301 USA
     19  */
     20 
     21 #include <sys/types.h>
     22 #include <regex.h>
     23 #include <string.h>
     24 #include <stdlib.h>
     25 #include <assert.h>
     26 
     27 static ssize_t
     28 match_character_class(const char *glob, size_t length, size_t from)
     29 {
     30 	assert(length > 0);
     31 	const char *colon = memchr(glob + from + 2, ':', length - 1);
     32 	if (colon == NULL || colon[1] != ']')
     33 		return -1;
     34 	return colon - glob;
     35 }
     36 
     37 static ssize_t
     38 match_brack(const char *glob, size_t length, size_t from, int *exclmp)
     39 {
     40 	size_t i = from + 1;
     41 
     42 	if (i >= length)
     43 		return -1;
     44 
     45 	/* Complement operator.  */
     46 	*exclmp = 0;
     47 	if (glob[i] == '^' || glob[i] == '!') {
     48 		*exclmp = glob[i++] == '!';
     49 		if (i >= length)
     50 			return -1;
     51 	}
     52 
     53 	/* On first character, both [ and ] are legal.  But when [ is
     54 	 * followed with :, it's character class.  */
     55 	if (glob[i] == '[' && glob[i + 1] == ':') {
     56 		ssize_t j = match_character_class(glob, length, i);
     57 		if (j < 0)
     58 		fail:
     59 			return -1;
     60 		i = j;
     61 	}
     62 	++i; /* skip any character, including [ or ]  */
     63 
     64 	for (; i < length; ++i) {
     65 		char c = glob[i];
     66 		if (c == '[' && glob[i + 1] == ':') {
     67 			ssize_t j = match_character_class(glob, length, i);
     68 			if (j < 0)
     69 				goto fail;
     70 			i = j;
     71 
     72 		} else if (c == ']') {
     73 			return i;
     74 		}
     75 	}
     76 	return -1;
     77 }
     78 
     79 static int
     80 append(char **bufp, const char *str, size_t str_size,
     81        size_t *sizep, size_t *allocp)
     82 {
     83 	if (str_size == 0)
     84 		str_size = strlen(str);
     85 	size_t nsize = *sizep + str_size;
     86 	if (nsize > *allocp) {
     87 		size_t nalloc = nsize * 2;
     88 		char *nbuf = realloc(*bufp, nalloc);
     89 		if (nbuf == NULL)
     90 			return -1;
     91 		*allocp = nalloc;
     92 		*bufp = nbuf;
     93 	}
     94 
     95 	memcpy(*bufp + *sizep, str, str_size);
     96 	*sizep = nsize;
     97 	return 0;
     98 }
     99 
    100 static int
    101 glob_to_regex(const char *glob, char **retp)
    102 {
    103 	size_t allocd = 0;
    104 	size_t size = 0;
    105 	char *buf = NULL;
    106 
    107 	size_t length = strlen(glob);
    108 	int escape = 0;
    109 	size_t i;
    110 	for(i = 0; i < length; ++i) {
    111 		char c = glob[i];
    112 		if (escape) {
    113 			if (c == '\\') {
    114 				if (append(&buf, "\\\\", 0,
    115 					   &size, &allocd) < 0) {
    116 				fail:
    117 					free(buf);
    118 					return REG_ESPACE;
    119 				}
    120 
    121 			} else if (c == '*') {
    122 				if (append(&buf, "\\*", 0, &size, &allocd) < 0)
    123 					goto fail;
    124 			} else if (c == '?') {
    125 				if (append(&buf, "?", 0, &size, &allocd) < 0)
    126 					goto fail;
    127 			} else if (append(&buf, (char[]){ '\\', c }, 2,
    128 					  &size, &allocd) < 0)
    129 				goto fail;
    130 			escape = 0;
    131 		} else {
    132 			if (c == '\\')
    133 				escape = 1;
    134 			else if (c == '[') {
    135 				int exclm;
    136 				ssize_t j = match_brack(glob, length, i, &exclm);
    137 				if (j < 0) {
    138 					free(buf);
    139 					return REG_EBRACK;
    140 				}
    141 				if (exclm
    142 				    && append(&buf, "[^", 2,
    143 					      &size, &allocd) < 0)
    144 					goto fail;
    145 				if (append(&buf, glob + i + 2*exclm,
    146 					   j - i + 1 - 2*exclm,
    147 					   &size, &allocd) < 0)
    148 					goto fail;
    149 				i = j;
    150 
    151 			} else if (c == '*') {
    152 				if (append(&buf, ".*", 0, &size, &allocd) < 0)
    153 					goto fail;
    154 			} else if (c == '?') {
    155 				if (append(&buf, ".", 0, &size, &allocd) < 0)
    156 					goto fail;
    157 			} else if (c == '.') {
    158 				if (append(&buf, "\\.", 0, &size, &allocd) < 0)
    159 					goto fail;
    160 			} else if (append(&buf, &c, 1, &size, &allocd) < 0)
    161 				goto fail;
    162 		}
    163 	}
    164 
    165 	if (escape) {
    166 		free(buf);
    167 		return REG_EESCAPE;
    168 	}
    169 
    170 	{
    171 		char c = 0;
    172 		if (append(&buf, &c, 1, &size, &allocd) < 0)
    173 			goto fail;
    174 	}
    175 	*retp = buf;
    176 	return 0;
    177 }
    178 
    179 int
    180 globcomp(regex_t *preg, const char *glob, int cflags)
    181 {
    182 	char *regex = NULL;
    183 	int status = glob_to_regex(glob, &regex);
    184 	if (status != 0)
    185 		return status;
    186 	assert(regex != NULL);
    187 	status = regcomp(preg, regex, cflags);
    188 	free(regex);
    189 	return status;
    190 }
    191 
    192 #ifdef TEST
    193 #include <stdio.h>
    194 
    195 static void
    196 translate(const char *glob, int exp_status, const char *expect)
    197 {
    198 	char *pattern = NULL;
    199 	int status = glob_to_regex(glob, &pattern);
    200 	if (status != exp_status) {
    201 		fprintf(stderr, "translating %s, expected status %d, got %d\n",
    202 			glob, exp_status, status);
    203 		return;
    204 	}
    205 
    206 	if (status == 0) {
    207 		assert(pattern != NULL);
    208 		if (strcmp(pattern, expect) != 0)
    209 			fprintf(stderr, "translating %s, expected %s, got %s\n",
    210 				glob, expect, pattern);
    211 		free(pattern);
    212 	} else {
    213 		assert(pattern == NULL);
    214 	}
    215 }
    216 
    217 static void
    218 try_match(const char *glob, const char *str, int expect)
    219 {
    220 	regex_t preg;
    221 	int status = globcomp(&preg, glob, 0);
    222 	assert(status == 0);
    223 	status = regexec(&preg, str, 0, NULL, 0);
    224 	assert(status == expect);
    225 	regfree(&preg);
    226 }
    227 
    228 int
    229 main(void)
    230 {
    231         translate("*", 0, ".*");
    232         translate("?", 0, ".");
    233         translate(".*", 0, "\\..*");
    234         translate("*.*", 0, ".*\\..*");
    235         translate("*a*", 0, ".*a.*");
    236         translate("[abc]", 0, "[abc]");
    237         translate("[^abc]", 0, "[^abc]");
    238         translate("[!abc]", 0, "[^abc]");
    239         translate("[]]", 0, "[]]");
    240         translate("[[]", 0, "[[]");
    241         translate("[^]]", 0, "[^]]");
    242         translate("[^a-z]", 0, "[^a-z]");
    243         translate("[abc\\]]", 0, "[abc\\]]");
    244         translate("[abc\\]def]", 0, "[abc\\]def]");
    245         translate("[[:space:]]", 0, "[[:space:]]");
    246         translate("[^[:space:]]", 0, "[^[:space:]]");
    247         translate("[![:space:]]", 0, "[^[:space:]]");
    248         translate("[^a-z]*", 0, "[^a-z].*");
    249         translate("[^a-z]bar*", 0, "[^a-z]bar.*");
    250 	translate("*.*.*.*.*.*.*.*.*.*.*.*.*.*.*.*.", 0,
    251 		  ".*\\..*\\..*\\..*\\..*\\..*\\..*\\..*\\."
    252 		  ".*\\..*\\..*\\..*\\..*\\..*\\..*\\..*\\.");
    253 
    254         translate("\\", REG_EESCAPE, NULL);
    255         translate("[^[:naotuh\\", REG_EBRACK, NULL);
    256         translate("[^[:", REG_EBRACK, NULL);
    257         translate("[^[", REG_EBRACK, NULL);
    258         translate("[^", REG_EBRACK, NULL);
    259         translate("[\\", REG_EBRACK, NULL);
    260         translate("[", REG_EBRACK, NULL);
    261         translate("abc[", REG_EBRACK, NULL);
    262 
    263 	try_match("abc*def", "abc012def", 0);
    264 	try_match("abc*def", "ab012def", REG_NOMATCH);
    265 	try_match("[abc]*def", "a1def", 0);
    266 	try_match("[abc]*def", "b1def", 0);
    267 	try_match("[abc]*def", "d1def", REG_NOMATCH);
    268 
    269 	return 0;
    270 }
    271 
    272 #endif
    273