Home | History | Annotate | Download | only in src
      1 #include <assert.h>
      2 #include <stdint.h>
      3 #include <stdio.h>
      4 #include <string.h>
      5 
      6 #include "regex.h"
      7 #include "label_file.h"
      8 
/*
 * REGEX_ARCH_SIZE_T is the size type whose width is reported in the
 * architecture string built by regex_arch_string(): PCRE2_SIZE when building
 * against PCRE2, plain size_t otherwise.
 */
#ifdef USE_PCRE2
#define REGEX_ARCH_SIZE_T PCRE2_SIZE
#else
#define REGEX_ARCH_SIZE_T size_t
#endif
     14 
     15 #ifndef __BYTE_ORDER__
     16 
     17 /* If the compiler doesn't define __BYTE_ORDER__, try to use the C
     18  * library <endian.h> header definitions. */
     19 #include <endian.h>
     20 #ifndef __BYTE_ORDER
     21 #error Neither __BYTE_ORDER__ nor __BYTE_ORDER defined. Unable to determine endianness.
     22 #endif
     23 
     24 #define __ORDER_LITTLE_ENDIAN __LITTLE_ENDIAN
     25 #define __ORDER_BIG_ENDIAN __BIG_ENDIAN
     26 #define __BYTE_ORDER__ __BYTE_ORDER
     27 
     28 #endif
     29 
     30 #ifdef USE_PCRE2
     31 char const *regex_arch_string(void)
     32 {
     33 	static char arch_string_buffer[32];
     34 	static char const *arch_string = "";
     35 	char const *endianness = NULL;
     36 	int rc;
     37 
     38 	if (arch_string[0] == '\0') {
     39 		if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
     40 			endianness = "el";
     41 		else if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
     42 			endianness = "eb";
     43 
     44 		if (!endianness)
     45 			return NULL;
     46 
     47 		rc = snprintf(arch_string_buffer, sizeof(arch_string_buffer),
     48 				"%zu-%zu-%s", sizeof(void *),
     49 				sizeof(REGEX_ARCH_SIZE_T),
     50 				endianness);
     51 		if (rc < 0)
     52 			abort();
     53 
     54 		arch_string = &arch_string_buffer[0];
     55 	}
     56 	return arch_string;
     57 }
     58 
/*
 * Per-pattern state for the PCRE2 back-end. Both members are released by
 * regex_data_free().
 */
struct regex_data {
	pcre2_code *regex; /* compiled regular expression */
	/*
	 * match data block required for the compiled
	 * pattern in pcre2
	 */
	pcre2_match_data *match_data;
};
     67 
     68 int regex_prepare_data(struct regex_data **regex, char const *pattern_string,
     69 		       struct regex_error_data *errordata)
     70 {
     71 	memset(errordata, 0, sizeof(struct regex_error_data));
     72 
     73 	*regex = regex_data_create();
     74 	if (!(*regex))
     75 		return -1;
     76 
     77 	(*regex)->regex = pcre2_compile(
     78 	    (PCRE2_SPTR)pattern_string, PCRE2_ZERO_TERMINATED, PCRE2_DOTALL,
     79 	    &errordata->error_code, &errordata->error_offset, NULL);
     80 	if (!(*regex)->regex) {
     81 		goto err;
     82 	}
     83 
     84 	(*regex)->match_data =
     85 	    pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
     86 	if (!(*regex)->match_data) {
     87 		goto err;
     88 	}
     89 	return 0;
     90 
     91 err:
     92 	regex_data_free(*regex);
     93 	*regex = NULL;
     94 	return -1;
     95 }
     96 
     97 char const *regex_version(void)
     98 {
     99 	static char version_buf[256];
    100 	size_t len = pcre2_config(PCRE2_CONFIG_VERSION, NULL);
    101 	if (len <= 0 || len > sizeof(version_buf))
    102 		return NULL;
    103 
    104 	pcre2_config(PCRE2_CONFIG_VERSION, version_buf);
    105 	return version_buf;
    106 }
    107 
    108 int regex_load_mmap(struct mmap_area *mmap_area, struct regex_data **regex,
    109 		    int do_load_precompregex)
    110 {
    111 	int rc;
    112 	uint32_t entry_len;
    113 
    114 	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
    115 	if (rc < 0)
    116 		return -1;
    117 
    118 	if (entry_len && do_load_precompregex) {
    119 		/*
    120 		 * this should yield exactly one because we store one pattern at
    121 		 * a time
    122 		 */
    123 		rc = pcre2_serialize_get_number_of_codes(mmap_area->next_addr);
    124 		if (rc != 1)
    125 			return -1;
    126 
    127 		*regex = regex_data_create();
    128 		if (!*regex)
    129 			return -1;
    130 
    131 		rc = pcre2_serialize_decode(&(*regex)->regex, 1,
    132 					    (PCRE2_SPTR)mmap_area->next_addr,
    133 					    NULL);
    134 		if (rc != 1)
    135 			goto err;
    136 
    137 		(*regex)->match_data =
    138 		    pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
    139 		if (!(*regex)->match_data)
    140 			goto err;
    141 	}
    142 
    143 	/* and skip the decoded bit */
    144 	rc = next_entry(NULL, mmap_area, entry_len);
    145 	if (rc < 0)
    146 		goto err;
    147 
    148 	return 0;
    149 err:
    150 	regex_data_free(*regex);
    151 	*regex = NULL;
    152 	return -1;
    153 }
    154 
    155 int regex_writef(struct regex_data *regex, FILE *fp, int do_write_precompregex)
    156 {
    157 	int rc = 0;
    158 	size_t len;
    159 	PCRE2_SIZE serialized_size;
    160 	uint32_t to_write = 0;
    161 	PCRE2_UCHAR *bytes = NULL;
    162 
    163 	if (do_write_precompregex) {
    164 		/* encode the patter for serialization */
    165 		rc = pcre2_serialize_encode((const pcre2_code **)&regex->regex,
    166 					    1, &bytes, &serialized_size, NULL);
    167 		if (rc != 1) {
    168 			rc = -1;
    169 			goto out;
    170 		}
    171 		to_write = serialized_size;
    172 	}
    173 
    174 	/* write serialized pattern's size */
    175 	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
    176 	if (len != 1) {
    177 		rc = -1;
    178 		goto out;
    179 	}
    180 
    181 	if (do_write_precompregex) {
    182 		/* write serialized pattern */
    183 		len = fwrite(bytes, 1, to_write, fp);
    184 		if (len != to_write)
    185 			rc = -1;
    186 	}
    187 
    188 out:
    189 	if (bytes)
    190 		pcre2_serialize_free(bytes);
    191 
    192 	return rc;
    193 }
    194 
    195 void regex_data_free(struct regex_data *regex)
    196 {
    197 	if (regex) {
    198 		if (regex->regex)
    199 			pcre2_code_free(regex->regex);
    200 		if (regex->match_data)
    201 			pcre2_match_data_free(regex->match_data);
    202 		free(regex);
    203 	}
    204 }
    205 
    206 int regex_match(struct regex_data *regex, char const *subject, int partial)
    207 {
    208 	int rc;
    209 	rc = pcre2_match(
    210 	    regex->regex, (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0,
    211 	    partial ? PCRE2_PARTIAL_SOFT : 0, regex->match_data, NULL);
    212 	if (rc > 0)
    213 		return REGEX_MATCH;
    214 	switch (rc) {
    215 	case PCRE2_ERROR_PARTIAL:
    216 		return REGEX_MATCH_PARTIAL;
    217 	case PCRE2_ERROR_NOMATCH:
    218 		return REGEX_NO_MATCH;
    219 	default:
    220 		return REGEX_ERROR;
    221 	}
    222 }
    223 
    224 /*
    225  * TODO Replace this compare function with something that actually compares the
    226  * regular expressions.
    227  * This compare function basically just compares the binary representations of
    228  * the automatons, and because this representation contains pointers and
    229  * metadata, it can only return a match if regex1 == regex2.
    230  * Preferably, this function would be replaced with an algorithm that computes
    231  * the equivalence of the automatons systematically.
    232  */
    233 int regex_cmp(struct regex_data *regex1, struct regex_data *regex2)
    234 {
    235 	int rc;
    236 	size_t len1, len2;
    237 	rc = pcre2_pattern_info(regex1->regex, PCRE2_INFO_SIZE, &len1);
    238 	assert(rc == 0);
    239 	rc = pcre2_pattern_info(regex2->regex, PCRE2_INFO_SIZE, &len2);
    240 	assert(rc == 0);
    241 	if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
    242 		return SELABEL_INCOMPARABLE;
    243 
    244 	return SELABEL_EQUAL;
    245 }
    246 
    247 #else // !USE_PCRE2
    248 char const *regex_arch_string(void)
    249 {
    250 	return "N/A";
    251 }
    252 
/*
 * Prior to version 8.20, libpcre did not have pcre_free_study(). On those
 * versions the name is mapped to pcre_free() so the rest of the file can call
 * pcre_free_study() unconditionally.
 */
#if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20))
#define pcre_free_study pcre_free
#endif
    257 
/*
 * Per-pattern state for the legacy PCRE back-end.
 *
 * The union is read according to `owned`: get_pcre_extra() uses `sd` when
 * owned is non-zero and `lsd` otherwise.
 */
struct regex_data {
	int owned;   /*
		      * non zero if regex and pcre_extra is owned by this
		      * structure and thus must be freed on destruction.
		      */
	pcre *regex; /* compiled regular expression */
	union {
		pcre_extra *sd; /* pointer to extra compiled stuff */
		pcre_extra lsd; /* used to hold the mmap'd version */
	};
};
    269 
    270 int regex_prepare_data(struct regex_data **regex, char const *pattern_string,
    271 		       struct regex_error_data *errordata)
    272 {
    273 	memset(errordata, 0, sizeof(struct regex_error_data));
    274 
    275 	*regex = regex_data_create();
    276 	if (!(*regex))
    277 		return -1;
    278 
    279 	(*regex)->regex =
    280 	    pcre_compile(pattern_string, PCRE_DOTALL, &errordata->error_buffer,
    281 			 &errordata->error_offset, NULL);
    282 	if (!(*regex)->regex)
    283 		goto err;
    284 
    285 	(*regex)->owned = 1;
    286 
    287 	(*regex)->sd = pcre_study((*regex)->regex, 0, &errordata->error_buffer);
    288 	if (!(*regex)->sd && errordata->error_buffer)
    289 		goto err;
    290 
    291 	return 0;
    292 
    293 err:
    294 	regex_data_free(*regex);
    295 	*regex = NULL;
    296 	return -1;
    297 }
    298 
    299 char const *regex_version(void)
    300 {
    301 	return pcre_version();
    302 }
    303 
    304 int regex_load_mmap(struct mmap_area *mmap_area, struct regex_data **regex,
    305 		    int unused __attribute__((unused)))
    306 {
    307 	int rc;
    308 	uint32_t entry_len;
    309 	size_t info_len;
    310 
    311 	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
    312 	if (rc < 0 || !entry_len)
    313 		return -1;
    314 
    315 	*regex = regex_data_create();
    316 	if (!(*regex))
    317 		return -1;
    318 
    319 	(*regex)->owned = 0;
    320 	(*regex)->regex = (pcre *)mmap_area->next_addr;
    321 	rc = next_entry(NULL, mmap_area, entry_len);
    322 	if (rc < 0)
    323 		goto err;
    324 
    325 	/*
    326 	 * Check that regex lengths match. pcre_fullinfo()
    327 	 * also validates its magic number.
    328 	 */
    329 	rc = pcre_fullinfo((*regex)->regex, NULL, PCRE_INFO_SIZE, &info_len);
    330 	if (rc < 0 || info_len != entry_len)
    331 		goto err;
    332 
    333 	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
    334 	if (rc < 0 || !entry_len)
    335 		goto err;
    336 
    337 	if (entry_len) {
    338 		(*regex)->lsd.study_data = (void *)mmap_area->next_addr;
    339 		(*regex)->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
    340 		rc = next_entry(NULL, mmap_area, entry_len);
    341 		if (rc < 0)
    342 			goto err;
    343 
    344 		/* Check that study data lengths match. */
    345 		rc = pcre_fullinfo((*regex)->regex, &(*regex)->lsd,
    346 				   PCRE_INFO_STUDYSIZE, &info_len);
    347 		if (rc < 0 || info_len != entry_len)
    348 			goto err;
    349 	}
    350 	return 0;
    351 
    352 err:
    353 	regex_data_free(*regex);
    354 	*regex = NULL;
    355 	return -1;
    356 }
    357 
    358 static inline pcre_extra *get_pcre_extra(struct regex_data *regex)
    359 {
    360 	if (!regex) return NULL;
    361 	if (regex->owned) {
    362 		return regex->sd;
    363 	} else if (regex->lsd.study_data) {
    364 		return &regex->lsd;
    365 	} else {
    366 		return NULL;
    367 	}
    368 }
    369 
    370 int regex_writef(struct regex_data *regex, FILE *fp,
    371 		 int unused __attribute__((unused)))
    372 {
    373 	int rc;
    374 	size_t len;
    375 	uint32_t to_write;
    376 	size_t size;
    377 	pcre_extra *sd = get_pcre_extra(regex);
    378 
    379 	/* determine the size of the pcre data in bytes */
    380 	rc = pcre_fullinfo(regex->regex, NULL, PCRE_INFO_SIZE, &size);
    381 	if (rc < 0)
    382 		return -1;
    383 
    384 	/* write the number of bytes in the pcre data */
    385 	to_write = size;
    386 	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
    387 	if (len != 1)
    388 		return -1;
    389 
    390 	/* write the actual pcre data as a char array */
    391 	len = fwrite(regex->regex, 1, to_write, fp);
    392 	if (len != to_write)
    393 		return -1;
    394 
    395 	if (sd) {
    396 		/* determine the size of the pcre study info */
    397 		rc =
    398 		    pcre_fullinfo(regex->regex, sd, PCRE_INFO_STUDYSIZE, &size);
    399 		if (rc < 0)
    400 			return -1;
    401 	} else
    402 		size = 0;
    403 
    404 	/* write the number of bytes in the pcre study data */
    405 	to_write = size;
    406 	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
    407 	if (len != 1)
    408 		return -1;
    409 
    410 	if (sd) {
    411 		/* write the actual pcre study data as a char array */
    412 		len = fwrite(sd->study_data, 1, to_write, fp);
    413 		if (len != to_write)
    414 			return -1;
    415 	}
    416 
    417 	return 0;
    418 }
    419 
    420 void regex_data_free(struct regex_data *regex)
    421 {
    422 	if (regex) {
    423 		if (regex->owned) {
    424 			if (regex->regex)
    425 				pcre_free(regex->regex);
    426 			if (regex->sd)
    427 				pcre_free_study(regex->sd);
    428 		}
    429 		free(regex);
    430 	}
    431 }
    432 
    433 int regex_match(struct regex_data *regex, char const *subject, int partial)
    434 {
    435 	int rc;
    436 
    437 	rc = pcre_exec(regex->regex, get_pcre_extra(regex),
    438 		       subject, strlen(subject), 0,
    439 		       partial ? PCRE_PARTIAL_SOFT : 0, NULL, 0);
    440 	switch (rc) {
    441 	case 0:
    442 		return REGEX_MATCH;
    443 	case PCRE_ERROR_PARTIAL:
    444 		return REGEX_MATCH_PARTIAL;
    445 	case PCRE_ERROR_NOMATCH:
    446 		return REGEX_NO_MATCH;
    447 	default:
    448 		return REGEX_ERROR;
    449 	}
    450 }
    451 
    452 /*
    453  * TODO Replace this compare function with something that actually compares the
    454  * regular expressions.
    455  * This compare function basically just compares the binary representations of
    456  * the automatons, and because this representation contains pointers and
    457  * metadata, it can only return a match if regex1 == regex2.
    458  * Preferably, this function would be replaced with an algorithm that computes
    459  * the equivalence of the automatons systematically.
    460  */
    461 int regex_cmp(struct regex_data *regex1, struct regex_data *regex2)
    462 {
    463 	int rc;
    464 	size_t len1, len2;
    465 	rc = pcre_fullinfo(regex1->regex, NULL, PCRE_INFO_SIZE, &len1);
    466 	assert(rc == 0);
    467 	rc = pcre_fullinfo(regex2->regex, NULL, PCRE_INFO_SIZE, &len2);
    468 	assert(rc == 0);
    469 	if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
    470 		return SELABEL_INCOMPARABLE;
    471 
    472 	return SELABEL_EQUAL;
    473 }
    474 
    475 #endif
    476 
    477 struct regex_data *regex_data_create(void)
    478 {
    479 	return (struct regex_data *)calloc(1, sizeof(struct regex_data));
    480 }
    481 
    482 void regex_format_error(struct regex_error_data const *error_data, char *buffer,
    483 			size_t buf_size)
    484 {
    485 	unsigned the_end_length = buf_size > 4 ? 4 : buf_size;
    486 	char *ptr = &buffer[buf_size - the_end_length];
    487 	int rc = 0;
    488 	size_t pos = 0;
    489 	if (!buffer || !buf_size)
    490 		return;
    491 	rc = snprintf(buffer, buf_size, "REGEX back-end error: ");
    492 	if (rc < 0)
    493 		/*
    494 		 * If snprintf fails it constitutes a logical error that needs
    495 		 * fixing.
    496 		 */
    497 		abort();
    498 
    499 	pos += rc;
    500 	if (pos >= buf_size)
    501 		goto truncated;
    502 
    503 	if (error_data->error_offset > 0) {
    504 #ifdef USE_PCRE2
    505 		rc = snprintf(buffer + pos, buf_size - pos, "At offset %zu: ",
    506 			      error_data->error_offset);
    507 #else
    508 		rc = snprintf(buffer + pos, buf_size - pos, "At offset %d: ",
    509 			      error_data->error_offset);
    510 #endif
    511 		if (rc < 0)
    512 			abort();
    513 	}
    514 	pos += rc;
    515 	if (pos >= buf_size)
    516 		goto truncated;
    517 
    518 #ifdef USE_PCRE2
    519 	rc = pcre2_get_error_message(error_data->error_code,
    520 				     (PCRE2_UCHAR *)(buffer + pos),
    521 				     buf_size - pos);
    522 	if (rc == PCRE2_ERROR_NOMEMORY)
    523 		goto truncated;
    524 #else
    525 	rc = snprintf(buffer + pos, buf_size - pos, "%s",
    526 		      error_data->error_buffer);
    527 	if (rc < 0)
    528 		abort();
    529 
    530 	if ((size_t)rc < strlen(error_data->error_buffer))
    531 		goto truncated;
    532 #endif
    533 
    534 	return;
    535 
    536 truncated:
    537 	/* replace end of string with "..." to indicate that it was truncated */
    538 	switch (the_end_length) {
    539 	/* no break statements, fall-through is intended */
    540 	case 4:
    541 		*ptr++ = '.';
    542 	case 3:
    543 		*ptr++ = '.';
    544 	case 2:
    545 		*ptr++ = '.';
    546 	case 1:
    547 		*ptr++ = '\0';
    548 	default:
    549 		break;
    550 	}
    551 	return;
    552 }
    553