1 #include <assert.h> 2 #include <stdint.h> 3 #include <stdio.h> 4 #include <string.h> 5 6 #include "regex.h" 7 #include "label_file.h" 8 9 #ifdef USE_PCRE2 10 #define REGEX_ARCH_SIZE_T PCRE2_SIZE 11 #else 12 #define REGEX_ARCH_SIZE_T size_t 13 #endif 14 15 #ifndef __BYTE_ORDER__ 16 17 /* If the compiler doesn't define __BYTE_ORDER__, try to use the C 18 * library <endian.h> header definitions. */ 19 #include <endian.h> 20 #ifndef __BYTE_ORDER 21 #error Neither __BYTE_ORDER__ nor __BYTE_ORDER defined. Unable to determine endianness. 22 #endif 23 24 #define __ORDER_LITTLE_ENDIAN __LITTLE_ENDIAN 25 #define __ORDER_BIG_ENDIAN __BIG_ENDIAN 26 #define __BYTE_ORDER__ __BYTE_ORDER 27 28 #endif 29 30 #ifdef USE_PCRE2 31 char const *regex_arch_string(void) 32 { 33 static char arch_string_buffer[32]; 34 static char const *arch_string = ""; 35 char const *endianness = NULL; 36 int rc; 37 38 if (arch_string[0] == '\0') { 39 if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) 40 endianness = "el"; 41 else if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) 42 endianness = "eb"; 43 44 if (!endianness) 45 return NULL; 46 47 rc = snprintf(arch_string_buffer, sizeof(arch_string_buffer), 48 "%zu-%zu-%s", sizeof(void *), 49 sizeof(REGEX_ARCH_SIZE_T), 50 endianness); 51 if (rc < 0) 52 abort(); 53 54 arch_string = &arch_string_buffer[0]; 55 } 56 return arch_string; 57 } 58 59 struct regex_data { 60 pcre2_code *regex; /* compiled regular expression */ 61 /* 62 * match data block required for the compiled 63 * pattern in pcre2 64 */ 65 pcre2_match_data *match_data; 66 }; 67 68 int regex_prepare_data(struct regex_data **regex, char const *pattern_string, 69 struct regex_error_data *errordata) 70 { 71 memset(errordata, 0, sizeof(struct regex_error_data)); 72 73 *regex = regex_data_create(); 74 if (!(*regex)) 75 return -1; 76 77 (*regex)->regex = pcre2_compile( 78 (PCRE2_SPTR)pattern_string, PCRE2_ZERO_TERMINATED, PCRE2_DOTALL, 79 &errordata->error_code, &errordata->error_offset, NULL); 80 if (!(*regex)->regex) { 81 goto err; 82 } 83 84 (*regex)->match_data = 85 pcre2_match_data_create_from_pattern((*regex)->regex, NULL); 86 if (!(*regex)->match_data) { 87 goto err; 88 } 89 return 0; 90 91 err: 92 regex_data_free(*regex); 93 *regex = NULL; 94 return -1; 95 } 96 97 char const *regex_version(void) 98 { 99 static char version_buf[256]; 100 size_t len = pcre2_config(PCRE2_CONFIG_VERSION, NULL); 101 if (len <= 0 || len > sizeof(version_buf)) 102 return NULL; 103 104 pcre2_config(PCRE2_CONFIG_VERSION, version_buf); 105 return version_buf; 106 } 107 108 int regex_load_mmap(struct mmap_area *mmap_area, struct regex_data **regex, 109 int do_load_precompregex) 110 { 111 int rc; 112 uint32_t entry_len; 113 114 rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t)); 115 if (rc < 0) 116 return -1; 117 118 if (entry_len && do_load_precompregex) { 119 /* 120 * this should yield exactly one because we store one pattern at 121 * a time 122 */ 123 rc = pcre2_serialize_get_number_of_codes(mmap_area->next_addr); 124 if (rc != 1) 125 return -1; 126 127 *regex = regex_data_create(); 128 if (!*regex) 129 return -1; 130 131 rc = pcre2_serialize_decode(&(*regex)->regex, 1, 132 (PCRE2_SPTR)mmap_area->next_addr, 133 NULL); 134 if (rc != 1) 135 goto err; 136 137 (*regex)->match_data = 138 pcre2_match_data_create_from_pattern((*regex)->regex, NULL); 139 if (!(*regex)->match_data) 140 goto err; 141 } 142 143 /* and skip the decoded bit */ 144 rc = next_entry(NULL, mmap_area, entry_len); 145 if (rc < 0) 146 goto err; 147 148 return 0; 149 err: 150 regex_data_free(*regex); 151 *regex = NULL; 152 return -1; 153 } 154 155 int regex_writef(struct regex_data *regex, FILE *fp, int do_write_precompregex) 156 { 157 int rc = 0; 158 size_t len; 159 PCRE2_SIZE serialized_size; 160 uint32_t to_write = 0; 161 PCRE2_UCHAR *bytes = NULL; 162 163 if (do_write_precompregex) { 164 /* encode the patter for serialization */ 165 rc = pcre2_serialize_encode((const pcre2_code **)®ex->regex, 166 1, &bytes, &serialized_size, NULL); 167 if (rc != 1) { 168 rc = -1; 169 goto out; 170 } 171 to_write = serialized_size; 172 } 173 174 /* write serialized pattern's size */ 175 len = fwrite(&to_write, sizeof(uint32_t), 1, fp); 176 if (len != 1) { 177 rc = -1; 178 goto out; 179 } 180 181 if (do_write_precompregex) { 182 /* write serialized pattern */ 183 len = fwrite(bytes, 1, to_write, fp); 184 if (len != to_write) 185 rc = -1; 186 } 187 188 out: 189 if (bytes) 190 pcre2_serialize_free(bytes); 191 192 return rc; 193 } 194 195 void regex_data_free(struct regex_data *regex) 196 { 197 if (regex) { 198 if (regex->regex) 199 pcre2_code_free(regex->regex); 200 if (regex->match_data) 201 pcre2_match_data_free(regex->match_data); 202 free(regex); 203 } 204 } 205 206 int regex_match(struct regex_data *regex, char const *subject, int partial) 207 { 208 int rc; 209 rc = pcre2_match( 210 regex->regex, (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0, 211 partial ? PCRE2_PARTIAL_SOFT : 0, regex->match_data, NULL); 212 if (rc > 0) 213 return REGEX_MATCH; 214 switch (rc) { 215 case PCRE2_ERROR_PARTIAL: 216 return REGEX_MATCH_PARTIAL; 217 case PCRE2_ERROR_NOMATCH: 218 return REGEX_NO_MATCH; 219 default: 220 return REGEX_ERROR; 221 } 222 } 223 224 /* 225 * TODO Replace this compare function with something that actually compares the 226 * regular expressions. 227 * This compare function basically just compares the binary representations of 228 * the automatons, and because this representation contains pointers and 229 * metadata, it can only return a match if regex1 == regex2. 230 * Preferably, this function would be replaced with an algorithm that computes 231 * the equivalence of the automatons systematically. 232 */ 233 int regex_cmp(struct regex_data *regex1, struct regex_data *regex2) 234 { 235 int rc; 236 size_t len1, len2; 237 rc = pcre2_pattern_info(regex1->regex, PCRE2_INFO_SIZE, &len1); 238 assert(rc == 0); 239 rc = pcre2_pattern_info(regex2->regex, PCRE2_INFO_SIZE, &len2); 240 assert(rc == 0); 241 if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1)) 242 return SELABEL_INCOMPARABLE; 243 244 return SELABEL_EQUAL; 245 } 246 247 #else // !USE_PCRE2 248 char const *regex_arch_string(void) 249 { 250 return "N/A"; 251 } 252 253 /* Prior to version 8.20, libpcre did not have pcre_free_study() */ 254 #if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20)) 255 #define pcre_free_study pcre_free 256 #endif 257 258 struct regex_data { 259 int owned; /* 260 * non zero if regex and pcre_extra is owned by this 261 * structure and thus must be freed on destruction. 262 */ 263 pcre *regex; /* compiled regular expression */ 264 union { 265 pcre_extra *sd; /* pointer to extra compiled stuff */ 266 pcre_extra lsd; /* used to hold the mmap'd version */ 267 }; 268 }; 269 270 int regex_prepare_data(struct regex_data **regex, char const *pattern_string, 271 struct regex_error_data *errordata) 272 { 273 memset(errordata, 0, sizeof(struct regex_error_data)); 274 275 *regex = regex_data_create(); 276 if (!(*regex)) 277 return -1; 278 279 (*regex)->regex = 280 pcre_compile(pattern_string, PCRE_DOTALL, &errordata->error_buffer, 281 &errordata->error_offset, NULL); 282 if (!(*regex)->regex) 283 goto err; 284 285 (*regex)->owned = 1; 286 287 (*regex)->sd = pcre_study((*regex)->regex, 0, &errordata->error_buffer); 288 if (!(*regex)->sd && errordata->error_buffer) 289 goto err; 290 291 return 0; 292 293 err: 294 regex_data_free(*regex); 295 *regex = NULL; 296 return -1; 297 } 298 299 char const *regex_version(void) 300 { 301 return pcre_version(); 302 } 303 304 int regex_load_mmap(struct mmap_area *mmap_area, struct regex_data **regex, 305 int unused __attribute__((unused))) 306 { 307 int rc; 308 uint32_t entry_len; 309 size_t info_len; 310 311 rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t)); 312 if (rc < 0 || !entry_len) 313 return -1; 314 315 *regex = regex_data_create(); 316 if (!(*regex)) 317 return -1; 318 319 (*regex)->owned = 0; 320 (*regex)->regex = (pcre *)mmap_area->next_addr; 321 rc = next_entry(NULL, mmap_area, entry_len); 322 if (rc < 0) 323 goto err; 324 325 /* 326 * Check that regex lengths match. pcre_fullinfo() 327 * also validates its magic number. 328 */ 329 rc = pcre_fullinfo((*regex)->regex, NULL, PCRE_INFO_SIZE, &info_len); 330 if (rc < 0 || info_len != entry_len) 331 goto err; 332 333 rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t)); 334 if (rc < 0 || !entry_len) 335 goto err; 336 337 if (entry_len) { 338 (*regex)->lsd.study_data = (void *)mmap_area->next_addr; 339 (*regex)->lsd.flags |= PCRE_EXTRA_STUDY_DATA; 340 rc = next_entry(NULL, mmap_area, entry_len); 341 if (rc < 0) 342 goto err; 343 344 /* Check that study data lengths match. */ 345 rc = pcre_fullinfo((*regex)->regex, &(*regex)->lsd, 346 PCRE_INFO_STUDYSIZE, &info_len); 347 if (rc < 0 || info_len != entry_len) 348 goto err; 349 } 350 return 0; 351 352 err: 353 regex_data_free(*regex); 354 *regex = NULL; 355 return -1; 356 } 357 358 static inline pcre_extra *get_pcre_extra(struct regex_data *regex) 359 { 360 if (!regex) return NULL; 361 if (regex->owned) { 362 return regex->sd; 363 } else if (regex->lsd.study_data) { 364 return ®ex->lsd; 365 } else { 366 return NULL; 367 } 368 } 369 370 int regex_writef(struct regex_data *regex, FILE *fp, 371 int unused __attribute__((unused))) 372 { 373 int rc; 374 size_t len; 375 uint32_t to_write; 376 size_t size; 377 pcre_extra *sd = get_pcre_extra(regex); 378 379 /* determine the size of the pcre data in bytes */ 380 rc = pcre_fullinfo(regex->regex, NULL, PCRE_INFO_SIZE, &size); 381 if (rc < 0) 382 return -1; 383 384 /* write the number of bytes in the pcre data */ 385 to_write = size; 386 len = fwrite(&to_write, sizeof(uint32_t), 1, fp); 387 if (len != 1) 388 return -1; 389 390 /* write the actual pcre data as a char array */ 391 len = fwrite(regex->regex, 1, to_write, fp); 392 if (len != to_write) 393 return -1; 394 395 if (sd) { 396 /* determine the size of the pcre study info */ 397 rc = 398 pcre_fullinfo(regex->regex, sd, PCRE_INFO_STUDYSIZE, &size); 399 if (rc < 0) 400 return -1; 401 } else 402 size = 0; 403 404 /* write the number of bytes in the pcre study data */ 405 to_write = size; 406 len = fwrite(&to_write, sizeof(uint32_t), 1, fp); 407 if (len != 1) 408 return -1; 409 410 if (sd) { 411 /* write the actual pcre study data as a char array */ 412 len = fwrite(sd->study_data, 1, to_write, fp); 413 if (len != to_write) 414 return -1; 415 } 416 417 return 0; 418 } 419 420 void regex_data_free(struct regex_data *regex) 421 { 422 if (regex) { 423 if (regex->owned) { 424 if (regex->regex) 425 pcre_free(regex->regex); 426 if (regex->sd) 427 pcre_free_study(regex->sd); 428 } 429 free(regex); 430 } 431 } 432 433 int regex_match(struct regex_data *regex, char const *subject, int partial) 434 { 435 int rc; 436 437 rc = pcre_exec(regex->regex, get_pcre_extra(regex), 438 subject, strlen(subject), 0, 439 partial ? PCRE_PARTIAL_SOFT : 0, NULL, 0); 440 switch (rc) { 441 case 0: 442 return REGEX_MATCH; 443 case PCRE_ERROR_PARTIAL: 444 return REGEX_MATCH_PARTIAL; 445 case PCRE_ERROR_NOMATCH: 446 return REGEX_NO_MATCH; 447 default: 448 return REGEX_ERROR; 449 } 450 } 451 452 /* 453 * TODO Replace this compare function with something that actually compares the 454 * regular expressions. 455 * This compare function basically just compares the binary representations of 456 * the automatons, and because this representation contains pointers and 457 * metadata, it can only return a match if regex1 == regex2. 458 * Preferably, this function would be replaced with an algorithm that computes 459 * the equivalence of the automatons systematically. 460 */ 461 int regex_cmp(struct regex_data *regex1, struct regex_data *regex2) 462 { 463 int rc; 464 size_t len1, len2; 465 rc = pcre_fullinfo(regex1->regex, NULL, PCRE_INFO_SIZE, &len1); 466 assert(rc == 0); 467 rc = pcre_fullinfo(regex2->regex, NULL, PCRE_INFO_SIZE, &len2); 468 assert(rc == 0); 469 if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1)) 470 return SELABEL_INCOMPARABLE; 471 472 return SELABEL_EQUAL; 473 } 474 475 #endif 476 477 struct regex_data *regex_data_create(void) 478 { 479 return (struct regex_data *)calloc(1, sizeof(struct regex_data)); 480 } 481 482 void regex_format_error(struct regex_error_data const *error_data, char *buffer, 483 size_t buf_size) 484 { 485 unsigned the_end_length = buf_size > 4 ? 4 : buf_size; 486 char *ptr = &buffer[buf_size - the_end_length]; 487 int rc = 0; 488 size_t pos = 0; 489 if (!buffer || !buf_size) 490 return; 491 rc = snprintf(buffer, buf_size, "REGEX back-end error: "); 492 if (rc < 0) 493 /* 494 * If snprintf fails it constitutes a logical error that needs 495 * fixing. 496 */ 497 abort(); 498 499 pos += rc; 500 if (pos >= buf_size) 501 goto truncated; 502 503 if (error_data->error_offset > 0) { 504 #ifdef USE_PCRE2 505 rc = snprintf(buffer + pos, buf_size - pos, "At offset %zu: ", 506 error_data->error_offset); 507 #else 508 rc = snprintf(buffer + pos, buf_size - pos, "At offset %d: ", 509 error_data->error_offset); 510 #endif 511 if (rc < 0) 512 abort(); 513 } 514 pos += rc; 515 if (pos >= buf_size) 516 goto truncated; 517 518 #ifdef USE_PCRE2 519 rc = pcre2_get_error_message(error_data->error_code, 520 (PCRE2_UCHAR *)(buffer + pos), 521 buf_size - pos); 522 if (rc == PCRE2_ERROR_NOMEMORY) 523 goto truncated; 524 #else 525 rc = snprintf(buffer + pos, buf_size - pos, "%s", 526 error_data->error_buffer); 527 if (rc < 0) 528 abort(); 529 530 if ((size_t)rc < strlen(error_data->error_buffer)) 531 goto truncated; 532 #endif 533 534 return; 535 536 truncated: 537 /* replace end of string with "..." to indicate that it was truncated */ 538 switch (the_end_length) { 539 /* no break statements, fall-through is intended */ 540 case 4: 541 *ptr++ = '.'; 542 case 3: 543 *ptr++ = '.'; 544 case 2: 545 *ptr++ = '.'; 546 case 1: 547 *ptr++ = '\0'; 548 default: 549 break; 550 } 551 return; 552 } 553