1 #include <assert.h> 2 #include <pthread.h> 3 #include <stdint.h> 4 #include <stdio.h> 5 #include <string.h> 6 7 #include "regex.h" 8 #include "label_file.h" 9 #include "selinux_internal.h" 10 11 #ifdef USE_PCRE2 12 #define REGEX_ARCH_SIZE_T PCRE2_SIZE 13 #else 14 #define REGEX_ARCH_SIZE_T size_t 15 #endif 16 17 #ifndef __BYTE_ORDER__ 18 19 /* If the compiler doesn't define __BYTE_ORDER__, try to use the C 20 * library <endian.h> header definitions. */ 21 #include <endian.h> 22 #ifndef __BYTE_ORDER 23 #error Neither __BYTE_ORDER__ nor __BYTE_ORDER defined. Unable to determine endianness. 24 #endif 25 26 #define __ORDER_LITTLE_ENDIAN __LITTLE_ENDIAN 27 #define __ORDER_BIG_ENDIAN __BIG_ENDIAN 28 #define __BYTE_ORDER__ __BYTE_ORDER 29 30 #endif 31 32 #ifdef USE_PCRE2 33 char const *regex_arch_string(void) 34 { 35 static char arch_string_buffer[32]; 36 static char const *arch_string = ""; 37 char const *endianness = NULL; 38 int rc; 39 40 if (arch_string[0] == '\0') { 41 if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) 42 endianness = "el"; 43 else if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) 44 endianness = "eb"; 45 46 if (!endianness) 47 return NULL; 48 49 rc = snprintf(arch_string_buffer, sizeof(arch_string_buffer), 50 "%zu-%zu-%s", sizeof(void *), 51 sizeof(REGEX_ARCH_SIZE_T), 52 endianness); 53 if (rc < 0) 54 abort(); 55 56 arch_string = &arch_string_buffer[0]; 57 } 58 return arch_string; 59 } 60 61 struct regex_data { 62 pcre2_code *regex; /* compiled regular expression */ 63 /* 64 * match data block required for the compiled 65 * pattern in pcre2 66 */ 67 pcre2_match_data *match_data; 68 pthread_mutex_t match_mutex; 69 }; 70 71 int regex_prepare_data(struct regex_data **regex, char const *pattern_string, 72 struct regex_error_data *errordata) 73 { 74 memset(errordata, 0, sizeof(struct regex_error_data)); 75 76 *regex = regex_data_create(); 77 if (!(*regex)) 78 return -1; 79 80 (*regex)->regex = pcre2_compile( 81 (PCRE2_SPTR)pattern_string, PCRE2_ZERO_TERMINATED, PCRE2_DOTALL, 82 &errordata->error_code, &errordata->error_offset, NULL); 83 if (!(*regex)->regex) { 84 goto err; 85 } 86 87 (*regex)->match_data = 88 pcre2_match_data_create_from_pattern((*regex)->regex, NULL); 89 if (!(*regex)->match_data) { 90 goto err; 91 } 92 return 0; 93 94 err: 95 regex_data_free(*regex); 96 *regex = NULL; 97 return -1; 98 } 99 100 char const *regex_version(void) 101 { 102 static char version_buf[256]; 103 size_t len = pcre2_config(PCRE2_CONFIG_VERSION, NULL); 104 if (len <= 0 || len > sizeof(version_buf)) 105 return NULL; 106 107 pcre2_config(PCRE2_CONFIG_VERSION, version_buf); 108 return version_buf; 109 } 110 111 int regex_load_mmap(struct mmap_area *mmap_area, struct regex_data **regex, 112 int do_load_precompregex, bool *regex_compiled) 113 { 114 int rc; 115 uint32_t entry_len; 116 117 *regex_compiled = false; 118 rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t)); 119 if (rc < 0) 120 return -1; 121 122 if (entry_len && do_load_precompregex) { 123 /* 124 * this should yield exactly one because we store one pattern at 125 * a time 126 */ 127 rc = pcre2_serialize_get_number_of_codes(mmap_area->next_addr); 128 if (rc != 1) 129 return -1; 130 131 *regex = regex_data_create(); 132 if (!*regex) 133 return -1; 134 135 rc = pcre2_serialize_decode(&(*regex)->regex, 1, 136 (PCRE2_SPTR)mmap_area->next_addr, 137 NULL); 138 if (rc != 1) 139 goto err; 140 141 (*regex)->match_data = 142 pcre2_match_data_create_from_pattern((*regex)->regex, NULL); 143 if (!(*regex)->match_data) 144 goto err; 145 146 *regex_compiled = true; 147 } 148 149 /* and skip the decoded bit */ 150 rc = next_entry(NULL, mmap_area, entry_len); 151 if (rc < 0) 152 goto err; 153 154 return 0; 155 err: 156 regex_data_free(*regex); 157 *regex = NULL; 158 return -1; 159 } 160 161 int regex_writef(struct regex_data *regex, FILE *fp, int do_write_precompregex) 162 { 163 int rc = 0; 164 size_t len; 165 PCRE2_SIZE serialized_size; 166 uint32_t to_write = 0; 167 PCRE2_UCHAR *bytes = NULL; 168 169 if (do_write_precompregex) { 170 /* encode the patter for serialization */ 171 rc = pcre2_serialize_encode((const pcre2_code **)®ex->regex, 172 1, &bytes, &serialized_size, NULL); 173 if (rc != 1) { 174 rc = -1; 175 goto out; 176 } 177 to_write = serialized_size; 178 } 179 180 /* write serialized pattern's size */ 181 len = fwrite(&to_write, sizeof(uint32_t), 1, fp); 182 if (len != 1) { 183 rc = -1; 184 goto out; 185 } 186 187 if (do_write_precompregex) { 188 /* write serialized pattern */ 189 len = fwrite(bytes, 1, to_write, fp); 190 if (len != to_write) 191 rc = -1; 192 } 193 194 out: 195 if (bytes) 196 pcre2_serialize_free(bytes); 197 198 return rc; 199 } 200 201 void regex_data_free(struct regex_data *regex) 202 { 203 if (regex) { 204 if (regex->regex) 205 pcre2_code_free(regex->regex); 206 if (regex->match_data) 207 pcre2_match_data_free(regex->match_data); 208 __pthread_mutex_destroy(®ex->match_mutex); 209 free(regex); 210 } 211 } 212 213 int regex_match(struct regex_data *regex, char const *subject, int partial) 214 { 215 int rc; 216 __pthread_mutex_lock(®ex->match_mutex); 217 rc = pcre2_match( 218 regex->regex, (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0, 219 partial ? PCRE2_PARTIAL_SOFT : 0, regex->match_data, NULL); 220 __pthread_mutex_unlock(®ex->match_mutex); 221 if (rc > 0) 222 return REGEX_MATCH; 223 switch (rc) { 224 case PCRE2_ERROR_PARTIAL: 225 return REGEX_MATCH_PARTIAL; 226 case PCRE2_ERROR_NOMATCH: 227 return REGEX_NO_MATCH; 228 default: 229 return REGEX_ERROR; 230 } 231 } 232 233 /* 234 * TODO Replace this compare function with something that actually compares the 235 * regular expressions. 236 * This compare function basically just compares the binary representations of 237 * the automatons, and because this representation contains pointers and 238 * metadata, it can only return a match if regex1 == regex2. 239 * Preferably, this function would be replaced with an algorithm that computes 240 * the equivalence of the automatons systematically. 241 */ 242 int regex_cmp(struct regex_data *regex1, struct regex_data *regex2) 243 { 244 int rc; 245 size_t len1, len2; 246 rc = pcre2_pattern_info(regex1->regex, PCRE2_INFO_SIZE, &len1); 247 assert(rc == 0); 248 rc = pcre2_pattern_info(regex2->regex, PCRE2_INFO_SIZE, &len2); 249 assert(rc == 0); 250 if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1)) 251 return SELABEL_INCOMPARABLE; 252 253 return SELABEL_EQUAL; 254 } 255 256 struct regex_data *regex_data_create(void) 257 { 258 struct regex_data *regex_data = 259 (struct regex_data *)calloc(1, sizeof(struct regex_data)); 260 __pthread_mutex_init(®ex_data->match_mutex, NULL); 261 return regex_data; 262 } 263 264 #else // !USE_PCRE2 265 char const *regex_arch_string(void) 266 { 267 return "N/A"; 268 } 269 270 /* Prior to version 8.20, libpcre did not have pcre_free_study() */ 271 #if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20)) 272 #define pcre_free_study pcre_free 273 #endif 274 275 struct regex_data { 276 int owned; /* 277 * non zero if regex and pcre_extra is owned by this 278 * structure and thus must be freed on destruction. 279 */ 280 pcre *regex; /* compiled regular expression */ 281 union { 282 pcre_extra *sd; /* pointer to extra compiled stuff */ 283 pcre_extra lsd; /* used to hold the mmap'd version */ 284 }; 285 }; 286 287 int regex_prepare_data(struct regex_data **regex, char const *pattern_string, 288 struct regex_error_data *errordata) 289 { 290 memset(errordata, 0, sizeof(struct regex_error_data)); 291 292 *regex = regex_data_create(); 293 if (!(*regex)) 294 return -1; 295 296 (*regex)->regex = 297 pcre_compile(pattern_string, PCRE_DOTALL, &errordata->error_buffer, 298 &errordata->error_offset, NULL); 299 if (!(*regex)->regex) 300 goto err; 301 302 (*regex)->owned = 1; 303 304 (*regex)->sd = pcre_study((*regex)->regex, 0, &errordata->error_buffer); 305 if (!(*regex)->sd && errordata->error_buffer) 306 goto err; 307 308 return 0; 309 310 err: 311 regex_data_free(*regex); 312 *regex = NULL; 313 return -1; 314 } 315 316 char const *regex_version(void) 317 { 318 return pcre_version(); 319 } 320 321 int regex_load_mmap(struct mmap_area *mmap_area, struct regex_data **regex, 322 int unused __attribute__((unused)), bool *regex_compiled) 323 { 324 int rc; 325 uint32_t entry_len; 326 size_t info_len; 327 328 rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t)); 329 if (rc < 0 || !entry_len) 330 return -1; 331 332 *regex = regex_data_create(); 333 if (!(*regex)) 334 return -1; 335 336 (*regex)->owned = 0; 337 (*regex)->regex = (pcre *)mmap_area->next_addr; 338 rc = next_entry(NULL, mmap_area, entry_len); 339 if (rc < 0) 340 goto err; 341 342 /* 343 * Check that regex lengths match. pcre_fullinfo() 344 * also validates its magic number. 345 */ 346 rc = pcre_fullinfo((*regex)->regex, NULL, PCRE_INFO_SIZE, &info_len); 347 if (rc < 0 || info_len != entry_len) 348 goto err; 349 350 rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t)); 351 if (rc < 0 || !entry_len) 352 goto err; 353 354 if (entry_len) { 355 (*regex)->lsd.study_data = (void *)mmap_area->next_addr; 356 (*regex)->lsd.flags |= PCRE_EXTRA_STUDY_DATA; 357 rc = next_entry(NULL, mmap_area, entry_len); 358 if (rc < 0) 359 goto err; 360 361 /* Check that study data lengths match. */ 362 rc = pcre_fullinfo((*regex)->regex, &(*regex)->lsd, 363 PCRE_INFO_STUDYSIZE, &info_len); 364 if (rc < 0 || info_len != entry_len) 365 goto err; 366 } 367 368 *regex_compiled = true; 369 return 0; 370 371 err: 372 regex_data_free(*regex); 373 *regex = NULL; 374 return -1; 375 } 376 377 static inline pcre_extra *get_pcre_extra(struct regex_data *regex) 378 { 379 if (!regex) return NULL; 380 if (regex->owned) { 381 return regex->sd; 382 } else if (regex->lsd.study_data) { 383 return ®ex->lsd; 384 } else { 385 return NULL; 386 } 387 } 388 389 int regex_writef(struct regex_data *regex, FILE *fp, 390 int unused __attribute__((unused))) 391 { 392 int rc; 393 size_t len; 394 uint32_t to_write; 395 size_t size; 396 pcre_extra *sd = get_pcre_extra(regex); 397 398 /* determine the size of the pcre data in bytes */ 399 rc = pcre_fullinfo(regex->regex, NULL, PCRE_INFO_SIZE, &size); 400 if (rc < 0) 401 return -1; 402 403 /* write the number of bytes in the pcre data */ 404 to_write = size; 405 len = fwrite(&to_write, sizeof(uint32_t), 1, fp); 406 if (len != 1) 407 return -1; 408 409 /* write the actual pcre data as a char array */ 410 len = fwrite(regex->regex, 1, to_write, fp); 411 if (len != to_write) 412 return -1; 413 414 if (sd) { 415 /* determine the size of the pcre study info */ 416 rc = 417 pcre_fullinfo(regex->regex, sd, PCRE_INFO_STUDYSIZE, &size); 418 if (rc < 0) 419 return -1; 420 } else 421 size = 0; 422 423 /* write the number of bytes in the pcre study data */ 424 to_write = size; 425 len = fwrite(&to_write, sizeof(uint32_t), 1, fp); 426 if (len != 1) 427 return -1; 428 429 if (sd) { 430 /* write the actual pcre study data as a char array */ 431 len = fwrite(sd->study_data, 1, to_write, fp); 432 if (len != to_write) 433 return -1; 434 } 435 436 return 0; 437 } 438 439 void regex_data_free(struct regex_data *regex) 440 { 441 if (regex) { 442 if (regex->owned) { 443 if (regex->regex) 444 pcre_free(regex->regex); 445 if (regex->sd) 446 pcre_free_study(regex->sd); 447 } 448 free(regex); 449 } 450 } 451 452 int regex_match(struct regex_data *regex, char const *subject, int partial) 453 { 454 int rc; 455 456 rc = pcre_exec(regex->regex, get_pcre_extra(regex), 457 subject, strlen(subject), 0, 458 partial ? PCRE_PARTIAL_SOFT : 0, NULL, 0); 459 switch (rc) { 460 case 0: 461 return REGEX_MATCH; 462 case PCRE_ERROR_PARTIAL: 463 return REGEX_MATCH_PARTIAL; 464 case PCRE_ERROR_NOMATCH: 465 return REGEX_NO_MATCH; 466 default: 467 return REGEX_ERROR; 468 } 469 } 470 471 /* 472 * TODO Replace this compare function with something that actually compares the 473 * regular expressions. 474 * This compare function basically just compares the binary representations of 475 * the automatons, and because this representation contains pointers and 476 * metadata, it can only return a match if regex1 == regex2. 477 * Preferably, this function would be replaced with an algorithm that computes 478 * the equivalence of the automatons systematically. 479 */ 480 int regex_cmp(struct regex_data *regex1, struct regex_data *regex2) 481 { 482 int rc; 483 size_t len1, len2; 484 rc = pcre_fullinfo(regex1->regex, NULL, PCRE_INFO_SIZE, &len1); 485 assert(rc == 0); 486 rc = pcre_fullinfo(regex2->regex, NULL, PCRE_INFO_SIZE, &len2); 487 assert(rc == 0); 488 if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1)) 489 return SELABEL_INCOMPARABLE; 490 491 return SELABEL_EQUAL; 492 } 493 494 struct regex_data *regex_data_create(void) 495 { 496 return (struct regex_data *)calloc(1, sizeof(struct regex_data)); 497 } 498 499 #endif 500 501 void regex_format_error(struct regex_error_data const *error_data, char *buffer, 502 size_t buf_size) 503 { 504 unsigned the_end_length = buf_size > 4 ? 4 : buf_size; 505 char *ptr = &buffer[buf_size - the_end_length]; 506 int rc = 0; 507 size_t pos = 0; 508 if (!buffer || !buf_size) 509 return; 510 rc = snprintf(buffer, buf_size, "REGEX back-end error: "); 511 if (rc < 0) 512 /* 513 * If snprintf fails it constitutes a logical error that needs 514 * fixing. 515 */ 516 abort(); 517 518 pos += rc; 519 if (pos >= buf_size) 520 goto truncated; 521 522 if (error_data->error_offset > 0) { 523 #ifdef USE_PCRE2 524 rc = snprintf(buffer + pos, buf_size - pos, "At offset %zu: ", 525 error_data->error_offset); 526 #else 527 rc = snprintf(buffer + pos, buf_size - pos, "At offset %d: ", 528 error_data->error_offset); 529 #endif 530 if (rc < 0) 531 abort(); 532 } 533 pos += rc; 534 if (pos >= buf_size) 535 goto truncated; 536 537 #ifdef USE_PCRE2 538 rc = pcre2_get_error_message(error_data->error_code, 539 (PCRE2_UCHAR *)(buffer + pos), 540 buf_size - pos); 541 if (rc == PCRE2_ERROR_NOMEMORY) 542 goto truncated; 543 #else 544 rc = snprintf(buffer + pos, buf_size - pos, "%s", 545 error_data->error_buffer); 546 if (rc < 0) 547 abort(); 548 549 if ((size_t)rc < strlen(error_data->error_buffer)) 550 goto truncated; 551 #endif 552 553 return; 554 555 truncated: 556 /* replace end of string with "..." to indicate that it was truncated */ 557 switch (the_end_length) { 558 /* no break statements, fall-through is intended */ 559 case 4: 560 *ptr++ = '.'; 561 /* FALLTHRU */ 562 case 3: 563 *ptr++ = '.'; 564 /* FALLTHRU */ 565 case 2: 566 *ptr++ = '.'; 567 /* FALLTHRU */ 568 case 1: 569 *ptr++ = '\0'; 570 /* FALLTHRU */ 571 default: 572 break; 573 } 574 return; 575 } 576