Home | History | Annotate | Download | only in src
      1 /***************************************************************************
      2 Fuzzer driver for PCRE2. Given an arbitrary string of bytes and a length, it
      3 tries to compile and match it, deriving options from the string itself. If
      4 STANDALONE is defined, a main program that calls the driver with the contents
      5 of specified files is compiled, and commentary on what is happening is output.
      6 If an argument starts with '=' the rest of it is taken as a literal string
      7 rather than a file name. This allows easy testing of short strings.
      8 
      9 Written by Philip Hazel, October 2016
     10 ***************************************************************************/
     11 
     12 #include <errno.h>
     13 #include <stdio.h>
     14 #include <stdlib.h>
     15 #include <string.h>
     16 
     17 #define PCRE2_CODE_UNIT_WIDTH 8
     18 #include "pcre2.h"
     19 
     20 #define MAX_MATCH_SIZE 1000
     21 
     22 #define DFA_WORKSPACE_COUNT 100
     23 
     24 #define ALLOWED_COMPILE_OPTIONS \
     25   (PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \
     26    PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \
     27    PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_ENDANCHORED|PCRE2_EXTENDED|PCRE2_FIRSTLINE| \
     28    PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \
     29    PCRE2_NO_AUTO_CAPTURE| \
     30    PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \
     31    PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_USE_OFFSET_LIMIT| \
     32    PCRE2_UTF)
     33 
     34 #define ALLOWED_MATCH_OPTIONS \
     35   (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
     36    PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \
     37    PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT)
     38 
     39 /* This is the callout function. Its only purpose is to halt matching if there
     40 are more than 100 callouts, as one way of stopping too much time being spent on
     41 fruitless matches. The callout data is a pointer to the counter. */
     42 
     43 static int callout_function(pcre2_callout_block *cb, void *callout_data)
     44 {
     45 (void)cb;  /* Avoid unused parameter warning */
     46 *((uint32_t *)callout_data) += 1;
     47 return (*((uint32_t *)callout_data) > 100)? PCRE2_ERROR_CALLOUT : 0;
     48 }
     49 
     50 /* Putting in this apparently unnecessary prototype prevents gcc from giving a
     51 "no previous prototype" warning when compiling at high warning level. */
     52 
     53 int LLVMFuzzerTestOneInput(const unsigned char *, size_t);
     54 
     55 /* Here's the driving function. */
     56 
     57 int LLVMFuzzerTestOneInput(const unsigned char *data, size_t size)
     58 {
     59 uint32_t compile_options;
     60 uint32_t match_options;
     61 pcre2_match_data *match_data = NULL;
     62 pcre2_match_context *match_context = NULL;
     63 size_t match_size;
     64 int dfa_workspace[DFA_WORKSPACE_COUNT];
     65 int r1, r2;
     66 int i;
     67 
     68 if (size < 1) return 0;
     69 
     70 /* Limiting the length of the subject for matching stops fruitless searches
     71 in large trees taking too much time. */
     72 
     73 match_size = (size > MAX_MATCH_SIZE)? MAX_MATCH_SIZE : size;
     74 
     75 /* Figure out some options to use. Initialize the random number to ensure
     76 repeatability. Ensure that we get a 32-bit unsigned random number for testing
     77 options. (RAND_MAX is required to be at least 32767, but is commonly
     78 2147483647, which excludes the top bit.) */
     79 
     80 srand((unsigned int)(data[size/2]));
     81 r1 = rand();
     82 r2 = rand();
     83 
     84 /* Ensure that all undefined option bits are zero (waste of time trying them)
     85 and also that PCRE2_NO_UTF_CHECK is unset, as there is no guarantee that the
     86 input is UTF-8. Also unset PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as there is no
     87 reason to disallow UTF and UCP. Force PCRE2_NEVER_BACKSLASH_C to be set because
     88 \C in random patterns is highly likely to cause a crash. */
     89 
     90 compile_options =
     91   ((((uint32_t)r1 << 16) | ((uint32_t)r2 & 0xffff)) & ALLOWED_COMPILE_OPTIONS) |
     92   PCRE2_NEVER_BACKSLASH_C;
     93 
     94 match_options =
     95   ((((uint32_t)r1 << 16) | ((uint32_t)r2 & 0xffff)) & ALLOWED_MATCH_OPTIONS);
     96 
     97 /* Discard partial matching if PCRE2_ENDANCHORED is set, because they are not
     98 allowed together and just give an immediate error return. */
     99 
    100 if (((compile_options|match_options) & PCRE2_ENDANCHORED) != 0)
    101   match_options &= ~(PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT);
    102 
    103 /* Do the compile with and without the options, and after a successful compile,
    104 likewise do the match with and without the options. */
    105 
    106 for (i = 0; i < 2; i++)
    107   {
    108   uint32_t callout_count;
    109   int errorcode;
    110   PCRE2_SIZE erroroffset;
    111   pcre2_code *code;
    112 
    113 #ifdef STANDALONE
    114   printf("Compile options %.8x never_backslash_c", compile_options);
    115   printf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
    116     ((compile_options & PCRE2_ALT_BSUX) != 0)? ",alt_bsux" : "",
    117     ((compile_options & PCRE2_ALT_CIRCUMFLEX) != 0)? ",alt_circumflex" : "",
    118     ((compile_options & PCRE2_ALT_VERBNAMES) != 0)? ",alt_verbnames" : "",
    119     ((compile_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? ",allow_empty_class" : "",
    120     ((compile_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
    121     ((compile_options & PCRE2_AUTO_CALLOUT) != 0)? ",auto_callout" : "",
    122     ((compile_options & PCRE2_CASELESS) != 0)? ",caseless" : "",
    123     ((compile_options & PCRE2_DOLLAR_ENDONLY) != 0)? ",dollar_endonly" : "",
    124     ((compile_options & PCRE2_DOTALL) != 0)? ",dotall" : "",
    125     ((compile_options & PCRE2_DUPNAMES) != 0)? ",dupnames" : "",
    126     ((compile_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
    127     ((compile_options & PCRE2_EXTENDED) != 0)? ",extended" : "",
    128     ((compile_options & PCRE2_FIRSTLINE) != 0)? ",firstline" : "",
    129     ((compile_options & PCRE2_MATCH_UNSET_BACKREF) != 0)? ",match_unset_backref" : "",
    130     ((compile_options & PCRE2_MULTILINE) != 0)? ",multiline" : "",
    131     ((compile_options & PCRE2_NEVER_UCP) != 0)? ",never_ucp" : "",
    132     ((compile_options & PCRE2_NEVER_UTF) != 0)? ",never_utf" : "",
    133     ((compile_options & PCRE2_NO_AUTO_CAPTURE) != 0)? ",no_auto_capture" : "",
    134     ((compile_options & PCRE2_NO_AUTO_POSSESS) != 0)? ",no_auto_possess" : "",
    135     ((compile_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? ",no_dotstar_anchor" : "",
    136     ((compile_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
    137     ((compile_options & PCRE2_NO_START_OPTIMIZE) != 0)? ",no_start_optimize" : "",
    138     ((compile_options & PCRE2_UCP) != 0)? ",ucp" : "",
    139     ((compile_options & PCRE2_UNGREEDY) != 0)? ",ungreedy" : "",
    140     ((compile_options & PCRE2_USE_OFFSET_LIMIT) != 0)? ",use_offset_limit" : "",
    141     ((compile_options & PCRE2_UTF) != 0)? ",utf" : "");
    142 #endif
    143 
    144   code = pcre2_compile((PCRE2_SPTR)data, (PCRE2_SIZE)size, compile_options,
    145     &errorcode, &erroroffset, NULL);
    146 
    147   /* Compilation succeeded */
    148 
    149   if (code != NULL)
    150     {
    151     int j;
    152     uint32_t save_match_options = match_options;
    153 
    154     /* Create match data and context blocks only when we first need them. Set
    155     low match and depth limits to avoid wasting too much searching large
    156     pattern trees. Almost all matches are going to fail. */
    157 
    158     if (match_data == NULL)
    159       {
    160       match_data = pcre2_match_data_create(32, NULL);
    161       if (match_data == NULL)
    162         {
    163 #ifdef STANDALONE
    164         printf("** Failed to create match data block\n");
    165 #endif
    166         return 0;
    167         }
    168       }
    169 
    170     if (match_context == NULL)
    171       {
    172       match_context = pcre2_match_context_create(NULL);
    173       if (match_context == NULL)
    174         {
    175 #ifdef STANDALONE
    176         printf("** Failed to create match context block\n");
    177 #endif
    178         return 0;
    179         }
    180       (void)pcre2_set_match_limit(match_context, 100);
    181       (void)pcre2_set_depth_limit(match_context, 100);
    182       (void)pcre2_set_callout(match_context, callout_function, &callout_count);
    183       }
    184 
    185     /* Match twice, with and without options. */
    186 
    187     for (j = 0; j < 2; j++)
    188       {
    189 #ifdef STANDALONE
    190       printf("Match options %.8x", match_options);
    191       printf("%s%s%s%s%s%s%s%s%s%s\n",
    192         ((match_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
    193         ((match_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
    194         ((match_options & PCRE2_NO_JIT) != 0)? ",no_jit" : "",
    195         ((match_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
    196         ((match_options & PCRE2_NOTBOL) != 0)? ",notbol" : "",
    197         ((match_options & PCRE2_NOTEMPTY) != 0)? ",notempty" : "",
    198         ((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? ",notempty_atstart" : "",
    199         ((match_options & PCRE2_NOTEOL) != 0)? ",noteol" : "",
    200         ((match_options & PCRE2_PARTIAL_HARD) != 0)? ",partial_hard" : "",
    201         ((match_options & PCRE2_PARTIAL_SOFT) != 0)? ",partial_soft" : "");
    202 #endif
    203 
    204       callout_count = 0;
    205       errorcode = pcre2_match(code, (PCRE2_SPTR)data, (PCRE2_SIZE)match_size, 0,
    206         match_options, match_data, match_context);
    207 
    208 #ifdef STANDALONE
    209       if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
    210         {
    211         unsigned char buffer[256];
    212         pcre2_get_error_message(errorcode, buffer, 256);
    213         printf("Match failed: error %d: %s\n", errorcode, buffer);
    214         }
    215 #endif
    216 
    217       match_options = 0;  /* For second time */
    218       }
    219 
    220     /* Match with DFA twice, with and without options. */
    221 
    222     match_options = save_match_options & ~PCRE2_NO_JIT;  /* Not valid for DFA */
    223 
    224     for (j = 0; j < 2; j++)
    225       {
    226 #ifdef STANDALONE
    227       printf("DFA match options %.8x", match_options);
    228       printf("%s%s%s%s%s%s%s%s%s\n",
    229         ((match_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
    230         ((match_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
    231         ((match_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
    232         ((match_options & PCRE2_NOTBOL) != 0)? ",notbol" : "",
    233         ((match_options & PCRE2_NOTEMPTY) != 0)? ",notempty" : "",
    234         ((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? ",notempty_atstart" : "",
    235         ((match_options & PCRE2_NOTEOL) != 0)? ",noteol" : "",
    236         ((match_options & PCRE2_PARTIAL_HARD) != 0)? ",partial_hard" : "",
    237         ((match_options & PCRE2_PARTIAL_SOFT) != 0)? ",partial_soft" : "");
    238 #endif
    239 
    240       callout_count = 0;
    241       errorcode = pcre2_dfa_match(code, (PCRE2_SPTR)data,
    242         (PCRE2_SIZE)match_size, 0, match_options, match_data, match_context,
    243         dfa_workspace, DFA_WORKSPACE_COUNT);
    244 
    245 #ifdef STANDALONE
    246       if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
    247         {
    248         unsigned char buffer[256];
    249         pcre2_get_error_message(errorcode, buffer, 256);
    250         printf("Match failed: error %d: %s\n", errorcode, buffer);
    251         }
    252 #endif
    253 
    254       match_options = 0;  /* For second time */
    255       }
    256 
    257     match_options = save_match_options;  /* Reset for the second compile */
    258     pcre2_code_free(code);
    259     }
    260 
    261   /* Compilation failed */
    262 
    263   else
    264     {
    265     unsigned char buffer[256];
    266     pcre2_get_error_message(errorcode, buffer, 256);
    267 #ifdef STANDALONE
    268     printf("Error %d at offset %lu: %s\n", errorcode, erroroffset, buffer);
    269 #else
    270     if (strstr((const char *)buffer, "internal error") != NULL) abort();
    271 #endif
    272     }
    273 
    274   compile_options = PCRE2_NEVER_BACKSLASH_C;  /* For second time */
    275   }
    276 
    277 if (match_data != NULL) pcre2_match_data_free(match_data);
    278 if (match_context != NULL) pcre2_match_context_free(match_context);
    279 
    280 return 0;
    281 }
    282 
    283 
    284 /* Optional main program.  */
    285 
    286 #ifdef STANDALONE
    287 int main(int argc, char **argv)
    288 {
    289 int i;
    290 
    291 if (argc < 2)
    292   {
    293   printf("** No arguments given\n");
    294   return 0;
    295   }
    296 
    297 for (i = 1; i < argc; i++)
    298   {
    299   size_t filelen;
    300   size_t readsize;
    301   unsigned char *buffer;
    302   FILE *f;
    303 
    304   /* Handle a literal string. Copy to an exact size buffer so that checks for
    305   overrunning work. */
    306 
    307   if (argv[i][0] == '=')
    308     {
    309     readsize = strlen(argv[i]) - 1;
    310     printf("------ <Literal> ------\n");
    311     printf("Length = %lu\n", readsize);
    312     printf("%.*s\n", (int)readsize, argv[i]+1);
    313     buffer = (unsigned char *)malloc(readsize);
    314     if (buffer == NULL)
    315       printf("** Failed to allocate %lu bytes of memory\n", readsize);
    316     else
    317       {
    318       memcpy(buffer, argv[i]+1, readsize);
    319       LLVMFuzzerTestOneInput(buffer, readsize);
    320       free(buffer);
    321       }
    322     continue;
    323     }
    324 
    325   /* Handle a string given in a file */
    326 
    327   f = fopen(argv[i], "rb");
    328   if (f == NULL)
    329     {
    330     printf("** Failed to open %s: %s\n", argv[i], strerror(errno));
    331     continue;
    332     }
    333 
    334   printf("------ %s ------\n", argv[i]);
    335 
    336   fseek(f, 0, SEEK_END);
    337   filelen = ftell(f);
    338   fseek(f, 0, SEEK_SET);
    339 
    340   buffer = (unsigned char *)malloc(filelen);
    341   if (buffer == NULL)
    342     {
    343     printf("** Failed to allocate %lu bytes of memory\n", filelen);
    344     fclose(f);
    345     continue;
    346     }
    347 
    348   readsize = fread(buffer, 1, filelen, f);
    349   fclose(f);
    350 
    351   if (readsize != filelen)
    352     printf("** File size is %lu but fread() returned %lu\n", filelen, readsize);
    353   else
    354     {
    355     printf("Length = %lu\n", filelen);
    356     LLVMFuzzerTestOneInput(buffer, filelen);
    357     }
    358   free(buffer);
    359   }
    360 
    361 return 0;
    362 }
    363 #endif  /* STANDALONE */
    364 
    365 /* End */
    366