Home | History | Annotate | Download | only in src
      1 /*************************************************
      2 *      Perl-Compatible Regular Expressions       *
      3 *************************************************/
      4 
      5 /* PCRE is a library of functions to support regular expressions whose syntax
      6 and semantics are as close as possible to those of the Perl 5 language.
      7 
      8                        Written by Philip Hazel
      9      Original API code Copyright (c) 1997-2012 University of Cambridge
     10          New API code Copyright (c) 2016 University of Cambridge
     11 
     12 -----------------------------------------------------------------------------
     13 Redistribution and use in source and binary forms, with or without
     14 modification, are permitted provided that the following conditions are met:
     15 
     16     * Redistributions of source code must retain the above copyright notice,
     17       this list of conditions and the following disclaimer.
     18 
     19     * Redistributions in binary form must reproduce the above copyright
     20       notice, this list of conditions and the following disclaimer in the
     21       documentation and/or other materials provided with the distribution.
     22 
     23     * Neither the name of the University of Cambridge nor the names of its
     24       contributors may be used to endorse or promote products derived from
     25       this software without specific prior written permission.
     26 
     27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     37 POSSIBILITY OF SUCH DAMAGE.
     38 -----------------------------------------------------------------------------
     39 */
     40 
     41 
     42 #ifdef HAVE_CONFIG_H
     43 #include "config.h"
     44 #endif
     45 
     46 #include "pcre2_internal.h"
     47 
     48 
     49 /*************************************************
     50 *        Return info about compiled pattern      *
     51 *************************************************/
     52 
     53 /*
     54 Arguments:
     55   code          points to compiled code
     56   what          what information is required
     57   where         where to put the information; if NULL, return length
     58 
     59 Returns:        0 when data returned
     60                 > 0 when length requested
     61                 < 0 on error or unset value
     62 */
     63 
     64 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
     65 pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where)
     66 {
     67 const pcre2_real_code *re = (pcre2_real_code *)code;
     68 
     69 if (where == NULL)   /* Requests field length */
     70   {
     71   switch(what)
     72     {
     73     case PCRE2_INFO_ALLOPTIONS:
     74     case PCRE2_INFO_ARGOPTIONS:
     75     case PCRE2_INFO_BACKREFMAX:
     76     case PCRE2_INFO_BSR:
     77     case PCRE2_INFO_CAPTURECOUNT:
     78     case PCRE2_INFO_FIRSTCODETYPE:
     79     case PCRE2_INFO_FIRSTCODEUNIT:
     80     case PCRE2_INFO_HASBACKSLASHC:
     81     case PCRE2_INFO_HASCRORLF:
     82     case PCRE2_INFO_JCHANGED:
     83     case PCRE2_INFO_LASTCODETYPE:
     84     case PCRE2_INFO_LASTCODEUNIT:
     85     case PCRE2_INFO_MATCHEMPTY:
     86     case PCRE2_INFO_MATCHLIMIT:
     87     case PCRE2_INFO_MAXLOOKBEHIND:
     88     case PCRE2_INFO_MINLENGTH:
     89     case PCRE2_INFO_NAMEENTRYSIZE:
     90     case PCRE2_INFO_NAMECOUNT:
     91     case PCRE2_INFO_NEWLINE:
     92     case PCRE2_INFO_RECURSIONLIMIT:
     93     return sizeof(uint32_t);
     94 
     95     case PCRE2_INFO_FIRSTBITMAP:
     96     return sizeof(const uint8_t *);
     97 
     98     case PCRE2_INFO_JITSIZE:
     99     case PCRE2_INFO_SIZE:
    100     return sizeof(size_t);
    101 
    102     case PCRE2_INFO_NAMETABLE:
    103     return sizeof(PCRE2_SPTR);
    104     }
    105   }
    106 
    107 if (re == NULL) return PCRE2_ERROR_NULL;
    108 
    109 /* Check that the first field in the block is the magic number. If it is not,
    110 return with PCRE2_ERROR_BADMAGIC. */
    111 
    112 if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
    113 
    114 /* Check that this pattern was compiled in the correct bit mode */
    115 
    116 if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
    117 
    118 switch(what)
    119   {
    120   case PCRE2_INFO_ALLOPTIONS:
    121   *((uint32_t *)where) = re->overall_options;
    122   break;
    123 
    124   case PCRE2_INFO_ARGOPTIONS:
    125   *((uint32_t *)where) = re->compile_options;
    126   break;
    127 
    128   case PCRE2_INFO_BACKREFMAX:
    129   *((uint32_t *)where) = re->top_backref;
    130   break;
    131 
    132   case PCRE2_INFO_BSR:
    133   *((uint32_t *)where) = re->bsr_convention;
    134   break;
    135 
    136   case PCRE2_INFO_CAPTURECOUNT:
    137   *((uint32_t *)where) = re->top_bracket;
    138   break;
    139 
    140   case PCRE2_INFO_FIRSTCODETYPE:
    141   *((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 1 :
    142                          ((re->flags & PCRE2_STARTLINE) != 0)? 2 : 0;
    143   break;
    144 
    145   case PCRE2_INFO_FIRSTCODEUNIT:
    146   *((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)?
    147     re->first_codeunit : 0;
    148   break;
    149 
    150   case PCRE2_INFO_FIRSTBITMAP:
    151   *((const uint8_t **)where) = ((re->flags & PCRE2_FIRSTMAPSET) != 0)?
    152     &(re->start_bitmap[0]) : NULL;
    153   break;
    154 
    155   case PCRE2_INFO_HASBACKSLASHC:
    156   *((uint32_t *)where) = (re->flags & PCRE2_HASBKC) != 0;
    157   break;
    158 
    159   case PCRE2_INFO_HASCRORLF:
    160   *((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0;
    161   break;
    162 
    163   case PCRE2_INFO_JCHANGED:
    164   *((uint32_t *)where) = (re->flags & PCRE2_JCHANGED) != 0;
    165   break;
    166 
    167   case PCRE2_INFO_JITSIZE:
    168 #ifdef SUPPORT_JIT
    169   *((size_t *)where) = (re->executable_jit != NULL)?
    170     PRIV(jit_get_size)(re->executable_jit) : 0;
    171 #else
    172   *((size_t *)where) = 0;
    173 #endif
    174   break;
    175 
    176   case PCRE2_INFO_LASTCODETYPE:
    177   *((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)? 1 : 0;
    178   break;
    179 
    180   case PCRE2_INFO_LASTCODEUNIT:
    181   *((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)?
    182     re->last_codeunit : 0;
    183   break;
    184 
    185   case PCRE2_INFO_MATCHEMPTY:
    186   *((uint32_t *)where) = (re->flags & PCRE2_MATCH_EMPTY) != 0;
    187   break;
    188 
    189   case PCRE2_INFO_MATCHLIMIT:
    190   *((uint32_t *)where) = re->limit_match;
    191   if (re->limit_match == UINT32_MAX) return PCRE2_ERROR_UNSET;
    192   break;
    193 
    194   case PCRE2_INFO_MAXLOOKBEHIND:
    195   *((uint32_t *)where) = re->max_lookbehind;
    196   break;
    197 
    198   case PCRE2_INFO_MINLENGTH:
    199   *((uint32_t *)where) = re->minlength;
    200   break;
    201 
    202   case PCRE2_INFO_NAMEENTRYSIZE:
    203   *((uint32_t *)where) = re->name_entry_size;
    204   break;
    205 
    206   case PCRE2_INFO_NAMECOUNT:
    207   *((uint32_t *)where) = re->name_count;
    208   break;
    209 
    210   case PCRE2_INFO_NAMETABLE:
    211   *((PCRE2_SPTR *)where) = (PCRE2_SPTR)((char *)re + sizeof(pcre2_real_code));
    212   break;
    213 
    214   case PCRE2_INFO_NEWLINE:
    215   *((uint32_t *)where) = re->newline_convention;
    216   break;
    217 
    218   case PCRE2_INFO_RECURSIONLIMIT:
    219   *((uint32_t *)where) = re->limit_recursion;
    220   if (re->limit_recursion == UINT32_MAX) return PCRE2_ERROR_UNSET;
    221   break;
    222 
    223   case PCRE2_INFO_SIZE:
    224   *((size_t *)where) = re->blocksize;
    225   break;
    226 
    227   default: return PCRE2_ERROR_BADOPTION;
    228   }
    229 
    230 return 0;
    231 }
    232 
    233 
    234 
    235 /*************************************************
    236 *              Callout enumerator                *
    237 *************************************************/
    238 
    239 /*
    240 Arguments:
    241   code          points to compiled code
    242   callback      function called for each callout block
    243   callout_data  user data passed to the callback
    244 
    245 Returns:        0 when successfully completed
    246                 < 0 on local error
    247                != 0 for callback error
    248 */
    249 
    250 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
    251 pcre2_callout_enumerate(const pcre2_code *code,
    252   int (*callback)(pcre2_callout_enumerate_block *, void *), void *callout_data)
    253 {
    254 pcre2_real_code *re = (pcre2_real_code *)code;
    255 pcre2_callout_enumerate_block cb;
    256 PCRE2_SPTR cc;
    257 #ifdef SUPPORT_UNICODE
    258 BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
    259 #endif
    260 
    261 if (re == NULL) return PCRE2_ERROR_NULL;
    262 
    263 /* Check that the first field in the block is the magic number. If it is not,
    264 return with PCRE2_ERROR_BADMAGIC. */
    265 
    266 if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
    267 
    268 /* Check that this pattern was compiled in the correct bit mode */
    269 
    270 if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
    271 
    272 cb.version = 0;
    273 cc = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code))
    274      + re->name_count * re->name_entry_size;
    275 
    276 while (TRUE)
    277   {
    278   int rc;
    279   switch (*cc)
    280     {
    281     case OP_END:
    282     return 0;
    283 
    284     case OP_CHAR:
    285     case OP_CHARI:
    286     case OP_NOT:
    287     case OP_NOTI:
    288     case OP_STAR:
    289     case OP_MINSTAR:
    290     case OP_PLUS:
    291     case OP_MINPLUS:
    292     case OP_QUERY:
    293     case OP_MINQUERY:
    294     case OP_UPTO:
    295     case OP_MINUPTO:
    296     case OP_EXACT:
    297     case OP_POSSTAR:
    298     case OP_POSPLUS:
    299     case OP_POSQUERY:
    300     case OP_POSUPTO:
    301     case OP_STARI:
    302     case OP_MINSTARI:
    303     case OP_PLUSI:
    304     case OP_MINPLUSI:
    305     case OP_QUERYI:
    306     case OP_MINQUERYI:
    307     case OP_UPTOI:
    308     case OP_MINUPTOI:
    309     case OP_EXACTI:
    310     case OP_POSSTARI:
    311     case OP_POSPLUSI:
    312     case OP_POSQUERYI:
    313     case OP_POSUPTOI:
    314     case OP_NOTSTAR:
    315     case OP_NOTMINSTAR:
    316     case OP_NOTPLUS:
    317     case OP_NOTMINPLUS:
    318     case OP_NOTQUERY:
    319     case OP_NOTMINQUERY:
    320     case OP_NOTUPTO:
    321     case OP_NOTMINUPTO:
    322     case OP_NOTEXACT:
    323     case OP_NOTPOSSTAR:
    324     case OP_NOTPOSPLUS:
    325     case OP_NOTPOSQUERY:
    326     case OP_NOTPOSUPTO:
    327     case OP_NOTSTARI:
    328     case OP_NOTMINSTARI:
    329     case OP_NOTPLUSI:
    330     case OP_NOTMINPLUSI:
    331     case OP_NOTQUERYI:
    332     case OP_NOTMINQUERYI:
    333     case OP_NOTUPTOI:
    334     case OP_NOTMINUPTOI:
    335     case OP_NOTEXACTI:
    336     case OP_NOTPOSSTARI:
    337     case OP_NOTPOSPLUSI:
    338     case OP_NOTPOSQUERYI:
    339     case OP_NOTPOSUPTOI:
    340     cc += PRIV(OP_lengths)[*cc];
    341 #ifdef SUPPORT_UNICODE
    342     if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
    343 #endif
    344     break;
    345 
    346     case OP_TYPESTAR:
    347     case OP_TYPEMINSTAR:
    348     case OP_TYPEPLUS:
    349     case OP_TYPEMINPLUS:
    350     case OP_TYPEQUERY:
    351     case OP_TYPEMINQUERY:
    352     case OP_TYPEUPTO:
    353     case OP_TYPEMINUPTO:
    354     case OP_TYPEEXACT:
    355     case OP_TYPEPOSSTAR:
    356     case OP_TYPEPOSPLUS:
    357     case OP_TYPEPOSQUERY:
    358     case OP_TYPEPOSUPTO:
    359     cc += PRIV(OP_lengths)[*cc];
    360 #ifdef SUPPORT_UNICODE
    361     if (cc[-1] == OP_PROP || cc[-1] == OP_NOTPROP) cc += 2;
    362 #endif
    363     break;
    364 
    365 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    366     case OP_XCLASS:
    367     cc += GET(cc, 1);
    368     break;
    369 #endif
    370 
    371     case OP_MARK:
    372     case OP_PRUNE_ARG:
    373     case OP_SKIP_ARG:
    374     case OP_THEN_ARG:
    375     cc += PRIV(OP_lengths)[*cc] + cc[1];
    376     break;
    377 
    378     case OP_CALLOUT:
    379     cb.pattern_position = GET(cc, 1);
    380     cb.next_item_length = GET(cc, 1 + LINK_SIZE);
    381     cb.callout_number = cc[1 + 2*LINK_SIZE];
    382     cb.callout_string_offset = 0;
    383     cb.callout_string_length = 0;
    384     cb.callout_string = NULL;
    385     rc = callback(&cb, callout_data);
    386     if (rc != 0) return rc;
    387     cc += PRIV(OP_lengths)[*cc];
    388     break;
    389 
    390     case OP_CALLOUT_STR:
    391     cb.pattern_position = GET(cc, 1);
    392     cb.next_item_length = GET(cc, 1 + LINK_SIZE);
    393     cb.callout_number = 0;
    394     cb.callout_string_offset = GET(cc, 1 + 3*LINK_SIZE);
    395     cb.callout_string_length =
    396       GET(cc, 1 + 2*LINK_SIZE) - (1 + 4*LINK_SIZE) - 2;
    397     cb.callout_string = cc + (1 + 4*LINK_SIZE) + 1;
    398     rc = callback(&cb, callout_data);
    399     if (rc != 0) return rc;
    400     cc += GET(cc, 1 + 2*LINK_SIZE);
    401     break;
    402 
    403     default:
    404     cc += PRIV(OP_lengths)[*cc];
    405     break;
    406     }
    407   }
    408 }
    409 
    410 /* End of pcre2_pattern_info.c */
    411