Home | History | Annotate | Download | only in src
      1 /*************************************************
      2 *      Perl-Compatible Regular Expressions       *
      3 *************************************************/
      4 
      5 /* PCRE is a library of functions to support regular expressions whose syntax
      6 and semantics are as close as possible to those of the Perl 5 language.
      7 
      8                        Written by Philip Hazel
      9      Original API code Copyright (c) 1997-2012 University of Cambridge
     10          New API code Copyright (c) 2016 University of Cambridge
     11 
     12 -----------------------------------------------------------------------------
     13 Redistribution and use in source and binary forms, with or without
     14 modification, are permitted provided that the following conditions are met:
     15 
     16     * Redistributions of source code must retain the above copyright notice,
     17       this list of conditions and the following disclaimer.
     18 
     19     * Redistributions in binary form must reproduce the above copyright
     20       notice, this list of conditions and the following disclaimer in the
     21       documentation and/or other materials provided with the distribution.
     22 
     23     * Neither the name of the University of Cambridge nor the names of its
     24       contributors may be used to endorse or promote products derived from
     25       this software without specific prior written permission.
     26 
     27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     37 POSSIBILITY OF SUCH DAMAGE.
     38 -----------------------------------------------------------------------------
     39 */
     40 
     41 
     42 /* This module contains a single function that scans through a compiled pattern
     43 until it finds a capturing bracket with the given number, or, if the number is
     44 negative, an instance of OP_REVERSE for a lookbehind. The function is called
     45 from pcre2_compile.c and also from pcre2_study.c when finding the minimum
     46 matching length. */
     47 
     48 
     49 #ifdef HAVE_CONFIG_H
     50 #include "config.h"
     51 #endif
     52 
     53 #include "pcre2_internal.h"
     54 
     55 
     56 /*************************************************
     57 *    Scan compiled regex for specific bracket    *
     58 *************************************************/
     59 
     60 /*
     61 Arguments:
     62   code        points to start of expression
     63   utf         TRUE in UTF mode
     64   number      the required bracket number or negative to find a lookbehind
     65 
     66 Returns:      pointer to the opcode for the bracket, or NULL if not found
     67 */
     68 
     69 PCRE2_SPTR
     70 PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
     71 {
     72 for (;;)
     73   {
     74   register PCRE2_UCHAR c = *code;
     75 
     76   if (c == OP_END) return NULL;
     77 
     78   /* XCLASS is used for classes that cannot be represented just by a bit map.
     79   This includes negated single high-valued characters. CALLOUT_STR is used for
     80   callouts with string arguments. In both cases the length in the table is
     81   zero; the actual length is stored in the compiled code. */
     82 
     83   if (c == OP_XCLASS) code += GET(code, 1);
     84     else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
     85 
     86   /* Handle lookbehind */
     87 
     88   else if (c == OP_REVERSE)
     89     {
     90     if (number < 0) return (PCRE2_UCHAR *)code;
     91     code += PRIV(OP_lengths)[c];
     92     }
     93 
     94   /* Handle capturing bracket */
     95 
     96   else if (c == OP_CBRA || c == OP_SCBRA ||
     97            c == OP_CBRAPOS || c == OP_SCBRAPOS)
     98     {
     99     int n = (int)GET2(code, 1+LINK_SIZE);
    100     if (n == number) return (PCRE2_UCHAR *)code;
    101     code += PRIV(OP_lengths)[c];
    102     }
    103 
    104   /* Otherwise, we can get the item's length from the table, except that for
    105   repeated character types, we have to test for \p and \P, which have an extra
    106   two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
    107   must add in its length. */
    108 
    109   else
    110     {
    111     switch(c)
    112       {
    113       case OP_TYPESTAR:
    114       case OP_TYPEMINSTAR:
    115       case OP_TYPEPLUS:
    116       case OP_TYPEMINPLUS:
    117       case OP_TYPEQUERY:
    118       case OP_TYPEMINQUERY:
    119       case OP_TYPEPOSSTAR:
    120       case OP_TYPEPOSPLUS:
    121       case OP_TYPEPOSQUERY:
    122       if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
    123       break;
    124 
    125       case OP_TYPEUPTO:
    126       case OP_TYPEMINUPTO:
    127       case OP_TYPEEXACT:
    128       case OP_TYPEPOSUPTO:
    129       if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
    130         code += 2;
    131       break;
    132 
    133       case OP_MARK:
    134       case OP_PRUNE_ARG:
    135       case OP_SKIP_ARG:
    136       case OP_THEN_ARG:
    137       code += code[1];
    138       break;
    139       }
    140 
    141     /* Add in the fixed length from the table */
    142 
    143     code += PRIV(OP_lengths)[c];
    144 
    145   /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
    146   followed by a multi-byte character. The length in the table is a minimum, so
    147   we have to arrange to skip the extra bytes. */
    148 
    149 #ifdef MAYBE_UTF_MULTI
    150     if (utf) switch(c)
    151       {
    152       case OP_CHAR:
    153       case OP_CHARI:
    154       case OP_NOT:
    155       case OP_NOTI:
    156       case OP_EXACT:
    157       case OP_EXACTI:
    158       case OP_NOTEXACT:
    159       case OP_NOTEXACTI:
    160       case OP_UPTO:
    161       case OP_UPTOI:
    162       case OP_NOTUPTO:
    163       case OP_NOTUPTOI:
    164       case OP_MINUPTO:
    165       case OP_MINUPTOI:
    166       case OP_NOTMINUPTO:
    167       case OP_NOTMINUPTOI:
    168       case OP_POSUPTO:
    169       case OP_POSUPTOI:
    170       case OP_NOTPOSUPTO:
    171       case OP_NOTPOSUPTOI:
    172       case OP_STAR:
    173       case OP_STARI:
    174       case OP_NOTSTAR:
    175       case OP_NOTSTARI:
    176       case OP_MINSTAR:
    177       case OP_MINSTARI:
    178       case OP_NOTMINSTAR:
    179       case OP_NOTMINSTARI:
    180       case OP_POSSTAR:
    181       case OP_POSSTARI:
    182       case OP_NOTPOSSTAR:
    183       case OP_NOTPOSSTARI:
    184       case OP_PLUS:
    185       case OP_PLUSI:
    186       case OP_NOTPLUS:
    187       case OP_NOTPLUSI:
    188       case OP_MINPLUS:
    189       case OP_MINPLUSI:
    190       case OP_NOTMINPLUS:
    191       case OP_NOTMINPLUSI:
    192       case OP_POSPLUS:
    193       case OP_POSPLUSI:
    194       case OP_NOTPOSPLUS:
    195       case OP_NOTPOSPLUSI:
    196       case OP_QUERY:
    197       case OP_QUERYI:
    198       case OP_NOTQUERY:
    199       case OP_NOTQUERYI:
    200       case OP_MINQUERY:
    201       case OP_MINQUERYI:
    202       case OP_NOTMINQUERY:
    203       case OP_NOTMINQUERYI:
    204       case OP_POSQUERY:
    205       case OP_POSQUERYI:
    206       case OP_NOTPOSQUERY:
    207       case OP_NOTPOSQUERYI:
    208       if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
    209       break;
    210       }
    211 #else
    212     (void)(utf);  /* Keep compiler happy by referencing function argument */
    213 #endif  /* MAYBE_UTF_MULTI */
    214     }
    215   }
    216 }
    217 
    218 /* End of pcre2_find_bracket.c */
    219