Home | History | Annotate | Download | only in src
      1 /*************************************************
      2 *      Perl-Compatible Regular Expressions       *
      3 *************************************************/
      4 
      5 /* PCRE is a library of functions to support regular expressions whose syntax
      6 and semantics are as close as possible to those of the Perl 5 language.
      7 
      8                        Written by Philip Hazel
      9      Original API code Copyright (c) 1997-2012 University of Cambridge
     10           New API code Copyright (c) 2016-2018 University of Cambridge
     11 
     12 -----------------------------------------------------------------------------
     13 Redistribution and use in source and binary forms, with or without
     14 modification, are permitted provided that the following conditions are met:
     15 
     16     * Redistributions of source code must retain the above copyright notice,
     17       this list of conditions and the following disclaimer.
     18 
     19     * Redistributions in binary form must reproduce the above copyright
     20       notice, this list of conditions and the following disclaimer in the
     21       documentation and/or other materials provided with the distribution.
     22 
     23     * Neither the name of the University of Cambridge nor the names of its
     24       contributors may be used to endorse or promote products derived from
     25       this software without specific prior written permission.
     26 
     27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     37 POSSIBILITY OF SUCH DAMAGE.
     38 -----------------------------------------------------------------------------
     39 */
     40 
     41 
     42 #ifdef HAVE_CONFIG_H
     43 #include "config.h"
     44 #endif
     45 
     46 #include "pcre2_internal.h"
     47 
     48 
     49 
     50 /*************************************************
     51 *   Copy named captured string to given buffer   *
     52 *************************************************/
     53 
     54 /* This function copies a single captured substring into a given buffer,
     55 identifying it by name. If the regex permits duplicate names, the first
     56 substring that is set is chosen.
     57 
     58 Arguments:
     59   match_data     points to the match data
     60   stringname     the name of the required substring
     61   buffer         where to put the substring
     62   sizeptr        the size of the buffer, updated to the size of the substring
     63 
     64 Returns:         if successful: zero
     65                  if not successful, a negative error code:
     66                    (1) an error from nametable_scan()
     67                    (2) an error from copy_bynumber()
     68                    (3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
     69                    (4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
     70 */
     71 
     72 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
     73 pcre2_substring_copy_byname(pcre2_match_data *match_data, PCRE2_SPTR stringname,
     74   PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
     75 {
     76 PCRE2_SPTR first, last, entry;
     77 int failrc, entrysize;
     78 if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
     79   return PCRE2_ERROR_DFA_UFUNC;
     80 entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
     81   &first, &last);
     82 if (entrysize < 0) return entrysize;
     83 failrc = PCRE2_ERROR_UNAVAILABLE;
     84 for (entry = first; entry <= last; entry += entrysize)
     85   {
     86   uint32_t n = GET2(entry, 0);
     87   if (n < match_data->oveccount)
     88     {
     89     if (match_data->ovector[n*2] != PCRE2_UNSET)
     90       return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr);
     91     failrc = PCRE2_ERROR_UNSET;
     92     }
     93   }
     94 return failrc;
     95 }
     96 
     97 
     98 
     99 /*************************************************
    100 *  Copy numbered captured string to given buffer *
    101 *************************************************/
    102 
    103 /* This function copies a single captured substring into a given buffer,
    104 identifying it by number.
    105 
    106 Arguments:
    107   match_data     points to the match data
    108   stringnumber   the number of the required substring
    109   buffer         where to put the substring
    110   sizeptr        the size of the buffer, updated to the size of the substring
    111 
    112 Returns:         if successful: 0
    113                  if not successful, a negative error code:
    114                    PCRE2_ERROR_NOMEMORY: buffer too small
    115                    PCRE2_ERROR_NOSUBSTRING: no such substring
    116                    PCRE2_ERROR_UNAVAILABLE: ovector too small
    117                    PCRE2_ERROR_UNSET: substring is not set
    118 */
    119 
    120 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
    121 pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
    122   uint32_t stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
    123 {
    124 int rc;
    125 PCRE2_SIZE size;
    126 rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
    127 if (rc < 0) return rc;
    128 if (size + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY;
    129 memcpy(buffer, match_data->subject + match_data->ovector[stringnumber*2],
    130   CU2BYTES(size));
    131 buffer[size] = 0;
    132 *sizeptr = size;
    133 return 0;
    134 }
    135 
    136 
    137 
    138 /*************************************************
    139 *          Extract named captured string         *
    140 *************************************************/
    141 
    142 /* This function copies a single captured substring, identified by name, into
    143 new memory. If the regex permits duplicate names, the first substring that is
    144 set is chosen.
    145 
    146 Arguments:
    147   match_data     pointer to match_data
    148   stringname     the name of the required substring
    149   stringptr      where to put the pointer to the new memory
    150   sizeptr        where to put the length of the substring
    151 
    152 Returns:         if successful: zero
    153                  if not successful, a negative value:
    154                    (1) an error from nametable_scan()
    155                    (2) an error from get_bynumber()
    156                    (3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
    157                    (4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
    158 */
    159 
    160 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
    161 pcre2_substring_get_byname(pcre2_match_data *match_data,
    162   PCRE2_SPTR stringname, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
    163 {
    164 PCRE2_SPTR first, last, entry;
    165 int failrc, entrysize;
    166 if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
    167   return PCRE2_ERROR_DFA_UFUNC;
    168 entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
    169   &first, &last);
    170 if (entrysize < 0) return entrysize;
    171 failrc = PCRE2_ERROR_UNAVAILABLE;
    172 for (entry = first; entry <= last; entry += entrysize)
    173   {
    174   uint32_t n = GET2(entry, 0);
    175   if (n < match_data->oveccount)
    176     {
    177     if (match_data->ovector[n*2] != PCRE2_UNSET)
    178       return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr);
    179     failrc = PCRE2_ERROR_UNSET;
    180     }
    181   }
    182 return failrc;
    183 }
    184 
    185 
    186 
    187 /*************************************************
    188 *      Extract captured string to new memory     *
    189 *************************************************/
    190 
    191 /* This function copies a single captured substring into a piece of new
    192 memory.
    193 
    194 Arguments:
    195   match_data     points to match data
    196   stringnumber   the number of the required substring
    197   stringptr      where to put a pointer to the new memory
    198   sizeptr        where to put the size of the substring
    199 
    200 Returns:         if successful: 0
    201                  if not successful, a negative error code:
    202                    PCRE2_ERROR_NOMEMORY: failed to get memory
    203                    PCRE2_ERROR_NOSUBSTRING: no such substring
    204                    PCRE2_ERROR_UNAVAILABLE: ovector too small
    205                    PCRE2_ERROR_UNSET: substring is not set
    206 */
    207 
    208 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
    209 pcre2_substring_get_bynumber(pcre2_match_data *match_data,
    210   uint32_t stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
    211 {
    212 int rc;
    213 PCRE2_SIZE size;
    214 PCRE2_UCHAR *yield;
    215 rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
    216 if (rc < 0) return rc;
    217 yield = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
    218   (size + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data);
    219 if (yield == NULL) return PCRE2_ERROR_NOMEMORY;
    220 yield = (PCRE2_UCHAR *)(((char *)yield) + sizeof(pcre2_memctl));
    221 memcpy(yield, match_data->subject + match_data->ovector[stringnumber*2],
    222   CU2BYTES(size));
    223 yield[size] = 0;
    224 *stringptr = yield;
    225 *sizeptr = size;
    226 return 0;
    227 }
    228 
    229 
    230 
    231 /*************************************************
    232 *       Free memory obtained by get_substring    *
    233 *************************************************/
    234 
    235 /*
    236 Argument:     the result of a previous pcre2_substring_get_byxxx()
    237 Returns:      nothing
    238 */
    239 
    240 PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
    241 pcre2_substring_free(PCRE2_UCHAR *string)
    242 {
    243 if (string != NULL)
    244   {
    245   pcre2_memctl *memctl = (pcre2_memctl *)((char *)string - sizeof(pcre2_memctl));
    246   memctl->free(memctl, memctl->memory_data);
    247   }
    248 }
    249 
    250 
    251 
    252 /*************************************************
    253 *         Get length of a named substring        *
    254 *************************************************/
    255 
    256 /* This function returns the length of a named captured substring. If the regex
    257 permits duplicate names, the first substring that is set is chosen.
    258 
    259 Arguments:
    260   match_data      pointer to match data
    261   stringname      the name of the required substring
    262   sizeptr         where to put the length
    263 
    264 Returns:          0 if successful, else a negative error number
    265 */
    266 
    267 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
    268 pcre2_substring_length_byname(pcre2_match_data *match_data,
    269   PCRE2_SPTR stringname, PCRE2_SIZE *sizeptr)
    270 {
    271 PCRE2_SPTR first, last, entry;
    272 int failrc, entrysize;
    273 if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
    274   return PCRE2_ERROR_DFA_UFUNC;
    275 entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
    276   &first, &last);
    277 if (entrysize < 0) return entrysize;
    278 failrc = PCRE2_ERROR_UNAVAILABLE;
    279 for (entry = first; entry <= last; entry += entrysize)
    280   {
    281   uint32_t n = GET2(entry, 0);
    282   if (n < match_data->oveccount)
    283     {
    284     if (match_data->ovector[n*2] != PCRE2_UNSET)
    285       return pcre2_substring_length_bynumber(match_data, n, sizeptr);
    286     failrc = PCRE2_ERROR_UNSET;
    287     }
    288   }
    289 return failrc;
    290 }
    291 
    292 
    293 
    294 /*************************************************
    295 *        Get length of a numbered substring      *
    296 *************************************************/
    297 
    298 /* This function returns the length of a captured substring. If the start is
    299 beyond the end (which can happen when \K is used in an assertion), it sets the
    300 length to zero.
    301 
    302 Arguments:
    303   match_data      pointer to match data
    304   stringnumber    the number of the required substring
    305   sizeptr         where to put the length, if not NULL
    306 
    307 Returns:         if successful: 0
    308                  if not successful, a negative error code:
    309                    PCRE2_ERROR_NOSUBSTRING: no such substring
    310                    PCRE2_ERROR_UNAVAILABLE: ovector is too small
    311                    PCRE2_ERROR_UNSET: substring is not set
    312 */
    313 
    314 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
    315 pcre2_substring_length_bynumber(pcre2_match_data *match_data,
    316   uint32_t stringnumber, PCRE2_SIZE *sizeptr)
    317 {
    318 PCRE2_SIZE left, right;
    319 int count = match_data->rc;
    320 if (count == PCRE2_ERROR_PARTIAL)
    321   {
    322   if (stringnumber > 0) return PCRE2_ERROR_PARTIAL;
    323   count = 0;
    324   }
    325 else if (count < 0) return count;            /* Match failed */
    326 
    327 if (match_data->matchedby != PCRE2_MATCHEDBY_DFA_INTERPRETER)
    328   {
    329   if (stringnumber > match_data->code->top_bracket)
    330     return PCRE2_ERROR_NOSUBSTRING;
    331   if (stringnumber >= match_data->oveccount)
    332     return PCRE2_ERROR_UNAVAILABLE;
    333   if (match_data->ovector[stringnumber*2] == PCRE2_UNSET)
    334     return PCRE2_ERROR_UNSET;
    335   }
    336 else  /* Matched using pcre2_dfa_match() */
    337   {
    338   if (stringnumber >= match_data->oveccount) return PCRE2_ERROR_UNAVAILABLE;
    339   if (count != 0 && stringnumber >= (uint32_t)count) return PCRE2_ERROR_UNSET;
    340   }
    341 
    342 left = match_data->ovector[stringnumber*2];
    343 right = match_data->ovector[stringnumber*2+1];
    344 if (sizeptr != NULL) *sizeptr = (left > right)? 0 : right - left;
    345 return 0;
    346 }
    347 
    348 
    349 
    350 /*************************************************
    351 *    Extract all captured strings to new memory  *
    352 *************************************************/
    353 
    354 /* This function gets one chunk of memory and builds a list of pointers and all
    355 the captured substrings in it. A NULL pointer is put on the end of the list.
    356 The substrings are zero-terminated, but also, if the final argument is
    357 non-NULL, a list of lengths is also returned. This allows binary data to be
    358 handled.
    359 
    360 Arguments:
    361   match_data     points to the match data
    362   listptr        set to point to the list of pointers
    363   lengthsptr     set to point to the list of lengths (may be NULL)
    364 
    365 Returns:         if successful: 0
    366                  if not successful, a negative error code:
    367                    PCRE2_ERROR_NOMEMORY: failed to get memory,
    368                    or a match failure code
    369 */
    370 
    371 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
    372 pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr,
    373   PCRE2_SIZE **lengthsptr)
    374 {
    375 int i, count, count2;
    376 PCRE2_SIZE size;
    377 PCRE2_SIZE *lensp;
    378 pcre2_memctl *memp;
    379 PCRE2_UCHAR **listp;
    380 PCRE2_UCHAR *sp;
    381 PCRE2_SIZE *ovector;
    382 
    383 if ((count = match_data->rc) < 0) return count;   /* Match failed */
    384 if (count == 0) count = match_data->oveccount;    /* Ovector too small */
    385 
    386 count2 = 2*count;
    387 ovector = match_data->ovector;
    388 size = sizeof(pcre2_memctl) + sizeof(PCRE2_UCHAR *);      /* For final NULL */
    389 if (lengthsptr != NULL) size += sizeof(PCRE2_SIZE) * count;  /* For lengths */
    390 
    391 for (i = 0; i < count2; i += 2)
    392   {
    393   size += sizeof(PCRE2_UCHAR *) + CU2BYTES(1);
    394   if (ovector[i+1] > ovector[i]) size += CU2BYTES(ovector[i+1] - ovector[i]);
    395   }
    396 
    397 memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data);
    398 if (memp == NULL) return PCRE2_ERROR_NOMEMORY;
    399 
    400 *listptr = listp = (PCRE2_UCHAR **)((char *)memp + sizeof(pcre2_memctl));
    401 lensp = (PCRE2_SIZE *)((char *)listp + sizeof(PCRE2_UCHAR *) * (count + 1));
    402 
    403 if (lengthsptr == NULL)
    404   {
    405   sp = (PCRE2_UCHAR *)lensp;
    406   lensp = NULL;
    407   }
    408 else
    409   {
    410   *lengthsptr = lensp;
    411   sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count);
    412   }
    413 
    414 for (i = 0; i < count2; i += 2)
    415   {
    416   size = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;
    417 
    418   /* Size == 0 includes the case when the capture is unset. Avoid adding
    419   PCRE2_UNSET to match_data->subject because it overflows, even though with
    420   zero size calling memcpy() is harmless. */
    421 
    422   if (size != 0) memcpy(sp, match_data->subject + ovector[i], CU2BYTES(size));
    423   *listp++ = sp;
    424   if (lensp != NULL) *lensp++ = size;
    425   sp += size;
    426   *sp++ = 0;
    427   }
    428 
    429 *listp = NULL;
    430 return 0;
    431 }
    432 
    433 
    434 
    435 /*************************************************
    436 *   Free memory obtained by substring_list_get   *
    437 *************************************************/
    438 
    439 /*
    440 Argument:     the result of a previous pcre2_substring_list_get()
    441 Returns:      nothing
    442 */
    443 
    444 PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
    445 pcre2_substring_list_free(PCRE2_SPTR *list)
    446 {
    447 if (list != NULL)
    448   {
    449   pcre2_memctl *memctl = (pcre2_memctl *)((char *)list - sizeof(pcre2_memctl));
    450   memctl->free(memctl, memctl->memory_data);
    451   }
    452 }
    453 
    454 
    455 
    456 /*************************************************
    457 *     Find (multiple) entries for named string   *
    458 *************************************************/
    459 
    460 /* This function scans the nametable for a given name, using binary chop. It
    461 returns either two pointers to the entries in the table, or, if no pointers are
    462 given, the number of a unique group with the given name. If duplicate names are
    463 permitted, and the name is not unique, an error is generated.
    464 
    465 Arguments:
    466   code        the compiled regex
    467   stringname  the name whose entries required
    468   firstptr    where to put the pointer to the first entry
    469   lastptr     where to put the pointer to the last entry
    470 
    471 Returns:      PCRE2_ERROR_NOSUBSTRING if the name is not found
    472               otherwise, if firstptr and lastptr are NULL:
    473                 a group number for a unique substring
    474                 else PCRE2_ERROR_NOUNIQUESUBSTRING
    475               otherwise:
    476                 the length of each entry, having set firstptr and lastptr
    477 */
    478 
    479 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
    480 pcre2_substring_nametable_scan(const pcre2_code *code, PCRE2_SPTR stringname,
    481   PCRE2_SPTR *firstptr, PCRE2_SPTR *lastptr)
    482 {
    483 uint16_t bot = 0;
    484 uint16_t top = code->name_count;
    485 uint16_t entrysize = code->name_entry_size;
    486 PCRE2_SPTR nametable = (PCRE2_SPTR)((char *)code + sizeof(pcre2_real_code));
    487 
    488 while (top > bot)
    489   {
    490   uint16_t mid = (top + bot) / 2;
    491   PCRE2_SPTR entry = nametable + entrysize*mid;
    492   int c = PRIV(strcmp)(stringname, entry + IMM2_SIZE);
    493   if (c == 0)
    494     {
    495     PCRE2_SPTR first;
    496     PCRE2_SPTR last;
    497     PCRE2_SPTR lastentry;
    498     lastentry = nametable + entrysize * (code->name_count - 1);
    499     first = last = entry;
    500     while (first > nametable)
    501       {
    502       if (PRIV(strcmp)(stringname, (first - entrysize + IMM2_SIZE)) != 0) break;
    503       first -= entrysize;
    504       }
    505     while (last < lastentry)
    506       {
    507       if (PRIV(strcmp)(stringname, (last + entrysize + IMM2_SIZE)) != 0) break;
    508       last += entrysize;
    509       }
    510     if (firstptr == NULL) return (first == last)?
    511       (int)GET2(entry, 0) : PCRE2_ERROR_NOUNIQUESUBSTRING;
    512     *firstptr = first;
    513     *lastptr = last;
    514     return entrysize;
    515     }
    516   if (c > 0) bot = mid + 1; else top = mid;
    517   }
    518 
    519 return PCRE2_ERROR_NOSUBSTRING;
    520 }
    521 
    522 
    523 /*************************************************
    524 *           Find number for named string         *
    525 *************************************************/
    526 
    527 /* This function is a convenience wrapper for pcre2_substring_nametable_scan()
    528 when it is known that names are unique. If there are duplicate names, it is not
    529 defined which number is returned.
    530 
    531 Arguments:
    532   code        the compiled regex
    533   stringname  the name whose number is required
    534 
    535 Returns:      the number of the named parenthesis, or a negative number
    536                 PCRE2_ERROR_NOSUBSTRING if not found
    537                 PCRE2_ERROR_NOUNIQUESUBSTRING if not unique
    538 */
    539 
    540 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
    541 pcre2_substring_number_from_name(const pcre2_code *code,
    542   PCRE2_SPTR stringname)
    543 {
    544 return pcre2_substring_nametable_scan(code, stringname, NULL, NULL);
    545 }
    546 
    547 /* End of pcre2_substring.c */
    548