Home | History | Annotate | Download | only in pcre
      1 /*************************************************
      2 *      Perl-Compatible Regular Expressions       *
      3 *************************************************/
      4 
      5 /* PCRE is a library of functions to support regular expressions whose syntax
      6 and semantics are as close as possible to those of the Perl 5 language.
      7 
      8                        Written by Philip Hazel
      9            Copyright (c) 1997-2008 University of Cambridge
     10 
     11 -----------------------------------------------------------------------------
     12 Redistribution and use in source and binary forms, with or without
     13 modification, are permitted provided that the following conditions are met:
     14 
     15     * Redistributions of source code must retain the above copyright notice,
     16       this list of conditions and the following disclaimer.
     17 
     18     * Redistributions in binary form must reproduce the above copyright
     19       notice, this list of conditions and the following disclaimer in the
     20       documentation and/or other materials provided with the distribution.
     21 
     22     * Neither the name of the University of Cambridge nor the names of its
     23       contributors may be used to endorse or promote products derived from
     24       this software without specific prior written permission.
     25 
     26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36 POSSIBILITY OF SUCH DAMAGE.
     37 -----------------------------------------------------------------------------
     38 */
     39 
     40 
     41 /* This module contains some convenience functions for extracting substrings
     42 from the subject string after a regex match has succeeded. The original idea
     43 for these functions came from Scott Wimer. */
     44 
     45 
     46 #ifdef HAVE_CONFIG_H
     47 #include "config.h"
     48 #endif
     49 
     50 #include "pcre_internal.h"
     51 
     52 
     53 /*************************************************
     54 *           Find number for named string         *
     55 *************************************************/
     56 
     57 /* This function is used by the get_first_set() function below, as well
     58 as being generally available. It assumes that names are unique.
     59 
     60 Arguments:
     61   code        the compiled regex
     62   stringname  the name whose number is required
     63 
     64 Returns:      the number of the named parentheses, or a negative number
     65                 (PCRE_ERROR_NOSUBSTRING) if not found
     66 */
     67 
     68 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
     69 pcre_get_stringnumber(const pcre *code, const char *stringname)
     70 {
     71 int rc;
     72 int entrysize;
     73 int top, bot;
     74 uschar *nametable;
     75 
     76 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
     77   return rc;
     78 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
     79 
     80 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
     81   return rc;
     82 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
     83   return rc;
     84 
     85 bot = 0;
     86 while (top > bot)
     87   {
     88   int mid = (top + bot) / 2;
     89   uschar *entry = nametable + entrysize*mid;
     90   int c = strcmp(stringname, (char *)(entry + 2));
     91   if (c == 0) return (entry[0] << 8) + entry[1];
     92   if (c > 0) bot = mid + 1; else top = mid;
     93   }
     94 
     95 return PCRE_ERROR_NOSUBSTRING;
     96 }
     97 
     98 
     99 
    100 /*************************************************
    101 *     Find (multiple) entries for named string   *
    102 *************************************************/
    103 
    104 /* This is used by the get_first_set() function below, as well as being
    105 generally available. It is used when duplicated names are permitted.
    106 
    107 Arguments:
    108   code        the compiled regex
    109   stringname  the name whose entries required
    110   firstptr    where to put the pointer to the first entry
    111   lastptr     where to put the pointer to the last entry
    112 
    113 Returns:      the length of each entry, or a negative number
    114                 (PCRE_ERROR_NOSUBSTRING) if not found
    115 */
    116 
    117 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
    118 pcre_get_stringtable_entries(const pcre *code, const char *stringname,
    119   char **firstptr, char **lastptr)
    120 {
    121 int rc;
    122 int entrysize;
    123 int top, bot;
    124 uschar *nametable, *lastentry;
    125 
    126 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
    127   return rc;
    128 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
    129 
    130 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
    131   return rc;
    132 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
    133   return rc;
    134 
    135 lastentry = nametable + entrysize * (top - 1);
    136 bot = 0;
    137 while (top > bot)
    138   {
    139   int mid = (top + bot) / 2;
    140   uschar *entry = nametable + entrysize*mid;
    141   int c = strcmp(stringname, (char *)(entry + 2));
    142   if (c == 0)
    143     {
    144     uschar *first = entry;
    145     uschar *last = entry;
    146     while (first > nametable)
    147       {
    148       if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
    149       first -= entrysize;
    150       }
    151     while (last < lastentry)
    152       {
    153       if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
    154       last += entrysize;
    155       }
    156     *firstptr = (char *)first;
    157     *lastptr = (char *)last;
    158     return entrysize;
    159     }
    160   if (c > 0) bot = mid + 1; else top = mid;
    161   }
    162 
    163 return PCRE_ERROR_NOSUBSTRING;
    164 }
    165 
    166 
    167 
    168 /*************************************************
    169 *    Find first set of multiple named strings    *
    170 *************************************************/
    171 
    172 /* This function allows for duplicate names in the table of named substrings.
    173 It returns the number of the first one that was set in a pattern match.
    174 
    175 Arguments:
    176   code         the compiled regex
    177   stringname   the name of the capturing substring
    178   ovector      the vector of matched substrings
    179 
    180 Returns:       the number of the first that is set,
    181                or the number of the last one if none are set,
    182                or a negative number on error
    183 */
    184 
    185 static int
    186 get_first_set(const pcre *code, const char *stringname, int *ovector)
    187 {
    188 const real_pcre *re = (const real_pcre *)code;
    189 int entrysize;
    190 char *first, *last;
    191 uschar *entry;
    192 if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
    193   return pcre_get_stringnumber(code, stringname);
    194 entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
    195 if (entrysize <= 0) return entrysize;
    196 for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
    197   {
    198   int n = (entry[0] << 8) + entry[1];
    199   if (ovector[n*2] >= 0) return n;
    200   }
    201 return (first[0] << 8) + first[1];
    202 }
    203 
    204 
    205 
    206 
    207 /*************************************************
    208 *      Copy captured string to given buffer      *
    209 *************************************************/
    210 
    211 /* This function copies a single captured substring into a given buffer.
    212 Note that we use memcpy() rather than strncpy() in case there are binary zeros
    213 in the string.
    214 
    215 Arguments:
    216   subject        the subject string that was matched
    217   ovector        pointer to the offsets table
    218   stringcount    the number of substrings that were captured
    219                    (i.e. the yield of the pcre_exec call, unless
    220                    that was zero, in which case it should be 1/3
    221                    of the offset table size)
    222   stringnumber   the number of the required substring
    223   buffer         where to put the substring
    224   size           the size of the buffer
    225 
    226 Returns:         if successful:
    227                    the length of the copied string, not including the zero
    228                    that is put on the end; can be zero
    229                  if not successful:
    230                    PCRE_ERROR_NOMEMORY (-6) buffer too small
    231                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
    232 */
    233 
    234 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
    235 pcre_copy_substring(const char *subject, int *ovector, int stringcount,
    236   int stringnumber, char *buffer, int size)
    237 {
    238 int yield;
    239 if (stringnumber < 0 || stringnumber >= stringcount)
    240   return PCRE_ERROR_NOSUBSTRING;
    241 stringnumber *= 2;
    242 yield = ovector[stringnumber+1] - ovector[stringnumber];
    243 if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
    244 memcpy(buffer, subject + ovector[stringnumber], yield);
    245 buffer[yield] = 0;
    246 return yield;
    247 }
    248 
    249 
    250 
    251 /*************************************************
    252 *   Copy named captured string to given buffer   *
    253 *************************************************/
    254 
    255 /* This function copies a single captured substring into a given buffer,
    256 identifying it by name. If the regex permits duplicate names, the first
    257 substring that is set is chosen.
    258 
    259 Arguments:
    260   code           the compiled regex
    261   subject        the subject string that was matched
    262   ovector        pointer to the offsets table
    263   stringcount    the number of substrings that were captured
    264                    (i.e. the yield of the pcre_exec call, unless
    265                    that was zero, in which case it should be 1/3
    266                    of the offset table size)
    267   stringname     the name of the required substring
    268   buffer         where to put the substring
    269   size           the size of the buffer
    270 
    271 Returns:         if successful:
    272                    the length of the copied string, not including the zero
    273                    that is put on the end; can be zero
    274                  if not successful:
    275                    PCRE_ERROR_NOMEMORY (-6) buffer too small
    276                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
    277 */
    278 
    279 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
    280 pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
    281   int stringcount, const char *stringname, char *buffer, int size)
    282 {
    283 int n = get_first_set(code, stringname, ovector);
    284 if (n <= 0) return n;
    285 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
    286 }
    287 
    288 
    289 
    290 /*************************************************
    291 *      Copy all captured strings to new store    *
    292 *************************************************/
    293 
    294 /* This function gets one chunk of store and builds a list of pointers and all
    295 of the captured substrings in it. A NULL pointer is put on the end of the list.
    296 
    297 Arguments:
    298   subject        the subject string that was matched
    299   ovector        pointer to the offsets table
    300   stringcount    the number of substrings that were captured
    301                    (i.e. the yield of the pcre_exec call, unless
    302                    that was zero, in which case it should be 1/3
    303                    of the offset table size)
    304   listptr        set to point to the list of pointers
    305 
    306 Returns:         if successful: 0
    307                  if not successful:
    308                    PCRE_ERROR_NOMEMORY (-6) failed to get store
    309 */
    310 
    311 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
    312 pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
    313   const char ***listptr)
    314 {
    315 int i;
    316 int size = sizeof(char *);
    317 int double_count = stringcount * 2;
    318 char **stringlist;
    319 char *p;
    320 
    321 for (i = 0; i < double_count; i += 2)
    322   size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
    323 
    324 stringlist = (char **)(pcre_malloc)(size);
    325 if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
    326 
    327 *listptr = (const char **)stringlist;
    328 p = (char *)(stringlist + stringcount + 1);
    329 
    330 for (i = 0; i < double_count; i += 2)
    331   {
    332   int len = ovector[i+1] - ovector[i];
    333   memcpy(p, subject + ovector[i], len);
    334   *stringlist++ = p;
    335   p += len;
    336   *p++ = 0;
    337   }
    338 
    339 *stringlist = NULL;
    340 return 0;
    341 }
    342 
    343 
    344 
    345 /*************************************************
    346 *   Free store obtained by get_substring_list    *
    347 *************************************************/
    348 
    349 /* This function exists for the benefit of people calling PCRE from non-C
    350 programs that can call its functions, but not free() or (pcre_free)() directly.
    351 
    352 Argument:   the result of a previous pcre_get_substring_list()
    353 Returns:    nothing
    354 */
    355 
    356 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
    357 pcre_free_substring_list(const char **pointer)
    358 {
    359 (pcre_free)((void *)pointer);
    360 }
    361 
    362 
    363 
    364 /*************************************************
    365 *      Copy captured string to new store         *
    366 *************************************************/
    367 
    368 /* This function copies a single captured substring into a piece of new
    369 store
    370 
    371 Arguments:
    372   subject        the subject string that was matched
    373   ovector        pointer to the offsets table
    374   stringcount    the number of substrings that were captured
    375                    (i.e. the yield of the pcre_exec call, unless
    376                    that was zero, in which case it should be 1/3
    377                    of the offset table size)
    378   stringnumber   the number of the required substring
    379   stringptr      where to put a pointer to the substring
    380 
    381 Returns:         if successful:
    382                    the length of the string, not including the zero that
    383                    is put on the end; can be zero
    384                  if not successful:
    385                    PCRE_ERROR_NOMEMORY (-6) failed to get store
    386                    PCRE_ERROR_NOSUBSTRING (-7) substring not present
    387 */
    388 
    389 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
    390 pcre_get_substring(const char *subject, int *ovector, int stringcount,
    391   int stringnumber, const char **stringptr)
    392 {
    393 int yield;
    394 char *substring;
    395 if (stringnumber < 0 || stringnumber >= stringcount)
    396   return PCRE_ERROR_NOSUBSTRING;
    397 stringnumber *= 2;
    398 yield = ovector[stringnumber+1] - ovector[stringnumber];
    399 substring = (char *)(pcre_malloc)(yield + 1);
    400 if (substring == NULL) return PCRE_ERROR_NOMEMORY;
    401 memcpy(substring, subject + ovector[stringnumber], yield);
    402 substring[yield] = 0;
    403 *stringptr = substring;
    404 return yield;
    405 }
    406 
    407 
    408 
    409 /*************************************************
    410 *   Copy named captured string to new store      *
    411 *************************************************/
    412 
    413 /* This function copies a single captured substring, identified by name, into
    414 new store. If the regex permits duplicate names, the first substring that is
    415 set is chosen.
    416 
    417 Arguments:
    418   code           the compiled regex
    419   subject        the subject string that was matched
    420   ovector        pointer to the offsets table
    421   stringcount    the number of substrings that were captured
    422                    (i.e. the yield of the pcre_exec call, unless
    423                    that was zero, in which case it should be 1/3
    424                    of the offset table size)
    425   stringname     the name of the required substring
    426   stringptr      where to put the pointer
    427 
    428 Returns:         if successful:
    429                    the length of the copied string, not including the zero
    430                    that is put on the end; can be zero
    431                  if not successful:
    432                    PCRE_ERROR_NOMEMORY (-6) couldn't get memory
    433                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
    434 */
    435 
    436 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
    437 pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
    438   int stringcount, const char *stringname, const char **stringptr)
    439 {
    440 int n = get_first_set(code, stringname, ovector);
    441 if (n <= 0) return n;
    442 return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
    443 }
    444 
    445 
    446 
    447 
    448 /*************************************************
    449 *       Free store obtained by get_substring     *
    450 *************************************************/
    451 
    452 /* This function exists for the benefit of people calling PCRE from non-C
    453 programs that can call its functions, but not free() or (pcre_free)() directly.
    454 
    455 Argument:   the result of a previous pcre_get_substring()
    456 Returns:    nothing
    457 */
    458 
    459 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
    460 pcre_free_substring(const char *pointer)
    461 {
    462 (pcre_free)((void *)pointer);
    463 }
    464 
    465 /* End of pcre_get.c */
    466