1 /************************************************* 2 * Perl-Compatible Regular Expressions * 3 *************************************************/ 4 5 /* PCRE is a library of functions to support regular expressions whose syntax 6 and semantics are as close as possible to those of the Perl 5 language. 7 8 Written by Philip Hazel 9 Original API code Copyright (c) 1997-2012 University of Cambridge 10 New API code Copyright (c) 2016 University of Cambridge 11 12 ----------------------------------------------------------------------------- 13 Redistribution and use in source and binary forms, with or without 14 modification, are permitted provided that the following conditions are met: 15 16 * Redistributions of source code must retain the above copyright notice, 17 this list of conditions and the following disclaimer. 18 19 * Redistributions in binary form must reproduce the above copyright 20 notice, this list of conditions and the following disclaimer in the 21 documentation and/or other materials provided with the distribution. 22 23 * Neither the name of the University of Cambridge nor the names of its 24 contributors may be used to endorse or promote products derived from 25 this software without specific prior written permission. 26 27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 POSSIBILITY OF SUCH DAMAGE. 38 ----------------------------------------------------------------------------- 39 */ 40 41 42 #ifdef HAVE_CONFIG_H 43 #include "config.h" 44 #endif 45 46 #include "pcre2_internal.h" 47 48 49 50 /************************************************* 51 * Copy named captured string to given buffer * 52 *************************************************/ 53 54 /* This function copies a single captured substring into a given buffer, 55 identifying it by name. If the regex permits duplicate names, the first 56 substring that is set is chosen. 57 58 Arguments: 59 match_data points to the match data 60 stringname the name of the required substring 61 buffer where to put the substring 62 sizeptr the size of the buffer, updated to the size of the substring 63 64 Returns: if successful: zero 65 if not successful, a negative error code: 66 (1) an error from nametable_scan() 67 (2) an error from copy_bynumber() 68 (3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector 69 (4) PCRE2_ERROR_UNSET: all named groups in ovector are unset 70 */ 71 72 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION 73 pcre2_substring_copy_byname(pcre2_match_data *match_data, PCRE2_SPTR stringname, 74 PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr) 75 { 76 PCRE2_SPTR first, last, entry; 77 int failrc, entrysize; 78 if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER) 79 return PCRE2_ERROR_DFA_UFUNC; 80 entrysize = pcre2_substring_nametable_scan(match_data->code, stringname, 81 &first, &last); 82 if (entrysize < 0) return entrysize; 83 failrc = PCRE2_ERROR_UNAVAILABLE; 84 for (entry = first; entry <= last; entry += entrysize) 85 { 86 uint32_t n = GET2(entry, 0); 87 if (n < match_data->oveccount) 88 { 89 if (match_data->ovector[n*2] != PCRE2_UNSET) 90 return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr); 91 failrc = PCRE2_ERROR_UNSET; 92 } 93 } 94 return failrc; 95 } 96 97 98 99 /************************************************* 100 * Copy numbered captured string to given buffer * 101 *************************************************/ 102 103 /* This function copies a single captured substring into a given buffer, 104 identifying it by number. 105 106 Arguments: 107 match_data points to the match data 108 stringnumber the number of the required substring 109 buffer where to put the substring 110 sizeptr the size of the buffer, updated to the size of the substring 111 112 Returns: if successful: 0 113 if not successful, a negative error code: 114 PCRE2_ERROR_NOMEMORY: buffer too small 115 PCRE2_ERROR_NOSUBSTRING: no such substring 116 PCRE2_ERROR_UNAVAILABLE: ovector too small 117 PCRE2_ERROR_UNSET: substring is not set 118 */ 119 120 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION 121 pcre2_substring_copy_bynumber(pcre2_match_data *match_data, 122 uint32_t stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr) 123 { 124 int rc; 125 PCRE2_SIZE size; 126 rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size); 127 if (rc < 0) return rc; 128 if (size + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY; 129 memcpy(buffer, match_data->subject + match_data->ovector[stringnumber*2], 130 CU2BYTES(size)); 131 buffer[size] = 0; 132 *sizeptr = size; 133 return 0; 134 } 135 136 137 138 /************************************************* 139 * Extract named captured string * 140 *************************************************/ 141 142 /* This function copies a single captured substring, identified by name, into 143 new memory. If the regex permits duplicate names, the first substring that is 144 set is chosen. 145 146 Arguments: 147 match_data pointer to match_data 148 stringname the name of the required substring 149 stringptr where to put the pointer to the new memory 150 sizeptr where to put the length of the substring 151 152 Returns: if successful: zero 153 if not successful, a negative value: 154 (1) an error from nametable_scan() 155 (2) an error from get_bynumber() 156 (3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector 157 (4) PCRE2_ERROR_UNSET: all named groups in ovector are unset 158 */ 159 160 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION 161 pcre2_substring_get_byname(pcre2_match_data *match_data, 162 PCRE2_SPTR stringname, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr) 163 { 164 PCRE2_SPTR first, last, entry; 165 int failrc, entrysize; 166 if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER) 167 return PCRE2_ERROR_DFA_UFUNC; 168 entrysize = pcre2_substring_nametable_scan(match_data->code, stringname, 169 &first, &last); 170 if (entrysize < 0) return entrysize; 171 failrc = PCRE2_ERROR_UNAVAILABLE; 172 for (entry = first; entry <= last; entry += entrysize) 173 { 174 uint32_t n = GET2(entry, 0); 175 if (n < match_data->oveccount) 176 { 177 if (match_data->ovector[n*2] != PCRE2_UNSET) 178 return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr); 179 failrc = PCRE2_ERROR_UNSET; 180 } 181 } 182 return failrc; 183 } 184 185 186 187 /************************************************* 188 * Extract captured string to new memory * 189 *************************************************/ 190 191 /* This function copies a single captured substring into a piece of new 192 memory. 193 194 Arguments: 195 match_data points to match data 196 stringnumber the number of the required substring 197 stringptr where to put a pointer to the new memory 198 sizeptr where to put the size of the substring 199 200 Returns: if successful: 0 201 if not successful, a negative error code: 202 PCRE2_ERROR_NOMEMORY: failed to get memory 203 PCRE2_ERROR_NOSUBSTRING: no such substring 204 PCRE2_ERROR_UNAVAILABLE: ovector too small 205 PCRE2_ERROR_UNSET: substring is not set 206 */ 207 208 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION 209 pcre2_substring_get_bynumber(pcre2_match_data *match_data, 210 uint32_t stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr) 211 { 212 int rc; 213 PCRE2_SIZE size; 214 PCRE2_UCHAR *yield; 215 rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size); 216 if (rc < 0) return rc; 217 yield = PRIV(memctl_malloc)(sizeof(pcre2_memctl) + 218 (size + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data); 219 if (yield == NULL) return PCRE2_ERROR_NOMEMORY; 220 yield = (PCRE2_UCHAR *)(((char *)yield) + sizeof(pcre2_memctl)); 221 memcpy(yield, match_data->subject + match_data->ovector[stringnumber*2], 222 CU2BYTES(size)); 223 yield[size] = 0; 224 *stringptr = yield; 225 *sizeptr = size; 226 return 0; 227 } 228 229 230 231 /************************************************* 232 * Free memory obtained by get_substring * 233 *************************************************/ 234 235 /* 236 Argument: the result of a previous pcre2_substring_get_byxxx() 237 Returns: nothing 238 */ 239 240 PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION 241 pcre2_substring_free(PCRE2_UCHAR *string) 242 { 243 if (string != NULL) 244 { 245 pcre2_memctl *memctl = (pcre2_memctl *)((char *)string - sizeof(pcre2_memctl)); 246 memctl->free(memctl, memctl->memory_data); 247 } 248 } 249 250 251 252 /************************************************* 253 * Get length of a named substring * 254 *************************************************/ 255 256 /* This function returns the length of a named captured substring. If the regex 257 permits duplicate names, the first substring that is set is chosen. 258 259 Arguments: 260 match_data pointer to match data 261 stringname the name of the required substring 262 sizeptr where to put the length 263 264 Returns: 0 if successful, else a negative error number 265 */ 266 267 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION 268 pcre2_substring_length_byname(pcre2_match_data *match_data, 269 PCRE2_SPTR stringname, PCRE2_SIZE *sizeptr) 270 { 271 PCRE2_SPTR first, last, entry; 272 int failrc, entrysize; 273 if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER) 274 return PCRE2_ERROR_DFA_UFUNC; 275 entrysize = pcre2_substring_nametable_scan(match_data->code, stringname, 276 &first, &last); 277 if (entrysize < 0) return entrysize; 278 failrc = PCRE2_ERROR_UNAVAILABLE; 279 for (entry = first; entry <= last; entry += entrysize) 280 { 281 uint32_t n = GET2(entry, 0); 282 if (n < match_data->oveccount) 283 { 284 if (match_data->ovector[n*2] != PCRE2_UNSET) 285 return pcre2_substring_length_bynumber(match_data, n, sizeptr); 286 failrc = PCRE2_ERROR_UNSET; 287 } 288 } 289 return failrc; 290 } 291 292 293 294 /************************************************* 295 * Get length of a numbered substring * 296 *************************************************/ 297 298 /* This function returns the length of a captured substring. If the start is 299 beyond the end (which can happen when \K is used in an assertion), it sets the 300 length to zero. 301 302 Arguments: 303 match_data pointer to match data 304 stringnumber the number of the required substring 305 sizeptr where to put the length, if not NULL 306 307 Returns: if successful: 0 308 if not successful, a negative error code: 309 PCRE2_ERROR_NOSUBSTRING: no such substring 310 PCRE2_ERROR_UNAVAILABLE: ovector is too small 311 PCRE2_ERROR_UNSET: substring is not set 312 */ 313 314 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION 315 pcre2_substring_length_bynumber(pcre2_match_data *match_data, 316 uint32_t stringnumber, PCRE2_SIZE *sizeptr) 317 { 318 PCRE2_SIZE left, right; 319 int count = match_data->rc; 320 if (count == PCRE2_ERROR_PARTIAL) 321 { 322 if (stringnumber > 0) return PCRE2_ERROR_PARTIAL; 323 count = 0; 324 } 325 else if (count < 0) return count; /* Match failed */ 326 327 if (match_data->matchedby != PCRE2_MATCHEDBY_DFA_INTERPRETER) 328 { 329 if (stringnumber > match_data->code->top_bracket) 330 return PCRE2_ERROR_NOSUBSTRING; 331 if (stringnumber >= match_data->oveccount) 332 return PCRE2_ERROR_UNAVAILABLE; 333 if (match_data->ovector[stringnumber*2] == PCRE2_UNSET) 334 return PCRE2_ERROR_UNSET; 335 } 336 else /* Matched using pcre2_dfa_match() */ 337 { 338 if (stringnumber >= match_data->oveccount) return PCRE2_ERROR_UNAVAILABLE; 339 if (count != 0 && stringnumber >= (uint32_t)count) return PCRE2_ERROR_UNSET; 340 } 341 342 left = match_data->ovector[stringnumber*2]; 343 right = match_data->ovector[stringnumber*2+1]; 344 if (sizeptr != NULL) *sizeptr = (left > right)? 0 : right - left; 345 return 0; 346 } 347 348 349 350 /************************************************* 351 * Extract all captured strings to new memory * 352 *************************************************/ 353 354 /* This function gets one chunk of memory and builds a list of pointers and all 355 the captured substrings in it. A NULL pointer is put on the end of the list. 356 The substrings are zero-terminated, but also, if the final argument is 357 non-NULL, a list of lengths is also returned. This allows binary data to be 358 handled. 359 360 Arguments: 361 match_data points to the match data 362 listptr set to point to the list of pointers 363 lengthsptr set to point to the list of lengths (may be NULL) 364 365 Returns: if successful: 0 366 if not successful, a negative error code: 367 PCRE2_ERROR_NOMEMORY: failed to get memory, 368 or a match failure code 369 */ 370 371 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION 372 pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr, 373 PCRE2_SIZE **lengthsptr) 374 { 375 int i, count, count2; 376 PCRE2_SIZE size; 377 PCRE2_SIZE *lensp; 378 pcre2_memctl *memp; 379 PCRE2_UCHAR **listp; 380 PCRE2_UCHAR *sp; 381 PCRE2_SIZE *ovector; 382 383 if ((count = match_data->rc) < 0) return count; /* Match failed */ 384 if (count == 0) count = match_data->oveccount; /* Ovector too small */ 385 386 count2 = 2*count; 387 ovector = match_data->ovector; 388 size = sizeof(pcre2_memctl) + sizeof(PCRE2_UCHAR *); /* For final NULL */ 389 if (lengthsptr != NULL) size += sizeof(PCRE2_SIZE) * count; /* For lengths */ 390 391 for (i = 0; i < count2; i += 2) 392 { 393 size += sizeof(PCRE2_UCHAR *) + CU2BYTES(1); 394 if (ovector[i+1] > ovector[i]) size += CU2BYTES(ovector[i+1] - ovector[i]); 395 } 396 397 memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data); 398 if (memp == NULL) return PCRE2_ERROR_NOMEMORY; 399 400 *listptr = listp = (PCRE2_UCHAR **)((char *)memp + sizeof(pcre2_memctl)); 401 lensp = (PCRE2_SIZE *)((char *)listp + sizeof(PCRE2_UCHAR *) * (count + 1)); 402 403 if (lengthsptr == NULL) 404 { 405 sp = (PCRE2_UCHAR *)lensp; 406 lensp = NULL; 407 } 408 else 409 { 410 *lengthsptr = lensp; 411 sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count); 412 } 413 414 for (i = 0; i < count2; i += 2) 415 { 416 size = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0; 417 memcpy(sp, match_data->subject + ovector[i], CU2BYTES(size)); 418 *listp++ = sp; 419 if (lensp != NULL) *lensp++ = size; 420 sp += size; 421 *sp++ = 0; 422 } 423 424 *listp = NULL; 425 return 0; 426 } 427 428 429 430 /************************************************* 431 * Free memory obtained by substring_list_get * 432 *************************************************/ 433 434 /* 435 Argument: the result of a previous pcre2_substring_list_get() 436 Returns: nothing 437 */ 438 439 PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION 440 pcre2_substring_list_free(PCRE2_SPTR *list) 441 { 442 if (list != NULL) 443 { 444 pcre2_memctl *memctl = (pcre2_memctl *)((char *)list - sizeof(pcre2_memctl)); 445 memctl->free(memctl, memctl->memory_data); 446 } 447 } 448 449 450 451 /************************************************* 452 * Find (multiple) entries for named string * 453 *************************************************/ 454 455 /* This function scans the nametable for a given name, using binary chop. It 456 returns either two pointers to the entries in the table, or, if no pointers are 457 given, the number of a unique group with the given name. If duplicate names are 458 permitted, and the name is not unique, an error is generated. 459 460 Arguments: 461 code the compiled regex 462 stringname the name whose entries required 463 firstptr where to put the pointer to the first entry 464 lastptr where to put the pointer to the last entry 465 466 Returns: PCRE2_ERROR_NOSUBSTRING if the name is not found 467 otherwise, if firstptr and lastptr are NULL: 468 a group number for a unique substring 469 else PCRE2_ERROR_NOUNIQUESUBSTRING 470 otherwise: 471 the length of each entry, having set firstptr and lastptr 472 */ 473 474 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION 475 pcre2_substring_nametable_scan(const pcre2_code *code, PCRE2_SPTR stringname, 476 PCRE2_SPTR *firstptr, PCRE2_SPTR *lastptr) 477 { 478 uint16_t bot = 0; 479 uint16_t top = code->name_count; 480 uint16_t entrysize = code->name_entry_size; 481 PCRE2_SPTR nametable = (PCRE2_SPTR)((char *)code + sizeof(pcre2_real_code)); 482 483 while (top > bot) 484 { 485 uint16_t mid = (top + bot) / 2; 486 PCRE2_SPTR entry = nametable + entrysize*mid; 487 int c = PRIV(strcmp)(stringname, entry + IMM2_SIZE); 488 if (c == 0) 489 { 490 PCRE2_SPTR first; 491 PCRE2_SPTR last; 492 PCRE2_SPTR lastentry; 493 lastentry = nametable + entrysize * (code->name_count - 1); 494 first = last = entry; 495 while (first > nametable) 496 { 497 if (PRIV(strcmp)(stringname, (first - entrysize + IMM2_SIZE)) != 0) break; 498 first -= entrysize; 499 } 500 while (last < lastentry) 501 { 502 if (PRIV(strcmp)(stringname, (last + entrysize + IMM2_SIZE)) != 0) break; 503 last += entrysize; 504 } 505 if (firstptr == NULL) return (first == last)? 506 (int)GET2(entry, 0) : PCRE2_ERROR_NOUNIQUESUBSTRING; 507 *firstptr = first; 508 *lastptr = last; 509 return entrysize; 510 } 511 if (c > 0) bot = mid + 1; else top = mid; 512 } 513 514 return PCRE2_ERROR_NOSUBSTRING; 515 } 516 517 518 /************************************************* 519 * Find number for named string * 520 *************************************************/ 521 522 /* This function is a convenience wrapper for pcre2_substring_nametable_scan() 523 when it is known that names are unique. If there are duplicate names, it is not 524 defined which number is returned. 525 526 Arguments: 527 code the compiled regex 528 stringname the name whose number is required 529 530 Returns: the number of the named parenthesis, or a negative number 531 PCRE2_ERROR_NOSUBSTRING if not found 532 PCRE2_ERROR_NOUNIQUESUBSTRING if not unique 533 */ 534 535 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION 536 pcre2_substring_number_from_name(const pcre2_code *code, 537 PCRE2_SPTR stringname) 538 { 539 return pcre2_substring_nametable_scan(code, stringname, NULL, NULL); 540 } 541 542 /* End of pcre2_substring.c */ 543