1 /************************************************* 2 * Perl-Compatible Regular Expressions * 3 *************************************************/ 4 5 /* PCRE is a library of functions to support regular expressions whose syntax 6 and semantics are as close as possible to those of the Perl 5 language. 7 8 Written by Philip Hazel 9 Original API code Copyright (c) 1997-2012 University of Cambridge 10 New API code Copyright (c) 2016-2018 University of Cambridge 11 12 ----------------------------------------------------------------------------- 13 Redistribution and use in source and binary forms, with or without 14 modification, are permitted provided that the following conditions are met: 15 16 * Redistributions of source code must retain the above copyright notice, 17 this list of conditions and the following disclaimer. 18 19 * Redistributions in binary form must reproduce the above copyright 20 notice, this list of conditions and the following disclaimer in the 21 documentation and/or other materials provided with the distribution. 22 23 * Neither the name of the University of Cambridge nor the names of its 24 contributors may be used to endorse or promote products derived from 25 this software without specific prior written permission. 26 27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 POSSIBILITY OF SUCH DAMAGE. 38 ----------------------------------------------------------------------------- 39 */ 40 41 42 #ifdef HAVE_CONFIG_H 43 #include "config.h" 44 #endif 45 46 #include "pcre2_internal.h" 47 48 49 /************************************************* 50 * Return info about compiled pattern * 51 *************************************************/ 52 53 /* 54 Arguments: 55 code points to compiled code 56 what what information is required 57 where where to put the information; if NULL, return length 58 59 Returns: 0 when data returned 60 > 0 when length requested 61 < 0 on error or unset value 62 */ 63 64 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION 65 pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where) 66 { 67 const pcre2_real_code *re = (pcre2_real_code *)code; 68 69 if (where == NULL) /* Requests field length */ 70 { 71 switch(what) 72 { 73 case PCRE2_INFO_ALLOPTIONS: 74 case PCRE2_INFO_ARGOPTIONS: 75 case PCRE2_INFO_BACKREFMAX: 76 case PCRE2_INFO_BSR: 77 case PCRE2_INFO_CAPTURECOUNT: 78 case PCRE2_INFO_DEPTHLIMIT: 79 case PCRE2_INFO_EXTRAOPTIONS: 80 case PCRE2_INFO_FIRSTCODETYPE: 81 case PCRE2_INFO_FIRSTCODEUNIT: 82 case PCRE2_INFO_HASBACKSLASHC: 83 case PCRE2_INFO_HASCRORLF: 84 case PCRE2_INFO_HEAPLIMIT: 85 case PCRE2_INFO_JCHANGED: 86 case PCRE2_INFO_LASTCODETYPE: 87 case PCRE2_INFO_LASTCODEUNIT: 88 case PCRE2_INFO_MATCHEMPTY: 89 case PCRE2_INFO_MATCHLIMIT: 90 case PCRE2_INFO_MAXLOOKBEHIND: 91 case PCRE2_INFO_MINLENGTH: 92 case PCRE2_INFO_NAMEENTRYSIZE: 93 case PCRE2_INFO_NAMECOUNT: 94 case PCRE2_INFO_NEWLINE: 95 return sizeof(uint32_t); 96 97 case PCRE2_INFO_FIRSTBITMAP: 98 return sizeof(const uint8_t *); 99 100 case PCRE2_INFO_JITSIZE: 101 case PCRE2_INFO_SIZE: 102 case PCRE2_INFO_FRAMESIZE: 103 return sizeof(size_t); 104 105 case PCRE2_INFO_NAMETABLE: 106 return sizeof(PCRE2_SPTR); 107 } 108 } 109 110 if (re == NULL) return PCRE2_ERROR_NULL; 111 112 /* Check that the first field in the block is the magic number. If it is not, 113 return with PCRE2_ERROR_BADMAGIC. */ 114 115 if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC; 116 117 /* Check that this pattern was compiled in the correct bit mode */ 118 119 if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE; 120 121 switch(what) 122 { 123 case PCRE2_INFO_ALLOPTIONS: 124 *((uint32_t *)where) = re->overall_options; 125 break; 126 127 case PCRE2_INFO_ARGOPTIONS: 128 *((uint32_t *)where) = re->compile_options; 129 break; 130 131 case PCRE2_INFO_BACKREFMAX: 132 *((uint32_t *)where) = re->top_backref; 133 break; 134 135 case PCRE2_INFO_BSR: 136 *((uint32_t *)where) = re->bsr_convention; 137 break; 138 139 case PCRE2_INFO_CAPTURECOUNT: 140 *((uint32_t *)where) = re->top_bracket; 141 break; 142 143 case PCRE2_INFO_DEPTHLIMIT: 144 *((uint32_t *)where) = re->limit_depth; 145 if (re->limit_depth == UINT32_MAX) return PCRE2_ERROR_UNSET; 146 break; 147 148 case PCRE2_INFO_EXTRAOPTIONS: 149 *((uint32_t *)where) = re->extra_options; 150 break; 151 152 case PCRE2_INFO_FIRSTCODETYPE: 153 *((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 1 : 154 ((re->flags & PCRE2_STARTLINE) != 0)? 2 : 0; 155 break; 156 157 case PCRE2_INFO_FIRSTCODEUNIT: 158 *((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 159 re->first_codeunit : 0; 160 break; 161 162 case PCRE2_INFO_FIRSTBITMAP: 163 *((const uint8_t **)where) = ((re->flags & PCRE2_FIRSTMAPSET) != 0)? 164 &(re->start_bitmap[0]) : NULL; 165 break; 166 167 case PCRE2_INFO_FRAMESIZE: 168 *((size_t *)where) = offsetof(heapframe, ovector) + 169 re->top_bracket * 2 * sizeof(PCRE2_SIZE); 170 break; 171 172 case PCRE2_INFO_HASBACKSLASHC: 173 *((uint32_t *)where) = (re->flags & PCRE2_HASBKC) != 0; 174 break; 175 176 case PCRE2_INFO_HASCRORLF: 177 *((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0; 178 break; 179 180 case PCRE2_INFO_HEAPLIMIT: 181 *((uint32_t *)where) = re->limit_heap; 182 if (re->limit_heap == UINT32_MAX) return PCRE2_ERROR_UNSET; 183 break; 184 185 case PCRE2_INFO_JCHANGED: 186 *((uint32_t *)where) = (re->flags & PCRE2_JCHANGED) != 0; 187 break; 188 189 case PCRE2_INFO_JITSIZE: 190 #ifdef SUPPORT_JIT 191 *((size_t *)where) = (re->executable_jit != NULL)? 192 PRIV(jit_get_size)(re->executable_jit) : 0; 193 #else 194 *((size_t *)where) = 0; 195 #endif 196 break; 197 198 case PCRE2_INFO_LASTCODETYPE: 199 *((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)? 1 : 0; 200 break; 201 202 case PCRE2_INFO_LASTCODEUNIT: 203 *((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)? 204 re->last_codeunit : 0; 205 break; 206 207 case PCRE2_INFO_MATCHEMPTY: 208 *((uint32_t *)where) = (re->flags & PCRE2_MATCH_EMPTY) != 0; 209 break; 210 211 case PCRE2_INFO_MATCHLIMIT: 212 *((uint32_t *)where) = re->limit_match; 213 if (re->limit_match == UINT32_MAX) return PCRE2_ERROR_UNSET; 214 break; 215 216 case PCRE2_INFO_MAXLOOKBEHIND: 217 *((uint32_t *)where) = re->max_lookbehind; 218 break; 219 220 case PCRE2_INFO_MINLENGTH: 221 *((uint32_t *)where) = re->minlength; 222 break; 223 224 case PCRE2_INFO_NAMEENTRYSIZE: 225 *((uint32_t *)where) = re->name_entry_size; 226 break; 227 228 case PCRE2_INFO_NAMECOUNT: 229 *((uint32_t *)where) = re->name_count; 230 break; 231 232 case PCRE2_INFO_NAMETABLE: 233 *((PCRE2_SPTR *)where) = (PCRE2_SPTR)((char *)re + sizeof(pcre2_real_code)); 234 break; 235 236 case PCRE2_INFO_NEWLINE: 237 *((uint32_t *)where) = re->newline_convention; 238 break; 239 240 case PCRE2_INFO_SIZE: 241 *((size_t *)where) = re->blocksize; 242 break; 243 244 default: return PCRE2_ERROR_BADOPTION; 245 } 246 247 return 0; 248 } 249 250 251 252 /************************************************* 253 * Callout enumerator * 254 *************************************************/ 255 256 /* 257 Arguments: 258 code points to compiled code 259 callback function called for each callout block 260 callout_data user data passed to the callback 261 262 Returns: 0 when successfully completed 263 < 0 on local error 264 != 0 for callback error 265 */ 266 267 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION 268 pcre2_callout_enumerate(const pcre2_code *code, 269 int (*callback)(pcre2_callout_enumerate_block *, void *), void *callout_data) 270 { 271 pcre2_real_code *re = (pcre2_real_code *)code; 272 pcre2_callout_enumerate_block cb; 273 PCRE2_SPTR cc; 274 #ifdef SUPPORT_UNICODE 275 BOOL utf; 276 #endif 277 278 if (re == NULL) return PCRE2_ERROR_NULL; 279 280 #ifdef SUPPORT_UNICODE 281 utf = (re->overall_options & PCRE2_UTF) != 0; 282 #endif 283 284 /* Check that the first field in the block is the magic number. If it is not, 285 return with PCRE2_ERROR_BADMAGIC. */ 286 287 if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC; 288 289 /* Check that this pattern was compiled in the correct bit mode */ 290 291 if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE; 292 293 cb.version = 0; 294 cc = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code)) 295 + re->name_count * re->name_entry_size; 296 297 while (TRUE) 298 { 299 int rc; 300 switch (*cc) 301 { 302 case OP_END: 303 return 0; 304 305 case OP_CHAR: 306 case OP_CHARI: 307 case OP_NOT: 308 case OP_NOTI: 309 case OP_STAR: 310 case OP_MINSTAR: 311 case OP_PLUS: 312 case OP_MINPLUS: 313 case OP_QUERY: 314 case OP_MINQUERY: 315 case OP_UPTO: 316 case OP_MINUPTO: 317 case OP_EXACT: 318 case OP_POSSTAR: 319 case OP_POSPLUS: 320 case OP_POSQUERY: 321 case OP_POSUPTO: 322 case OP_STARI: 323 case OP_MINSTARI: 324 case OP_PLUSI: 325 case OP_MINPLUSI: 326 case OP_QUERYI: 327 case OP_MINQUERYI: 328 case OP_UPTOI: 329 case OP_MINUPTOI: 330 case OP_EXACTI: 331 case OP_POSSTARI: 332 case OP_POSPLUSI: 333 case OP_POSQUERYI: 334 case OP_POSUPTOI: 335 case OP_NOTSTAR: 336 case OP_NOTMINSTAR: 337 case OP_NOTPLUS: 338 case OP_NOTMINPLUS: 339 case OP_NOTQUERY: 340 case OP_NOTMINQUERY: 341 case OP_NOTUPTO: 342 case OP_NOTMINUPTO: 343 case OP_NOTEXACT: 344 case OP_NOTPOSSTAR: 345 case OP_NOTPOSPLUS: 346 case OP_NOTPOSQUERY: 347 case OP_NOTPOSUPTO: 348 case OP_NOTSTARI: 349 case OP_NOTMINSTARI: 350 case OP_NOTPLUSI: 351 case OP_NOTMINPLUSI: 352 case OP_NOTQUERYI: 353 case OP_NOTMINQUERYI: 354 case OP_NOTUPTOI: 355 case OP_NOTMINUPTOI: 356 case OP_NOTEXACTI: 357 case OP_NOTPOSSTARI: 358 case OP_NOTPOSPLUSI: 359 case OP_NOTPOSQUERYI: 360 case OP_NOTPOSUPTOI: 361 cc += PRIV(OP_lengths)[*cc]; 362 #ifdef SUPPORT_UNICODE 363 if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); 364 #endif 365 break; 366 367 case OP_TYPESTAR: 368 case OP_TYPEMINSTAR: 369 case OP_TYPEPLUS: 370 case OP_TYPEMINPLUS: 371 case OP_TYPEQUERY: 372 case OP_TYPEMINQUERY: 373 case OP_TYPEUPTO: 374 case OP_TYPEMINUPTO: 375 case OP_TYPEEXACT: 376 case OP_TYPEPOSSTAR: 377 case OP_TYPEPOSPLUS: 378 case OP_TYPEPOSQUERY: 379 case OP_TYPEPOSUPTO: 380 cc += PRIV(OP_lengths)[*cc]; 381 #ifdef SUPPORT_UNICODE 382 if (cc[-1] == OP_PROP || cc[-1] == OP_NOTPROP) cc += 2; 383 #endif 384 break; 385 386 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 387 case OP_XCLASS: 388 cc += GET(cc, 1); 389 break; 390 #endif 391 392 case OP_MARK: 393 case OP_COMMIT_ARG: 394 case OP_PRUNE_ARG: 395 case OP_SKIP_ARG: 396 case OP_THEN_ARG: 397 cc += PRIV(OP_lengths)[*cc] + cc[1]; 398 break; 399 400 case OP_CALLOUT: 401 cb.pattern_position = GET(cc, 1); 402 cb.next_item_length = GET(cc, 1 + LINK_SIZE); 403 cb.callout_number = cc[1 + 2*LINK_SIZE]; 404 cb.callout_string_offset = 0; 405 cb.callout_string_length = 0; 406 cb.callout_string = NULL; 407 rc = callback(&cb, callout_data); 408 if (rc != 0) return rc; 409 cc += PRIV(OP_lengths)[*cc]; 410 break; 411 412 case OP_CALLOUT_STR: 413 cb.pattern_position = GET(cc, 1); 414 cb.next_item_length = GET(cc, 1 + LINK_SIZE); 415 cb.callout_number = 0; 416 cb.callout_string_offset = GET(cc, 1 + 3*LINK_SIZE); 417 cb.callout_string_length = 418 GET(cc, 1 + 2*LINK_SIZE) - (1 + 4*LINK_SIZE) - 2; 419 cb.callout_string = cc + (1 + 4*LINK_SIZE) + 1; 420 rc = callback(&cb, callout_data); 421 if (rc != 0) return rc; 422 cc += GET(cc, 1 + 2*LINK_SIZE); 423 break; 424 425 default: 426 cc += PRIV(OP_lengths)[*cc]; 427 break; 428 } 429 } 430 } 431 432 /* End of pcre2_pattern_info.c */ 433