1 /************************************************* 2 * Perl-Compatible Regular Expressions * 3 *************************************************/ 4 5 /* PCRE is a library of functions to support regular expressions whose syntax 6 and semantics are as close as possible to those of the Perl 5 language. 7 8 Written by Philip Hazel 9 Original API code Copyright (c) 1997-2012 University of Cambridge 10 New API code Copyright (c) 2016 University of Cambridge 11 12 ----------------------------------------------------------------------------- 13 Redistribution and use in source and binary forms, with or without 14 modification, are permitted provided that the following conditions are met: 15 16 * Redistributions of source code must retain the above copyright notice, 17 this list of conditions and the following disclaimer. 18 19 * Redistributions in binary form must reproduce the above copyright 20 notice, this list of conditions and the following disclaimer in the 21 documentation and/or other materials provided with the distribution. 22 23 * Neither the name of the University of Cambridge nor the names of its 24 contributors may be used to endorse or promote products derived from 25 this software without specific prior written permission. 26 27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 POSSIBILITY OF SUCH DAMAGE. 38 ----------------------------------------------------------------------------- 39 */ 40 41 42 #ifdef HAVE_CONFIG_H 43 #include "config.h" 44 #endif 45 46 #include "pcre2_internal.h" 47 48 49 /************************************************* 50 * Return info about compiled pattern * 51 *************************************************/ 52 53 /* 54 Arguments: 55 code points to compiled code 56 what what information is required 57 where where to put the information; if NULL, return length 58 59 Returns: 0 when data returned 60 > 0 when length requested 61 < 0 on error or unset value 62 */ 63 64 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION 65 pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where) 66 { 67 const pcre2_real_code *re = (pcre2_real_code *)code; 68 69 if (where == NULL) /* Requests field length */ 70 { 71 switch(what) 72 { 73 case PCRE2_INFO_ALLOPTIONS: 74 case PCRE2_INFO_ARGOPTIONS: 75 case PCRE2_INFO_BACKREFMAX: 76 case PCRE2_INFO_BSR: 77 case PCRE2_INFO_CAPTURECOUNT: 78 case PCRE2_INFO_FIRSTCODETYPE: 79 case PCRE2_INFO_FIRSTCODEUNIT: 80 case PCRE2_INFO_HASBACKSLASHC: 81 case PCRE2_INFO_HASCRORLF: 82 case PCRE2_INFO_JCHANGED: 83 case PCRE2_INFO_LASTCODETYPE: 84 case PCRE2_INFO_LASTCODEUNIT: 85 case PCRE2_INFO_MATCHEMPTY: 86 case PCRE2_INFO_MATCHLIMIT: 87 case PCRE2_INFO_MAXLOOKBEHIND: 88 case PCRE2_INFO_MINLENGTH: 89 case PCRE2_INFO_NAMEENTRYSIZE: 90 case PCRE2_INFO_NAMECOUNT: 91 case PCRE2_INFO_NEWLINE: 92 case PCRE2_INFO_RECURSIONLIMIT: 93 return sizeof(uint32_t); 94 95 case PCRE2_INFO_FIRSTBITMAP: 96 return sizeof(const uint8_t *); 97 98 case PCRE2_INFO_JITSIZE: 99 case PCRE2_INFO_SIZE: 100 return sizeof(size_t); 101 102 case PCRE2_INFO_NAMETABLE: 103 return sizeof(PCRE2_SPTR); 104 } 105 } 106 107 if (re == NULL) return PCRE2_ERROR_NULL; 108 109 /* Check that the first field in the block is the magic number. If it is not, 110 return with PCRE2_ERROR_BADMAGIC. */ 111 112 if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC; 113 114 /* Check that this pattern was compiled in the correct bit mode */ 115 116 if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE; 117 118 switch(what) 119 { 120 case PCRE2_INFO_ALLOPTIONS: 121 *((uint32_t *)where) = re->overall_options; 122 break; 123 124 case PCRE2_INFO_ARGOPTIONS: 125 *((uint32_t *)where) = re->compile_options; 126 break; 127 128 case PCRE2_INFO_BACKREFMAX: 129 *((uint32_t *)where) = re->top_backref; 130 break; 131 132 case PCRE2_INFO_BSR: 133 *((uint32_t *)where) = re->bsr_convention; 134 break; 135 136 case PCRE2_INFO_CAPTURECOUNT: 137 *((uint32_t *)where) = re->top_bracket; 138 break; 139 140 case PCRE2_INFO_FIRSTCODETYPE: 141 *((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 1 : 142 ((re->flags & PCRE2_STARTLINE) != 0)? 2 : 0; 143 break; 144 145 case PCRE2_INFO_FIRSTCODEUNIT: 146 *((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 147 re->first_codeunit : 0; 148 break; 149 150 case PCRE2_INFO_FIRSTBITMAP: 151 *((const uint8_t **)where) = ((re->flags & PCRE2_FIRSTMAPSET) != 0)? 152 &(re->start_bitmap[0]) : NULL; 153 break; 154 155 case PCRE2_INFO_HASBACKSLASHC: 156 *((uint32_t *)where) = (re->flags & PCRE2_HASBKC) != 0; 157 break; 158 159 case PCRE2_INFO_HASCRORLF: 160 *((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0; 161 break; 162 163 case PCRE2_INFO_JCHANGED: 164 *((uint32_t *)where) = (re->flags & PCRE2_JCHANGED) != 0; 165 break; 166 167 case PCRE2_INFO_JITSIZE: 168 #ifdef SUPPORT_JIT 169 *((size_t *)where) = (re->executable_jit != NULL)? 170 PRIV(jit_get_size)(re->executable_jit) : 0; 171 #else 172 *((size_t *)where) = 0; 173 #endif 174 break; 175 176 case PCRE2_INFO_LASTCODETYPE: 177 *((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)? 1 : 0; 178 break; 179 180 case PCRE2_INFO_LASTCODEUNIT: 181 *((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)? 182 re->last_codeunit : 0; 183 break; 184 185 case PCRE2_INFO_MATCHEMPTY: 186 *((uint32_t *)where) = (re->flags & PCRE2_MATCH_EMPTY) != 0; 187 break; 188 189 case PCRE2_INFO_MATCHLIMIT: 190 *((uint32_t *)where) = re->limit_match; 191 if (re->limit_match == UINT32_MAX) return PCRE2_ERROR_UNSET; 192 break; 193 194 case PCRE2_INFO_MAXLOOKBEHIND: 195 *((uint32_t *)where) = re->max_lookbehind; 196 break; 197 198 case PCRE2_INFO_MINLENGTH: 199 *((uint32_t *)where) = re->minlength; 200 break; 201 202 case PCRE2_INFO_NAMEENTRYSIZE: 203 *((uint32_t *)where) = re->name_entry_size; 204 break; 205 206 case PCRE2_INFO_NAMECOUNT: 207 *((uint32_t *)where) = re->name_count; 208 break; 209 210 case PCRE2_INFO_NAMETABLE: 211 *((PCRE2_SPTR *)where) = (PCRE2_SPTR)((char *)re + sizeof(pcre2_real_code)); 212 break; 213 214 case PCRE2_INFO_NEWLINE: 215 *((uint32_t *)where) = re->newline_convention; 216 break; 217 218 case PCRE2_INFO_RECURSIONLIMIT: 219 *((uint32_t *)where) = re->limit_recursion; 220 if (re->limit_recursion == UINT32_MAX) return PCRE2_ERROR_UNSET; 221 break; 222 223 case PCRE2_INFO_SIZE: 224 *((size_t *)where) = re->blocksize; 225 break; 226 227 default: return PCRE2_ERROR_BADOPTION; 228 } 229 230 return 0; 231 } 232 233 234 235 /************************************************* 236 * Callout enumerator * 237 *************************************************/ 238 239 /* 240 Arguments: 241 code points to compiled code 242 callback function called for each callout block 243 callout_data user data passed to the callback 244 245 Returns: 0 when successfully completed 246 < 0 on local error 247 != 0 for callback error 248 */ 249 250 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION 251 pcre2_callout_enumerate(const pcre2_code *code, 252 int (*callback)(pcre2_callout_enumerate_block *, void *), void *callout_data) 253 { 254 pcre2_real_code *re = (pcre2_real_code *)code; 255 pcre2_callout_enumerate_block cb; 256 PCRE2_SPTR cc; 257 #ifdef SUPPORT_UNICODE 258 BOOL utf = (re->overall_options & PCRE2_UTF) != 0; 259 #endif 260 261 if (re == NULL) return PCRE2_ERROR_NULL; 262 263 /* Check that the first field in the block is the magic number. If it is not, 264 return with PCRE2_ERROR_BADMAGIC. */ 265 266 if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC; 267 268 /* Check that this pattern was compiled in the correct bit mode */ 269 270 if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE; 271 272 cb.version = 0; 273 cc = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code)) 274 + re->name_count * re->name_entry_size; 275 276 while (TRUE) 277 { 278 int rc; 279 switch (*cc) 280 { 281 case OP_END: 282 return 0; 283 284 case OP_CHAR: 285 case OP_CHARI: 286 case OP_NOT: 287 case OP_NOTI: 288 case OP_STAR: 289 case OP_MINSTAR: 290 case OP_PLUS: 291 case OP_MINPLUS: 292 case OP_QUERY: 293 case OP_MINQUERY: 294 case OP_UPTO: 295 case OP_MINUPTO: 296 case OP_EXACT: 297 case OP_POSSTAR: 298 case OP_POSPLUS: 299 case OP_POSQUERY: 300 case OP_POSUPTO: 301 case OP_STARI: 302 case OP_MINSTARI: 303 case OP_PLUSI: 304 case OP_MINPLUSI: 305 case OP_QUERYI: 306 case OP_MINQUERYI: 307 case OP_UPTOI: 308 case OP_MINUPTOI: 309 case OP_EXACTI: 310 case OP_POSSTARI: 311 case OP_POSPLUSI: 312 case OP_POSQUERYI: 313 case OP_POSUPTOI: 314 case OP_NOTSTAR: 315 case OP_NOTMINSTAR: 316 case OP_NOTPLUS: 317 case OP_NOTMINPLUS: 318 case OP_NOTQUERY: 319 case OP_NOTMINQUERY: 320 case OP_NOTUPTO: 321 case OP_NOTMINUPTO: 322 case OP_NOTEXACT: 323 case OP_NOTPOSSTAR: 324 case OP_NOTPOSPLUS: 325 case OP_NOTPOSQUERY: 326 case OP_NOTPOSUPTO: 327 case OP_NOTSTARI: 328 case OP_NOTMINSTARI: 329 case OP_NOTPLUSI: 330 case OP_NOTMINPLUSI: 331 case OP_NOTQUERYI: 332 case OP_NOTMINQUERYI: 333 case OP_NOTUPTOI: 334 case OP_NOTMINUPTOI: 335 case OP_NOTEXACTI: 336 case OP_NOTPOSSTARI: 337 case OP_NOTPOSPLUSI: 338 case OP_NOTPOSQUERYI: 339 case OP_NOTPOSUPTOI: 340 cc += PRIV(OP_lengths)[*cc]; 341 #ifdef SUPPORT_UNICODE 342 if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); 343 #endif 344 break; 345 346 case OP_TYPESTAR: 347 case OP_TYPEMINSTAR: 348 case OP_TYPEPLUS: 349 case OP_TYPEMINPLUS: 350 case OP_TYPEQUERY: 351 case OP_TYPEMINQUERY: 352 case OP_TYPEUPTO: 353 case OP_TYPEMINUPTO: 354 case OP_TYPEEXACT: 355 case OP_TYPEPOSSTAR: 356 case OP_TYPEPOSPLUS: 357 case OP_TYPEPOSQUERY: 358 case OP_TYPEPOSUPTO: 359 cc += PRIV(OP_lengths)[*cc]; 360 #ifdef SUPPORT_UNICODE 361 if (cc[-1] == OP_PROP || cc[-1] == OP_NOTPROP) cc += 2; 362 #endif 363 break; 364 365 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 366 case OP_XCLASS: 367 cc += GET(cc, 1); 368 break; 369 #endif 370 371 case OP_MARK: 372 case OP_PRUNE_ARG: 373 case OP_SKIP_ARG: 374 case OP_THEN_ARG: 375 cc += PRIV(OP_lengths)[*cc] + cc[1]; 376 break; 377 378 case OP_CALLOUT: 379 cb.pattern_position = GET(cc, 1); 380 cb.next_item_length = GET(cc, 1 + LINK_SIZE); 381 cb.callout_number = cc[1 + 2*LINK_SIZE]; 382 cb.callout_string_offset = 0; 383 cb.callout_string_length = 0; 384 cb.callout_string = NULL; 385 rc = callback(&cb, callout_data); 386 if (rc != 0) return rc; 387 cc += PRIV(OP_lengths)[*cc]; 388 break; 389 390 case OP_CALLOUT_STR: 391 cb.pattern_position = GET(cc, 1); 392 cb.next_item_length = GET(cc, 1 + LINK_SIZE); 393 cb.callout_number = 0; 394 cb.callout_string_offset = GET(cc, 1 + 3*LINK_SIZE); 395 cb.callout_string_length = 396 GET(cc, 1 + 2*LINK_SIZE) - (1 + 4*LINK_SIZE) - 2; 397 cb.callout_string = cc + (1 + 4*LINK_SIZE) + 1; 398 rc = callback(&cb, callout_data); 399 if (rc != 0) return rc; 400 cc += GET(cc, 1 + 2*LINK_SIZE); 401 break; 402 403 default: 404 cc += PRIV(OP_lengths)[*cc]; 405 break; 406 } 407 } 408 } 409 410 /* End of pcre2_pattern_info.c */ 411