Home | History | Annotate | Download | only in pcre

Lines Matching defs:code

15     * Redistributions of source code must retain the above copyright notice,
78 * Code parameters and static tables *
84 soon as they can be, so that hopefully there will never be an overrun. The code
357 "internal error: code overflow\0"
416 may mark arbitrary characters as digits - but the PCRE compiling code expects
428 Then we can use ctype_digit and ctype_xdigit in the code. */
664 the -ESC_g code (cf \k). */
778 larger first octal digit. The original code used just to take the least
872 otherwise, for Perl compatibility, it is a literal. This code looks a bit
920 errorcodeptr points to the error code variable
1045 errorcodeptr points to error code variable
1395 * Find first significant op code *
1399 for a fixed first character, or an anchoring op code etc. It skips over things
1405 code pointer to the start of the group
1415 first_significant_code(const uschar *code, int *options, int optbit,
1420 switch ((int)*code)
1423 if (optbit > 0 && ((int)code[1] & optbit) != (*options & optbit))
1424 *options = (int)code[1];
1425 code += 2;
1431 if (!skipassert) return code;
1432 do code += GET(code, 1); while (*code == OP_ALT);
1433 code += _pcre_OP_lengths[*code];
1438 if (!skipassert) return code;
1447 code += _pcre_OP_lengths[*code];
1451 return code;
1476 code points to the start of the pattern (the bracket)
1488 find_fixedlength(uschar *code, int options, BOOL atend, compile_data *cd)
1493 register uschar *cc = code + 1 + LINK_SIZE;
1518 END it's the end of the outer call. All can be handled by the same code. */
1688 code points to start of expression
1696 _pcre_find_bracket(const uschar *code, BOOL utf8, int number)
1700 register int c = *code;
1705 the table is zero; the actual length is stored in the compiled code. */
1707 if (c == OP_XCLASS) code += GET(code, 1);
1713 if (number < 0) return (uschar *)code;
1714 code += _pcre_OP_lengths[c];
1721 int n = GET2(code, 1+LINK_SIZE);
1722 if (n == number) return (uschar *)code;
1723 code += _pcre_OP_lengths[c];
1744 if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
1751 if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;
1757 code += code[1];
1761 code += code[1+LINK_SIZE];
1767 code += _pcre_OP_lengths[c];
1791 if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
1811 code points to start of expression
1818 find_recurse(const uschar *code, BOOL utf8)
1822 register int c = *code;
1824 if (c == OP_RECURSE) return code;
1828 the table is zero; the actual length is stored in the compiled code. */
1830 if (c == OP_XCLASS) code += GET(code, 1);
1850 if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
1857 if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;
1863 code += code[1];
1867 code += code[1+LINK_SIZE];
1873 code += _pcre_OP_lengths[c];
1897 if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
1922 code points to start of search
1931 could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8,
1935 for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE);
1936 code < endcode;
1937 code = first_significant_code(code + _pcre_OP_lengths[c], NULL, 0, TRUE))
1941 c = *code;
1948 do code += GET(code, 1); while (*code == OP_ALT);
1949 c = *code;
1957 code += _pcre_OP_lengths[c];
1958 do code += GET(code, 1); while (*code == OP_ALT);
1959 c = *code;
1969 const uschar *scode = cd->start_code + GET(code, 1);
1990 if (GET(code, 1) == 0) return TRUE; /* Hit unclosed bracket */
1996 if (c == OP_COND && code[GET(code, 1)] != OP_ALT)
1997 code += GET(code, 1);
2003 if (!empty_branch && could_be_empty_branch(code, endcode, utf8, cd))
2005 code += GET(code, 1);
2007 while (*code == OP_ALT);
2011 c = *code;
2022 actual length is stored in the compiled code, so we must update "code"
2027 ccode = code += GET(code, 1);
2033 ccode = code + 33;
2099 if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
2107 if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;
2128 if (utf8 && code[1] >= 0xc0) code += _pcre_utf8_table4[code[1] & 0x3f];
2134 if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f];
2144 code += code[1];
2148 code += code[1+LINK_SIZE];
2173 code points to start of the recursion
2183 could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,
2186 while (bcptr != NULL && bcptr->current_branch >= code)
2214 class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code
2359 code current code pointer
2363 Returns: new code pointer
2367 auto_callout(uschar *code, const uschar *ptr, compile_data *cd)
2369 *code++ = OP_CALLOUT;
2370 *code++ = 255;
2371 PUT(code, 0, (int)(ptr - cd->start_pattern)); /* Pattern offset */
2372 PUT(code, LINK_SIZE, 0); /* Default length */
2373 return code + 2*LINK_SIZE;
2881 well. It means re-organizing the above code so as to get hold of the property
2882 values before switching on the op-code. However, I wonder how many patterns
2942 codeptr points to the pointer to the current code point
2944 errorcodeptr points to error code variable
2972 register uschar *code = *codeptr;
2973 uschar *last_code = code;
2974 uschar *orig_code = code;
3067 if (code > cd->hwm) cd->hwm = code; /* High water info */
3069 if (code > cd->start_workspace + WORK_SIZE_CHECK) /* Check for overrun */
3075 /* There is at least one situation where code goes backwards: this is the
3081 if (code < last_code) code = last_code;
3085 if (OFLOW_MAX - *lengthptr < code - last_code)
3091 *lengthptr += (int)(code - last_code);
3092 DPRINTF(("length=%d added %d c=%c\n", *lengthptr, code - last_code, c));
3096 if "previous" is NULL, reset the current code pointer to the start. */
3102 memmove(orig_code, previous, code - previous);
3103 code -= previous - orig_code;
3107 else code = orig_code;
3109 /* Remember where this code item starts so we can pick up the length
3112 last_code = code;
3144 previous_callout = code;
3145 code = auto_callout(code, ptr, cd);
3193 previous_callout = code;
3194 code = auto_callout(code, ptr, cd);
3205 *codeptr = code;
3209 if (OFLOW_MAX - *lengthptr < code - last_code)
3214 *lengthptr += (int)(code - last_code); /* To include callout length */
3230 *code++ = OP_CIRC;
3235 *code++ = OP_DOLL;
3245 previous = code;
3246 *code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY;
3274 previous = code;
3311 an initial ']' is taken as a data character -- the code below handles
3318 *code++ = negate_class? OP_ALLANY : OP_FAIL;
3339 than 256), because in that case the compiled code doesn't use the bit map.
3346 class_utf8data = code + LINK_SIZE + 2; /* For UTF-8 items */
3728 entirely. The code for handling \Q and \E is messy. */
4006 *code++ = OP_NOT;
4007 *code++ = class_lastchar;
4012 then we can handle this with the normal one-character code. */
4041 actual compiled code. */
4047 *code++ = OP_XCLASS;
4048 code += LINK_SIZE;
4049 *code = negate_class? XCL_NOT : 0;
4052 otherwise just move the code pointer to the end of the extra data. */
4056 *code++ |= XCL_MAP;
4057 memmove(code + 32, code, class_utf8data - code);
4058 memcpy(code, classbits, 32);
4059 code = class_utf8data + 32;
4061 else code = class_utf8data;
4065 PUT(previous, 1, code - previous);
4073 (non-UCP) in the class. Then copy the 32-byte map into the code vector,
4076 *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
4080 for (c = 0; c < 32; c++) code[c] = ~classbits[c];
4084 memcpy(code, classbits, 32);
4086 code += 32;
4172 if (utf8 && (code[-1] & 0x80) != 0)
4174 uschar *lastchar = code - 1;
4176 c = code - lastchar; /* Length of UTF-8 character */
4187 c = code[-1];
4204 goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */
4208 one of the special opcodes, replacing it. The code is shared with single-
4228 create a suitable repeat item. The code is shared with single-character
4231 defined, but we don't wrap the little bits of code here because it just
4257 oldcode = code;
4258 code = previous; /* Usually overwrite previous item */
4266 /* This code is obsolete from release 8.00; the restriction was finally
4284 if (repeat_max == -1) *code++ = OP_STAR + repeat_type;
4285 else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type;
4288 *code++ = OP_UPTO + repeat_type;
4289 PUT2INC(code, 0, repeat_max);
4301 *code++ = OP_PLUS + repeat_type;
4304 code = oldcode; /* leave previous item in place */
4306 *code++ = OP_UPTO + repeat_type;
4307 PUT2INC(code, 0, repeat_max - 1);
4316 *code++ = OP_EXACT + op_type; /* NB EXACT doesn't have repeat_type */
4317 PUT2INC(code, 0, repeat_min);
4320 we have to insert the character for the previous code. For a repeated
4330 memcpy(code, utf8_char, c & 7);
4331 code += c & 7;
4336 *code++ = c;
4339 *code++ = prop_type;
4340 *code++ = prop_value;
4343 *code++ = OP_STAR + repeat_type;
4347 preceded by the character, for the previously inserted code. If the
4355 memcpy(code, utf8_char, c & 7);
4356 code += c & 7;
4360 *code++ = c;
4363 *code++ = prop_type;
4364 *code++ = prop_value;
4370 *code++ = OP_QUERY + repeat_type;
4374 *code++ = OP_UPTO + repeat_type;
4375 PUT2INC(code, 0, repeat_max);
4385 memcpy(code, utf8_char, c & 7);
4386 code += c & 7;
4390 *code++ = c;
4398 *code++ = prop_type;
4399 *code++ = prop_value;
4416 code = previous;
4421 /* This code is obsolete from release 8.00; the restriction was finally
4431 *code++ = OP_CRSTAR + repeat_type;
4433 *code++ = OP_CRPLUS + repeat_type;
4435 *code++ = OP_CRQUERY + repeat_type;
4438 *code++ = OP_CRRANGE + repeat_type;
4439 PUT2INC(code, 0, repeat_min);
4441 PUT2INC(code, 0, repeat_max);
4453 int len = (int)(code - previous);
4466 from the current code pointer. There may be an OP_OPT setting following
4467 the final KET, so we can't find the end just by going back from the code
4474 ketoffset = (int)(code - ket);
4481 the code gets far too messy. There are several special subcases when the
4491 ** code = previous;
4509 *code = OP_END;
4512 code++;
4524 copy, which has to be moved up. The remainder can be handled by code
4532 *code = OP_END;
4535 code += 2 + LINK_SIZE;
4589 memcpy(code, previous, len);
4596 code += len;
4604 /* This code is common to both the zero and non-zero minimum cases. If
4642 *code++ = OP_BRAZERO + repeat_type;
4650 *code++ = OP_BRA;
4651 offset = (bralink == NULL)? 0 : (int)(code - bralink);
4652 bralink = code;
4653 PUTINC(code, 0, offset);
4656 memcpy(code, previous, len);
4663 code += len;
4672 int offset = (int)(code - bralink + 1);
4673 uschar *bra = code - offset;
4676 *code++ = OP_KET;
4677 PUTINC(code, 0, offset);
4683 can't just offset backwards from the current code point, because we
4695 uschar *ketcode = code - ketoffset;
4760 len = (int)(code - tempcode);
4778 /* Because we are moving code along, we must ensure that any
4782 *code = OP_END;
4785 code += 1 + LINK_SIZE;
4788 *code++ = OP_KET;
4789 PUTINC(code, 0, len);
4860 *code++ = OP_CLOSE;
4861 PUT2INC(code, 0, oc->number);
4874 *code = verbs[i].op;
4875 if (*code++ == OP_THEN)
4877 PUT(code, 0, code - bcptr->current_branch - 1);
4878 code += LINK_SIZE;
4889 *code = verbs[i].op_arg;
4890 if (*code++ == OP_THEN_ARG)
4892 PUT(code, 0, code - bcptr->current_branch - 1);
4893 code += LINK_SIZE;
4895 *code++ = arglen;
4896 memcpy(code, arg, arglen);
4897 code += arglen;
4898 *code++ = 0;
4975 code[1+LINK_SIZE] = OP_CREF;
4985 code[1+LINK_SIZE] = OP_RREF; /* Change the type of test */
5059 PUT2(code, 2+LINK_SIZE, recno);
5081 PUT2(code, 2+LINK_SIZE, recno);
5082 code[1+LINK_SIZE]++;
5090 PUT2(code, 2+LINK_SIZE, i);
5091 code[1+LINK_SIZE]++;
5122 code[1+LINK_SIZE] = OP_RREF; /* Change test type */
5123 code, 2+LINK_SIZE, recno);
5131 code[1+LINK_SIZE] = OP_DEF;
5140 PUT2(code, 2+LINK_SIZE, recno);
5165 *code++ = OP_FAIL;
5205 previous_callout = code; /* Save for later completion */
5207 *code++ = OP_CALLOUT;
5222 *code++ = n;
5223 PUT(code, 0, (int)(ptr - cd->start_pattern + 1)); /* Pattern offset */
5224 PUT(code, LINK_SIZE, 0); /* Default length */
5225 code += 2 * LINK_SIZE;
5292 would all have to be modified, and the compiled code moved down, if
5417 do a simple search as in the code below. Instead, we have to scan the
5462 /* In both phases, we can now go to the code that handles numerical
5483 /* Come here from the \g<...> and \g'...' code (Oniguruma
5541 /* Come here from code above that handles a named recursion */
5545 previous = code;
5557 *code = OP_END;
5577 PUTINC(cd->hwm, 0, (int)(code + 2 + LINK_SIZE - cd->start_code));
5585 could_be_empty(called, code, bcptr, utf8, cd))
5596 *code
5597 PUT(code, 1, 2 + 2*LINK_SIZE);
5598 code += 1 + LINK_SIZE;
5600 *code = OP_RECURSE;
5601 PUT(code, 1, (int)(called - cd->start_code));
5602 code += 1 + LINK_SIZE;
5604 *code = OP_KET;
5605 PUT(code, 1, 2 + 2*LINK_SIZE);
5606 code += 1 + LINK_SIZE;
5658 If the code pointer is not (cd->start_code + 1 + LINK_SIZE), we are
5660 compiled. In the pre-compile phase, however, the code pointer can have
5661 that value after the start, because it gets reset as code is discarded
5669 If we are not at the pattern start, compile code to change the ims
5675 if (code == cd->start_code + 1 + LINK_SIZE &&
5684 *code++ = OP_OPT;
5685 *code++ = newoptions & PCRE_IMS;
5727 PUT2(code, 1+LINK_SIZE, cd->bracount);
5732 other kinds can be. All their opcodes are >= OP_ONCE. We copy code into a
5737 previous = (bravalue >= OP_ONCE)? code : NULL;
5738 *code = bravalue;
5739 tempcode = code;
5746 &tempcode, /* Where to put code (updated) */
5762 /* At the end of compiling, code is still pointing to the start of the
5774 uschar *tc = code;
5786 if (code[LINK_SIZE+1] == OP_DEF)
5820 less the brackets at either end. Then reduce the compiled code to just a
5832 *code++ = OP_BRA;
5833 PUTINC(code, 0, 1 + LINK_SIZE);
5834 *code++ = OP_KET;
5835 PUTINC(code, 0, 1 + LINK_SIZE);
5839 /* Otherwise update the main code pointer to the end of the group. */
5841 code = tempcode;
5850 branches (see code above). If the bracket is followed by a quantifier with
6023 previous = code;
6024 *code++ = OP_REF;
6025 PUT2INC(code, 0, recno);
6052 previous = code;
6053 *code++ = ((-c == ESC_p) != negated)? OP_PROP : OP_NOTPROP;
6054 *code++ = ptype;
6055 *code++ = pdata;
6085 previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;
6086 *code++ = -c;
6131 previous = code;
6132 *code++ = ((options & PCRE_CASELESS) != 0)? OP_CHARNC : OP_CHAR;
6133 for (c = 0; c < mclength; c++) *code++ = mcbuffer[c];
6156 if (mclength != 1) reqbyte = code[-1] | cd->req_varyopt;
6169 reqbyte = code[-1] | req_caseopt | cd->req_varyopt;
6194 points to the closing bracket, or vertical bar, or end of string. The code
6208 codeptr -> the address of the current code pointer
6210 errorcodeptr -> pointer to error code variable
6231 uschar *code = *codeptr;
6232 uschar *last_branch = code;
6233 uschar *start_bracket = code;
6246 bc.current_branch = code;
6253 lengthptr for NULL. We cannot do this by looking at the value of code at the
6260 the code that abstracts option settings at the start of the pattern and makes
6268 if (*code == OP_CBRA)
6270 capnumber = GET2(code, 1 + LINK_SIZE);
6279 PUT(code, 1, 0);
6280 code += 1 + LINK_SIZE + skipbytes;
6296 *code++ = OP_OPT;
6297 *code++ = options & PCRE_IMS;
6305 *code++ = OP_REVERSE;
6306 reverse_count = code;
6307 PUTINC(code, 0, 0);
6314 if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstbyte,
6389 *code = OP_END;
6412 compile a resetting op-code following, except at the very end of the pattern.
6419 int branch_length = (int)(code - last_branch);
6432 *code = OP_KET;
6433 PUT(code, 1, (int)(code - start_bracket));
6434 code += 1 + LINK_SIZE;
6445 code - start_bracket);
6447 code += 1 + LINK_SIZE;
6448 PUT(start_bracket, 1, (int)(code - start_bracket));
6449 *code = OP_KET;
6450 PUT(code, 1, (int)(code - start_bracket));
6451 code += 1 + LINK_SIZE;
6461 *code++ = OP_OPT;
6462 *code++ = oldims;
6472 *codeptr = code;
6488 /* Another branch follows. In the pre-compile phase, we can move the code
6499 code = *codeptr + 1 + LINK_SIZE + skipbytes;
6504 *code = OP_ALT;
6505 PUT(code, 1, (int)(code - last_branch));
6506 bc.current_branch = last_branch = code;
6507 code += 1 + LINK_SIZE;
6529 This is the code for \G, which means "match at start of match position, taking
6546 code points to start of expression (the bracket)
6557 is_anchored(register const uschar *code, int *options, unsigned int bracket_map,
6561 const uschar *scode = first_significant_code(code + _pcre_OP_lengths[*code],
6603 code += GET(code, 1);
6605 while (*code == OP_ALT); /* Loop for each alternative */
6623 code points to start of expression (the bracket)
6633 is_startline(const uschar *code, unsigned int bracket_map,
6637 const uschar *scode = first_significant_code(code + _pcre_OP_lengths[*code],
6706 code += GET(code, 1);
6708 while (*code == OP_ALT); /* Loop for each alternative */
6727 code points to start of expression (the bracket)
6735 find_firstassertedchar(const uschar *code, int *options, BOOL inassert)
6741 first_significant_code(code + 1+LINK_SIZE, options, PCRE_CASELESS, TRUE);
6777 code += GET(code, 1);
6779 while (*code == OP_ALT);
6791 function had no error code return variable; it is retained for backwards
6797 errorcodeptr pointer to error code variable (pcre_compile2() only)
6798 can be NULL if you don't want a code value
6826 uschar *code;
6845 can do is just return NULL, but we can set a code value if there is a code
6924 /* Can't support UTF8 unless PCRE has been compiled to include the code. */
6941 /* Can't support UCP unless PCRE has been compiled to include the code. */
6963 current code allows for fixed one- or two-byte sequences, plus "any" and
7017 to compile parts of the pattern into; the compiled code is discarded when it is
7042 code = cworkspace;
7043 *code = OP_BRA;
7045 &code, &ptr, &errorcode, FALSE, FALSE, 0, &firstbyte, &reqbyte, NULL, cd,
7092 /* The starting points of the name/number translation table and of the code are
7116 code = (uschar *)codestart;
7117 *code = OP_BRA;
7118 (void)compile_regex(re->options, re->options & PCRE_IMS, &code, &ptr,
7133 *code++ = OP_END;
7136 if (code - codestart > length) errorcode = ERR23;
7285 /* This check is done here in the debugging case so that the code that
7288 if (code - codestart > length)